[PATCH v4 1/2] powerpc: silence a -Wcast-function-type warning in dawr_write_file_bool

2019-05-28 Thread Michael Neuling
From: Mathieu Malaterre 

In commit c1fe190c0672 ("powerpc: Add force enable of DAWR on P9
option") the following piece of code was added:

   smp_call_function((smp_call_func_t)set_dawr, &null_brk, 0);

Since GCC 8 this triggers the following warning about incompatible
function types:

  arch/powerpc/kernel/hw_breakpoint.c:408:21: error: cast between incompatible 
function types from 'int (*)(struct arch_hw_breakpoint *)' to 'void (*)(void 
*)' [-Werror=cast-function-type]

Since the warning is there for a reason, and should not be hidden behind
a cast, provide an intermediate callback function to avoid the warning.

Fixes: c1fe190c0672 ("powerpc: Add force enable of DAWR on P9 option")
Suggested-by: Christoph Hellwig 
Signed-off-by: Mathieu Malaterre 
Signed-off-by: Michael Neuling 
---
 arch/powerpc/kernel/hw_breakpoint.c | 7 ++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/kernel/hw_breakpoint.c 
b/arch/powerpc/kernel/hw_breakpoint.c
index da307dd93e..ca3a2358b7 100644
--- a/arch/powerpc/kernel/hw_breakpoint.c
+++ b/arch/powerpc/kernel/hw_breakpoint.c
@@ -384,6 +384,11 @@ void hw_breakpoint_pmu_read(struct perf_event *bp)
 bool dawr_force_enable;
 EXPORT_SYMBOL_GPL(dawr_force_enable);
 
+static void set_dawr_cb(void *info)
+{
+   set_dawr(info);
+}
+
 static ssize_t dawr_write_file_bool(struct file *file,
const char __user *user_buf,
size_t count, loff_t *ppos)
@@ -403,7 +408,7 @@ static ssize_t dawr_write_file_bool(struct file *file,
 
/* If we are clearing, make sure all CPUs have the DAWR cleared */
if (!dawr_force_enable)
-   smp_call_function((smp_call_func_t)set_dawr, &null_brk, 0);
+   smp_call_function(set_dawr_cb, &null_brk, 0);
 
return rc;
 }
-- 
2.21.0



[PATCH v4 2/2] powerpc: Fix compile issue with force DAWR

2019-05-28 Thread Michael Neuling
If you compile with KVM but without CONFIG_HAVE_HW_BREAKPOINT you fail
at linking with:
  arch/powerpc/kvm/book3s_hv_rmhandlers.o:(.text+0x708): undefined reference to 
`dawr_force_enable'

This was caused by commit c1fe190c0672 ("powerpc: Add force enable of
DAWR on P9 option").

This moves a bunch of code around to fix this. It moves a lot of the
DAWR code into a new file and creates a new CONFIG_PPC_DAWR to enable
compiling it.

Fixes: c1fe190c0672 ("powerpc: Add force enable of DAWR on P9 option")
Signed-off-by: Michael Neuling 
--
v4:
  - Fix merge conflict with patch from Mathieu Malaterre:
 powerpc: silence a -Wcast-function-type warning in dawr_write_file_bool
  - Fixed checkpatch issues noticed by Christophe Leroy.

v3:
  Fixes based on Christophe Leroy's comments:
  - Fix Kconfig options to better reflect reality
  - Reorder alphabetically
  - Inline vs #define
  - Fixed default return for dawr_enabled() when CONFIG_PPC_DAWR=N

V2:
  Fixes based on Christophe Leroy's comments:
  - Fix commit message formatting
  - Move more DAWR code into dawr.c
---
 arch/powerpc/Kconfig |   5 ++
 arch/powerpc/include/asm/hw_breakpoint.h |  21 +++--
 arch/powerpc/kernel/Makefile |   1 +
 arch/powerpc/kernel/dawr.c   | 100 +++
 arch/powerpc/kernel/hw_breakpoint.c  |  61 --
 arch/powerpc/kernel/process.c|  28 ---
 arch/powerpc/kvm/Kconfig |   1 +
 7 files changed, 121 insertions(+), 96 deletions(-)
 create mode 100644 arch/powerpc/kernel/dawr.c

diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 8c1c636308..87a3ce4e92 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -234,6 +234,7 @@ config PPC
select OLD_SIGSUSPEND
select PCI_DOMAINS  if PCI
select PCI_SYSCALL  if PCI
+   select PPC_DAWR if PPC64
select RTC_LIB
select SPARSE_IRQ
select SYSCTL_EXCEPTION_TRACE
@@ -370,6 +371,10 @@ config PPC_ADV_DEBUG_DAC_RANGE
depends on PPC_ADV_DEBUG_REGS && 44x
default y
 
+config PPC_DAWR
+   bool
+   default n
+
 config ZONE_DMA
bool
default y if PPC_BOOK3E_64
diff --git a/arch/powerpc/include/asm/hw_breakpoint.h 
b/arch/powerpc/include/asm/hw_breakpoint.h
index 0fe8c1e46b..41abdae6d0 100644
--- a/arch/powerpc/include/asm/hw_breakpoint.h
+++ b/arch/powerpc/include/asm/hw_breakpoint.h
@@ -90,18 +90,25 @@ static inline void hw_breakpoint_disable(void)
 extern void thread_change_pc(struct task_struct *tsk, struct pt_regs *regs);
 int hw_breakpoint_handler(struct die_args *args);
 
-extern int set_dawr(struct arch_hw_breakpoint *brk);
+#else  /* CONFIG_HAVE_HW_BREAKPOINT */
+static inline void hw_breakpoint_disable(void) { }
+static inline void thread_change_pc(struct task_struct *tsk,
+   struct pt_regs *regs) { }
+
+#endif /* CONFIG_HAVE_HW_BREAKPOINT */
+
+
+#ifdef CONFIG_PPC_DAWR
 extern bool dawr_force_enable;
 static inline bool dawr_enabled(void)
 {
return dawr_force_enable;
 }
-
-#else  /* CONFIG_HAVE_HW_BREAKPOINT */
-static inline void hw_breakpoint_disable(void) { }
-static inline void thread_change_pc(struct task_struct *tsk,
-   struct pt_regs *regs) { }
+int set_dawr(struct arch_hw_breakpoint *brk);
+#else
 static inline bool dawr_enabled(void) { return false; }
-#endif /* CONFIG_HAVE_HW_BREAKPOINT */
+static inline int set_dawr(struct arch_hw_breakpoint *brk) { return -1; }
+#endif
+
 #endif /* __KERNEL__ */
 #endif /* _PPC_BOOK3S_64_HW_BREAKPOINT_H */
diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile
index 0ea6c4aa3a..56dfa7a2a6 100644
--- a/arch/powerpc/kernel/Makefile
+++ b/arch/powerpc/kernel/Makefile
@@ -56,6 +56,7 @@ obj-$(CONFIG_PPC64)   += setup_64.o sys_ppc32.o \
 obj-$(CONFIG_VDSO32)   += vdso32/
 obj-$(CONFIG_PPC_WATCHDOG) += watchdog.o
 obj-$(CONFIG_HAVE_HW_BREAKPOINT)   += hw_breakpoint.o
+obj-$(CONFIG_PPC_DAWR) += dawr.o
 obj-$(CONFIG_PPC_BOOK3S_64)+= cpu_setup_ppc970.o cpu_setup_pa6t.o
 obj-$(CONFIG_PPC_BOOK3S_64)+= cpu_setup_power.o
 obj-$(CONFIG_PPC_BOOK3S_64)+= mce.o mce_power.o
diff --git a/arch/powerpc/kernel/dawr.c b/arch/powerpc/kernel/dawr.c
new file mode 100644
index 00..c8b3fb610c
--- /dev/null
+++ b/arch/powerpc/kernel/dawr.c
@@ -0,0 +1,100 @@
+// SPDX-License-Identifier: GPL-2.0+
+//
+// DAWR infrastructure
+//
+// Copyright 2019, Michael Neuling, IBM Corporation.
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+bool dawr_force_enable;
+EXPORT_SYMBOL_GPL(dawr_force_enable);
+
+int set_dawr(struct arch_hw_breakpoint *brk)
+{
+   unsigned long dawr, dawrx, mrd;
+
+   dawr = brk->address;
+
+   dawrx  = (brk->type & (HW_BRK_TYPE_READ | HW_BRK_TYPE_WRITE))
+   << (63 - 58);
+   dawrx |= 

Re: [PATCH] powerpc/configs: Rename foo_basic_defconfig to foo_base.config

2019-05-28 Thread Masahiro Yamada
On Tue, May 28, 2019 at 9:10 PM Christoph Hellwig  wrote:
>
> On Tue, May 28, 2019 at 06:16:14PM +1000, Michael Ellerman wrote:
> > We have several "defconfigs" that are not actually full defconfigs
> > they are just a base set of options which are then merged with other
> > fragments to produce a working defconfig.

The default values from Kconfig files are used
where CONFIG options are not specified by the defconfig.

So, I think corenet_basic_defconfig is a full defconfig
even if it contains a single CONFIG option.

Since the difference between "*_defconfig" and "*.config"
is ambiguous in some cases, it depends on the intended usage.


> > The most obvious example is corenet_basic_defconfig which only
> > contains one symbol CONFIG_CORENET_GENERIC=y. But there is also
> > mpc85xx_base_defconfig which doesn't actually enable CONFIG_PPC_85xx.
> >
> > To avoid confusion, rename these config fragments to "foo_base.config"
> > to make it clearer that they are not full defconfigs.
>
> Adding linux-kbuild, maybe we can make the handling of these fragments
> generic and actually document it..

I do not know how it should be documented.



> >
> > Reported-by: Christophe Leroy 
> > Signed-off-by: Michael Ellerman 
> > ---
> >  arch/powerpc/Makefile| 12 ++--
> >  .../{corenet_basic_defconfig => corenet_base.config} |  0
> >  .../{mpc85xx_basic_defconfig => mpc85xx_base.config} |  0
> >  .../{mpc86xx_basic_defconfig => mpc86xx_base.config} |  0
> >  4 files changed, 6 insertions(+), 6 deletions(-)
> >  rename arch/powerpc/configs/{corenet_basic_defconfig => 
> > corenet_base.config} (100%)
> >  rename arch/powerpc/configs/{mpc85xx_basic_defconfig => 
> > mpc85xx_base.config} (100%)
> >  rename arch/powerpc/configs/{mpc86xx_basic_defconfig => 
> > mpc86xx_base.config} (100%)
> >
> > diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile
> > index c345b79414a9..94f735db2229 100644
> > --- a/arch/powerpc/Makefile
> > +++ b/arch/powerpc/Makefile
> > @@ -333,32 +333,32 @@ PHONY += powernv_be_defconfig
> >
> >  PHONY += mpc85xx_defconfig
> >  mpc85xx_defconfig:
> > - $(call merge_into_defconfig,mpc85xx_basic_defconfig,\
> > + $(call merge_into_defconfig,mpc85xx_base.config,\
> >   85xx-32bit 85xx-hw fsl-emb-nonhw)
> >
> >  PHONY += mpc85xx_smp_defconfig
> >  mpc85xx_smp_defconfig:
> > - $(call merge_into_defconfig,mpc85xx_basic_defconfig,\
> > + $(call merge_into_defconfig,mpc85xx_base.config,\
> >   85xx-32bit 85xx-smp 85xx-hw fsl-emb-nonhw)
> >
> >  PHONY += corenet32_smp_defconfig
> >  corenet32_smp_defconfig:
> > - $(call merge_into_defconfig,corenet_basic_defconfig,\
> > + $(call merge_into_defconfig,corenet_base.config,\
> >   85xx-32bit 85xx-smp 85xx-hw fsl-emb-nonhw dpaa)
> >
> >  PHONY += corenet64_smp_defconfig
> >  corenet64_smp_defconfig:
> > - $(call merge_into_defconfig,corenet_basic_defconfig,\
> > + $(call merge_into_defconfig,corenet_base.config,\
> >   85xx-64bit 85xx-smp altivec 85xx-hw fsl-emb-nonhw dpaa)
> >
> >  PHONY += mpc86xx_defconfig
> >  mpc86xx_defconfig:
> > - $(call merge_into_defconfig,mpc86xx_basic_defconfig,\
> > + $(call merge_into_defconfig,mpc86xx_base.config,\
> >   86xx-hw fsl-emb-nonhw)
> >
> >  PHONY += mpc86xx_smp_defconfig
> >  mpc86xx_smp_defconfig:
> > - $(call merge_into_defconfig,mpc86xx_basic_defconfig,\
> > + $(call merge_into_defconfig,mpc86xx_base.config,\
> >   86xx-smp 86xx-hw fsl-emb-nonhw)
> >
> >  PHONY += ppc32_allmodconfig
> > diff --git a/arch/powerpc/configs/corenet_basic_defconfig 
> > b/arch/powerpc/configs/corenet_base.config
> > similarity index 100%
> > rename from arch/powerpc/configs/corenet_basic_defconfig
> > rename to arch/powerpc/configs/corenet_base.config
> > diff --git a/arch/powerpc/configs/mpc85xx_basic_defconfig 
> > b/arch/powerpc/configs/mpc85xx_base.config
> > similarity index 100%
> > rename from arch/powerpc/configs/mpc85xx_basic_defconfig
> > rename to arch/powerpc/configs/mpc85xx_base.config
> > diff --git a/arch/powerpc/configs/mpc86xx_basic_defconfig 
> > b/arch/powerpc/configs/mpc86xx_base.config
> > similarity index 100%
> > rename from arch/powerpc/configs/mpc86xx_basic_defconfig
> > rename to arch/powerpc/configs/mpc86xx_base.config
> > --
> > 2.20.1
> >
> ---end quoted text---



--
Best Regards
Masahiro Yamada


[PATCH] powerpc/pseries: avoid blocking in irq when queuing hotplug events

2019-05-28 Thread Nathan Lynch
A couple of bugs in queue_hotplug_event():

1. Unchecked kmalloc result which could lead to an oops.
2. Use of GFP_KERNEL allocations in interrupt context (this code's
   only caller is ras_hotplug_interrupt()).

Use kmemdup to avoid open-coding the allocation+copy and check for
failure; use GFP_ATOMIC for both allocations.

Ultimately it probably would be better to avoid or reduce allocations
in this path if possible.

Signed-off-by: Nathan Lynch 
---

Found by inspection, built but not runtime-tested.

 arch/powerpc/platforms/pseries/dlpar.c | 8 
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/arch/powerpc/platforms/pseries/dlpar.c 
b/arch/powerpc/platforms/pseries/dlpar.c
index 17958043e7f7..d70f9b925378 100644
--- a/arch/powerpc/platforms/pseries/dlpar.c
+++ b/arch/powerpc/platforms/pseries/dlpar.c
@@ -386,11 +386,11 @@ void queue_hotplug_event(struct pseries_hp_errorlog 
*hp_errlog)
struct pseries_hp_work *work;
struct pseries_hp_errorlog *hp_errlog_copy;
 
-   hp_errlog_copy = kmalloc(sizeof(struct pseries_hp_errorlog),
-GFP_KERNEL);
-   memcpy(hp_errlog_copy, hp_errlog, sizeof(struct pseries_hp_errorlog));
+   hp_errlog_copy = kmemdup(hp_errlog, sizeof(*hp_errlog), GFP_ATOMIC);
+   if (!hp_errlog_copy)
+   return;
 
-   work = kmalloc(sizeof(struct pseries_hp_work), GFP_KERNEL);
+   work = kmalloc(sizeof(struct pseries_hp_work), GFP_ATOMIC);
if (work) {
INIT_WORK((struct work_struct *)work, pseries_hp_work_fn);
work->errlog = hp_errlog_copy;
-- 
2.20.1



Re: [PATCH v10 09/12] ima: Implement support for module-style appended signatures

2019-05-28 Thread Mimi Zohar
On Tue, 2019-05-28 at 16:23 -0300, Thiago Jung Bauermann wrote:
> Mimi Zohar  writes:
> 
> > Hi Thiago,
> >
> >> diff --git a/security/integrity/ima/ima_policy.c 
> >> b/security/integrity/ima/ima_policy.c
> >> index fca7a3f23321..a7a20a8c15c1 100644
> >> --- a/security/integrity/ima/ima_policy.c
> >> +++ b/security/integrity/ima/ima_policy.c
> >> @@ -1144,6 +1144,12 @@ void ima_delete_rules(void)
> >>}
> >>  }
> >>
> >> +#define __ima_hook_stringify(str) (#str),
> >> +
> >> +const char *const func_tokens[] = {
> >> +  __ima_hooks(__ima_hook_stringify)
> >> +};
> >> +
> >>  #ifdef CONFIG_IMA_READ_POLICY
> >>  enum {
> >>mask_exec = 0, mask_write, mask_read, mask_append
> >> @@ -1156,12 +1162,6 @@ static const char *const mask_tokens[] = {
> >>"MAY_APPEND"
> >>  };
> >>
> >> -#define __ima_hook_stringify(str) (#str),
> >> -
> >> -static const char *const func_tokens[] = {
> >> -  __ima_hooks(__ima_hook_stringify)
> >> -};
> >> -
> >>  void *ima_policy_start(struct seq_file *m, loff_t *pos)
> >>  {
> >>loff_t l = *pos;
> >
> > Is moving this something left over from previous versions or there is
> > a need for this change?
> 
> Well, it's not a strong need, but it's still relevant in the current
> version. I use func_tokens in ima_read_modsig() in order to be able to
> mention the hook name in mod_check_sig()'s error message:
> 
> In ima_read_modsig():
> 
>   rc = mod_check_sig(sig, buf_len, func_tokens[func]);
> 
> And in mod_check_sig():
> 
>   pr_err("%s: Module is not signed with expected PKCS#7 
> message\n",
>  name);
> 
> If you think it's not worth it to expose func_tokens, I can make
> ima_read_modsig() pass a more generic const string such as "IMA modsig"
> for example.

This is fine.  I somehow missed that moving func_tokens[] outside of the
ifdef was in order to make it independent of "CONFIG_IMA_READ_POLICY".

thanks,

Mimi



Re: [PATCH RFC 0/5] Remove some notrace RCU APIs

2019-05-28 Thread Paul E. McKenney
On Tue, May 28, 2019 at 03:00:07PM -0400, Joel Fernandes wrote:
> On Tue, May 28, 2019 at 05:24:47AM -0700, Paul E. McKenney wrote:
> > On Sat, May 25, 2019 at 02:14:07PM -0400, Joel Fernandes wrote:
> > > On Sat, May 25, 2019 at 08:50:35AM -0700, Paul E. McKenney wrote:
> > > > On Sat, May 25, 2019 at 10:19:54AM -0400, Joel Fernandes wrote:
> > > > > On Sat, May 25, 2019 at 07:08:26AM -0400, Steven Rostedt wrote:
> > > > > > On Sat, 25 May 2019 04:14:44 -0400
> > > > > > Joel Fernandes  wrote:
> > > > > > 
> > > > > > > > I guess the difference between the _raw_notrace and just _raw 
> > > > > > > > variants
> > > > > > > > is that _notrace ones do a rcu_check_sparse(). Don't we want to 
> > > > > > > > keep
> > > > > > > > that check?  
> > > > > > > 
> > > > > > > This is true.
> > > > > > > 
> > > > > > > Since the users of _raw_notrace are very few, is it worth keeping 
> > > > > > > this API
> > > > > > > just for sparse checking? The API naming is also confusing. I was 
> > > > > > > expecting
> > > > > > > _raw_notrace to do fewer checks than _raw, instead of more. 
> > > > > > > Honestly, I just
> > > > > > > want to nuke _raw_notrace as done in this series and later we can 
> > > > > > > introduce a
> > > > > > > sparse checking version of _raw if need-be. The other option 
> > > > > > > could be to
> > > > > > > always do sparse checking for _raw however that used to be the 
> > > > > > > case and got
> > > > > > > changed in 
> > > > > > > http://lists.infradead.org/pipermail/linux-afs/2016-July/001016.html
> > > > > > 
> > > > > > What if we just rename _raw to _raw_nocheck, and _raw_notrace to 
> > > > > > _raw ?
> > > > > 
> > > > > That would also mean changing 160 usages of _raw to _raw_nocheck in 
> > > > > the
> > > > > kernel :-/.
> > > > > 
> > > > > The tracing usage of _raw_notrace is only like 2 or 3 users. Can we 
> > > > > just call
> > > > > rcu_check_sparse directly in the calling code for those and eliminate 
> > > > > the APIs?
> > > > > 
> > > > > I wonder what Paul thinks about the matter as well.
> > > > 
> > > > My thought is that it is likely that a goodly number of the current uses
> > > > of _raw should really be some form of _check, with lockdep expressions
> > > > spelled out.  Not that working out what exactly those lockdep 
> > > > expressions
> > > > should be is necessarily a trivial undertaking.  ;-)
> > > 
> > > Yes, currently where I am a bit stuck is the rcu_dereference_raw()
> > > cannot possibly know what SRCU domain it is under, so lockdep cannot 
> > > check if
> > > an SRCU lock is held without the user also passing along the SRCU domain. 
> > > I
> > > am trying to change lockdep to see if it can check if *any* srcu domain 
> > > lock
> > > is held (regardless of which one) and complain if none are. This is at 
> > > least
> > > better than no check at all.
> > > 
> > > However, I think it gets tricky for mutexes. If you have something like:
> > > mutex_lock(some_mutex);
> > > p = rcu_dereference_raw(gp);
> > > mutex_unlock(some_mutex);
> > > 
> > > This might be a perfectly valid invocation of _raw, however my checks 
> > > (patch
> > > is still cooking) trigger a lockdep warning because _raw cannot know that 
> > > this
> > > is Ok. lockdep thinks it is not in a reader section. This then gets into 
> > > the
> > > territory of a new rcu_dereference_raw_protected(gp, 
> > > assert_held(some_mutex))
> > > which sucks because it's yet another API. To circumvent this issue, can we
> > > just have callers of rcu_dereference_raw ensure that they call
> > > rcu_read_lock() if they are protecting dereferences by a mutex? That would
> > > make things a lot easier and also may be Ok since rcu_read_lock is quite
> > > cheap.
> > 
> > Why not just rcu_dereference_protected(lockdep_is_held(some_mutex))?
> > The API is already there, and no need for spurious readers.
> 
> Hmm, so I gave a bad example, here is a better example:
> 
> fib_get_table calls hlist_for_each_entry_rcu()
> hlist_for_each_entry_rcu calls rcu_dereference_raw().
> 
> This is perfectly Ok to be called under rtnl_mutex. However rcu_dereference_raw
> in hlist_for_each_entry_rcu has no way of knowing that the rtnl_mutex held is
> sufficient for the protection since it is not directly called by the caller.

Agreed, and this just happens to be one of the use cases that led to
rcu_dereference_raw().  The calling code (in this case, FIB) simply has
no idea what the synchronization strategy might be.

> I am almost sure I saw other examples of rcu_dereference_raw being called
> this way as well.

And I am OK with this sort of use case.  The ones I am less happy with
are the ones where there really is a lockdep expression that could be
constructed.

> I was trying to make an "automatic" lockdep check for all this, but it is
> quite hard to do so without passing down lockdep expressions down a call
> chain thus complicating all such callchains.

Understood!  Not an easy task.

> Further I don't think code can 

Re: [PATCH v10 09/12] ima: Implement support for module-style appended signatures

2019-05-28 Thread Thiago Jung Bauermann


Mimi Zohar  writes:

> Hi Thiago,
>
> On Thu, 2019-04-18 at 00:51 -0300, Thiago Jung Bauermann wrote:
>> 
>> @@ -326,6 +356,10 @@ int ima_appraise_measurement(enum ima_hooks func,
>> case INTEGRITY_UNKNOWN:
>> break;
>> case INTEGRITY_NOXATTRS:/* No EVM protected xattrs. */
>> +/* It's fine not to have xattrs when using a modsig. */
>> +if (try_modsig)
>> +break;
>> +/* fall through */
>> case INTEGRITY_NOLABEL:/* No security.evm xattr. */
>> cause = "missing-HMAC";
>> goto out;
>> @@ -340,6 +374,14 @@ int ima_appraise_measurement(enum ima_hooks func,
>> rc = xattr_verify(func, iint, xattr_value, xattr_len, &status,
>>  &cause);
>> 
>> +/*
>> + * If we have a modsig and either no imasig or the imasig's key isn't
>> + * known, then try verifying the modsig.
>> + */
>> +if (status != INTEGRITY_PASS && try_modsig &&
>> + (!xattr_value || rc == -ENOKEY))
>> +rc = modsig_verify(func, modsig, &status, &cause);
>
> EVM protects other security xattrs, not just security.ima, if they
> exist. As a result, evm_verifyxattr() could pass based on the other
> security xattrs.

Indeed! It doesn't make sense to test for status != INTEGRITY_PASS here.
Not sure what I was thinking. Thanks for spotting it. With your other
comments about this if clause, this code now reads:

/*
 * If we have a modsig and either no imasig or the imasig's key isn't
 * known, then try verifying the modsig.
 */
if (try_modsig &&
(!xattr_value || xattr_value->type == IMA_XATTR_DIGEST_NG ||
 rc == -ENOKEY))
rc = modsig_verify(func, modsig, &status, &cause);

-- 
Thiago Jung Bauermann
IBM Linux Technology Center



Re: [PATCH v10 09/12] ima: Implement support for module-style appended signatures

2019-05-28 Thread Thiago Jung Bauermann


Mimi Zohar  writes:

> Hi Thiago,
>
>> diff --git a/security/integrity/ima/ima_policy.c 
>> b/security/integrity/ima/ima_policy.c
>> index fca7a3f23321..a7a20a8c15c1 100644
>> --- a/security/integrity/ima/ima_policy.c
>> +++ b/security/integrity/ima/ima_policy.c
>> @@ -1144,6 +1144,12 @@ void ima_delete_rules(void)
>>  }
>>  }
>>
>> +#define __ima_hook_stringify(str)   (#str),
>> +
>> +const char *const func_tokens[] = {
>> +__ima_hooks(__ima_hook_stringify)
>> +};
>> +
>>  #ifdef  CONFIG_IMA_READ_POLICY
>>  enum {
>>  mask_exec = 0, mask_write, mask_read, mask_append
>> @@ -1156,12 +1162,6 @@ static const char *const mask_tokens[] = {
>>  "MAY_APPEND"
>>  };
>>
>> -#define __ima_hook_stringify(str)   (#str),
>> -
>> -static const char *const func_tokens[] = {
>> -__ima_hooks(__ima_hook_stringify)
>> -};
>> -
>>  void *ima_policy_start(struct seq_file *m, loff_t *pos)
>>  {
>>  loff_t l = *pos;
>
> Is moving this something left over from previous versions or there is
> a need for this change?

Well, it's not a strong need, but it's still relevant in the current
version. I use func_tokens in ima_read_modsig() in order to be able to
mention the hook name in mod_check_sig()'s error message:

In ima_read_modsig():

rc = mod_check_sig(sig, buf_len, func_tokens[func]);

And in mod_check_sig():

pr_err("%s: Module is not signed with expected PKCS#7 
message\n",
   name);

If you think it's not worth it to expose func_tokens, I can make
ima_read_modsig() pass a more generic const string such as "IMA modsig"
for example.

> Other than this, the patch looks good.

Nice!

--
Thiago Jung Bauermann
IBM Linux Technology Center



Re: [PATCH v10 12/12] ima: Store the measurement again when appraising a modsig

2019-05-28 Thread Thiago Jung Bauermann


Mimi Zohar  writes:

> Hi Thiago,
>
> On Thu, 2019-04-18 at 00:51 -0300, Thiago Jung Bauermann wrote:
>> If the IMA template contains the "modsig" or "d-modsig" field, then the
>> modsig should be added to the measurement list when the file is appraised.
>>
>> And that is what normally happens, but if a measurement rule caused a file
>> containing a modsig to be measured before a different rule causes it to be
>> appraised, the resulting measurement entry will not contain the modsig
>> because it is only fetched during appraisal. When the appraisal rule
>> triggers, it won't store a new measurement containing the modsig because
>> the file was already measured.
>>
>> We need to detect that situation and store an additional measurement with
>> the modsig. This is done by adding an IMA_MEASURE action flag if we read a
>> modsig and the IMA template contains a modsig field.
>
> With the new per policy rule "template" support being added, this
> patch needs to be modified so that the per policy "template" format is
> checked. ima_template_has_modsig() should be called with the
> template_desc being used.

Right. Thanks for pointing out what needs to be done. After rebasing on top
of Matthew Garrett's "IMA: Allow profiles to define the desired IMA
template" patch I changed ima_template_has_modsig() to check the
template_desc obtained from process_measurement().

--
Thiago Jung Bauermann
IBM Linux Technology Center



Re: kmemleak: 1157 new suspected memory leaks (see /sys/kernel/debug/kmemleak)

2019-05-28 Thread Mathieu Malaterre
Hi Michael !

Thanks for the kind help.

On Tue, May 28, 2019 at 7:21 AM Michael Ellerman  wrote:
>
> Mathieu Malaterre  writes:
> > Hi there,
> >
> > Is there a way to dump more context (somewhere in of tree
> > flattening?). I cannot make sense of the following:
>
> Hmm. Not that I know of.
>
> Those don't look related to OF flattening/unflattening. That's just
> sysfs setup based on the unflattened device tree.
>
> The allocations are happening in safe_name() AFAICS.
>
> int __of_add_property_sysfs(struct device_node *np, struct property *pp)
> {
> ...
> pp->attr.attr.name = safe_name(&np->kobj, pp->name);
>
> And the free is in __of_sysfs_remove_bin_file():
>
> void __of_sysfs_remove_bin_file(struct device_node *np, struct property *prop)
> {
> if (!IS_ENABLED(CONFIG_SYSFS))
> return;
>
> sysfs_remove_bin_file(&np->kobj, &prop->attr);
> kfree(prop->attr.attr.name);
>

Right. That helped a lot !

> There is this check which could be failing leading to us not calling the
> free at all:
>
> void __of_remove_property_sysfs(struct device_node *np, struct property *prop)
> {
> /* at early boot, bail here and defer setup to of_init() */
> if (of_kset && of_node_is_attached(np))
> __of_sysfs_remove_bin_file(np, prop);
> }
>
>
> So maybe stick a printk() in there to see if you're hitting that
> condition, eg something like:
>
> if (of_kset && of_node_is_attached(np))
> __of_sysfs_remove_bin_file(np, prop);
> else
> printk("%s: leaking prop %s on node %pOF\n", __func__, 
> prop->attr.attr.name, np);
>

If I understand correctly those are false positives. I was first
starting to consider using something like kmemleak_not_leak, but I
remember that I have been using kmemleak for a couple of years now.
Those reports started to show up only recently.

Catalin, do you have an idea why on a non-SMP machine kmemleak reports
leaks from:

[...]
void __init of_core_init(void)
{
[...]
 for_each_of_allnodes(np)
__of_attach_node_sysfs(np);



> cheers
>
> > kmemleak: 1157 new suspected memory leaks (see /sys/kernel/debug/kmemleak)
> >
> > Where:
> >
> > # head -40 /sys/kernel/debug/kmemleak
> > unreferenced object 0xdf44d180 (size 8):
> >   comm "swapper", pid 1, jiffies 4294892297 (age 4766.460s)
> >   hex dump (first 8 bytes):
> > 62 61 73 65 00 00 00 00  base
> >   backtrace:
> > [<0ca59825>] kstrdup+0x4c/0xb8
> > [] kobject_set_name_vargs+0x34/0xc8
> > [<661b4c86>] kobject_add+0x78/0x120
> > [] __of_attach_node_sysfs+0xa0/0x14c
> > [<2a143d10>] of_core_init+0x90/0x114
> > [] driver_init+0x30/0x48
> > [<84ed01b1>] kernel_init_freeable+0xfc/0x3fc
> > [] kernel_init+0x20/0x110
> > [] ret_from_kernel_thread+0x14/0x1c
> > unreferenced object 0xdf44d178 (size 8):
> >   comm "swapper", pid 1, jiffies 4294892297 (age 4766.460s)
> >   hex dump (first 8 bytes):
> > 6d 6f 64 65 6c 00 97 c8  model...
> >   backtrace:
> > [<0ca59825>] kstrdup+0x4c/0xb8
> > [<0eeb0a3b>] __of_add_property_sysfs+0x88/0x12c
> > [] __of_attach_node_sysfs+0xcc/0x14c
> > [<2a143d10>] of_core_init+0x90/0x114
> > [] driver_init+0x30/0x48
> > [<84ed01b1>] kernel_init_freeable+0xfc/0x3fc
> > [] kernel_init+0x20/0x110
> > [] ret_from_kernel_thread+0x14/0x1c
> > unreferenced object 0xdf4021e0 (size 16):
> >   comm "swapper", pid 1, jiffies 4294892297 (age 4766.460s)
> >   hex dump (first 16 bytes):
> > 63 6f 6d 70 61 74 69 62 6c 65 00 01 00 00 00 00  compatible..
> >   backtrace:
> > [<0ca59825>] kstrdup+0x4c/0xb8
> > [<0eeb0a3b>] __of_add_property_sysfs+0x88/0x12c
> > [] __of_attach_node_sysfs+0xcc/0x14c
> > [<2a143d10>] of_core_init+0x90/0x114
> > [] driver_init+0x30/0x48
> > [<84ed01b1>] kernel_init_freeable+0xfc/0x3fc
> > [] kernel_init+0x20/0x110
> > [] ret_from_kernel_thread+0x14/0x1c


Re: [PATCH v10 11/12] ima: Define ima-modsig template

2019-05-28 Thread Thiago Jung Bauermann


Mimi Zohar  writes:

> On Thu, 2019-04-18 at 00:51 -0300, Thiago Jung Bauermann wrote:
>> Define new "d-modsig" template field which holds the digest that is
>> expected to match the one contained in the modsig, and also new "modsig"
>> template field which holds the appended file signature.
>>
>> Add a new "ima-modsig" defined template descriptor with the new fields as
>> well as the ones from the "ima-sig" descriptor.
>>
>> Change ima_store_measurement() to accept a struct modsig * argument so that
>> it can be passed along to the templates via struct ima_event_data.
>>
>> Suggested-by: Mimi Zohar 
>> Signed-off-by: Thiago Jung Bauermann 
>
> Thanks, Roberto. Just some thoughts inline below.
>
> Reviewed-by: Mimi Zohar 

Thanks!

>> +/*
>> + * Validating the appended signature included in the measurement list 
>> requires
>> + * the file hash calculated without the appended signature (i.e., the 
>> 'd-modsig'
>> + * field). Therefore, notify the user if they have the 'modsig' field but 
>> not
>> + * the 'd-modsig' field in the template.
>> + */
>> +static void check_current_template_modsig(void)
>> +{
>> +#define MSG "template with 'modsig' field also needs 'd-modsig' field\n"
>> +struct ima_template_desc *template;
>> +bool has_modsig, has_dmodsig;
>> +static bool checked;
>> +int i;
>> +
>> +/* We only need to notify the user once. */
>> +if (checked)
>> +return;
>> +
>> +has_modsig = has_dmodsig = false;
>> +template = ima_template_desc_current();
>> +for (i = 0; i < template->num_fields; i++) {
>> +if (!strcmp(template->fields[i]->field_id, "modsig"))
>> +has_modsig = true;
>> +else if (!strcmp(template->fields[i]->field_id, "d-modsig"))
>> +has_dmodsig = true;
>> +}
>> +
>> +if (has_modsig && !has_dmodsig)
>> +pr_notice(MSG);
>> +
>> +checked = true;
>> +#undef MSG
>> +}
>> +
>
> There was some recent discussion about supporting per IMA policy rule
> template formats. This feature will allow just the kexec kernel image
> to require ima-modsig. When per policy rule template formats support
> is upstreamed, this function will need to be updated.

Indeed. Thanks for the clarification. For the next iteration I rebased
on top of Matthew Garrett's "IMA: Allow profiles to define the desired
IMA template" patch. I'm currently adapting this check accordingly.

>> @@ -389,3 +425,25 @@ int ima_eventsig_init(struct ima_event_data *event_data,
>>  return ima_write_template_field_data(xattr_value, event_data->xattr_len,
>>   DATA_FMT_HEX, field_data);
>>  }
>> +
>> +int ima_eventmodsig_init(struct ima_event_data *event_data,
>> + struct ima_field_data *field_data)
>> +{
>> +const void *data;
>> +u32 data_len;
>> +int rc;
>> +
>> +if (!event_data->modsig)
>> +return 0;
>> +
>> +/*
>> + * The xattr_value for IMA_MODSIG is a runtime structure containing
>> + * pointers. Get its raw data instead.
>> + */
>
> "xattr_value"? The comment needs some clarification.

Oops, forgot to update this comment. This is the new version:

/*
 * modsig is a runtime structure containing pointers. Get its raw data
 * instead.
 */

--
Thiago Jung Bauermann
IBM Linux Technology Center



Re: [PATCH v10 01/12] MODSIGN: Export module signature definitions

2019-05-28 Thread Thiago Jung Bauermann


Mimi Zohar  writes:

> On Thu, 2019-04-18 at 00:51 -0300, Thiago Jung Bauermann wrote:
>> IMA will use the module_signature format for append signatures, so export
>> the relevant definitions and factor out the code which verifies that the
>> appended signature trailer is valid.
>> 
>> Also, create a CONFIG_MODULE_SIG_FORMAT option so that IMA can select it
>> and be able to use mod_check_sig() without having to depend on either
>> CONFIG_MODULE_SIG or CONFIG_MODULES.
>> 
>> Signed-off-by: Thiago Jung Bauermann 
>> Cc: Jessica Yu 
>
> Just a couple minor questions/comments below.
>
> Reviewed-by: Mimi Zohar 

Thanks for your review and your comments!

>> diff --git a/init/Kconfig b/init/Kconfig
>> index 4592bf7997c0..a71019553ee1 100644
>> --- a/init/Kconfig
>> +++ b/init/Kconfig
>> @@ -1906,7 +1906,7 @@ config MODULE_SRCVERSION_ALL
>>  config MODULE_SIG
>>  bool "Module signature verification"
>>  depends on MODULES
>> -select SYSTEM_DATA_VERIFICATION
>> +select MODULE_SIG_FORMAT
>>  help
>>Check modules for valid signatures upon load: the signature
>>is simply appended to the module. For more information see
>> @@ -2036,6 +2036,10 @@ config TRIM_UNUSED_KSYMS
>>  
>>  endif # MODULES
>>  
>> +config MODULE_SIG_FORMAT
>> +def_bool n
>> +select SYSTEM_DATA_VERIFICATION
>
> Normally Kconfigs, in the same file, are defined before they are used.
>  I'm not sure if that is required or just a convention.

I think it's a convention, because it seemed to work in the current way.
For the next version I moved the config MODULE_SIG_FORMAT definition to
just before "menuconfig MODULES"

>> diff --git a/kernel/module_signature.c b/kernel/module_signature.c
>> new file mode 100644
>> index 000000000000..6d5e59f27f55
>> --- /dev/null
>> +++ b/kernel/module_signature.c
>> @@ -0,0 +1,45 @@
>> +// SPDX-License-Identifier: GPL-2.0+
>> +/*
>> + * Module signature checker
>> + *
>> + * Copyright (C) 2012 Red Hat, Inc. All Rights Reserved.
>> + * Written by David Howells (dhowe...@redhat.com)
>> + */
>> +
>> +#include 
>> +#include 
>> +#include 
>> +#include 
>> +
>> +/**
>> + * mod_check_sig - check that the given signature is sane
>> + *
>> + * @ms: Signature to check.
>> + * @file_len:   Size of the file to which @ms is appended.
>
> "name" is missing.

Fixed.

-- 
Thiago Jung Bauermann
IBM Linux Technology Center



Re: [PATCH RFC 0/5] Remove some notrace RCU APIs

2019-05-28 Thread Joel Fernandes
On Tue, May 28, 2019 at 05:24:47AM -0700, Paul E. McKenney wrote:
> On Sat, May 25, 2019 at 02:14:07PM -0400, Joel Fernandes wrote:
> > On Sat, May 25, 2019 at 08:50:35AM -0700, Paul E. McKenney wrote:
> > > On Sat, May 25, 2019 at 10:19:54AM -0400, Joel Fernandes wrote:
> > > > On Sat, May 25, 2019 at 07:08:26AM -0400, Steven Rostedt wrote:
> > > > > On Sat, 25 May 2019 04:14:44 -0400
> > > > > Joel Fernandes  wrote:
> > > > > 
> > > > > > > I guess the difference between the _raw_notrace and just _raw 
> > > > > > > variants
> > > > > > > is that _notrace ones do a rcu_check_sparse(). Don't we want to 
> > > > > > > keep
> > > > > > > that check?  
> > > > > > 
> > > > > > This is true.
> > > > > > 
> > > > > > Since the users of _raw_notrace are very few, is it worth keeping 
> > > > > > this API
> > > > > > just for sparse checking? The API naming is also confusing. I was 
> > > > > > expecting
> > > > > > _raw_notrace to do fewer checks than _raw, instead of more. 
> > > > > > Honestly, I just
> > > > > > want to nuke _raw_notrace as done in this series and later we can 
> > > > > > introduce a
> > > > > > sparse checking version of _raw if need-be. The other option could 
> > > > > > be to
> > > > > > always do sparse checking for _raw however that used to be the case 
> > > > > > and got
> > > > > > changed in 
> > > > > > http://lists.infradead.org/pipermail/linux-afs/2016-July/001016.html
> > > > > 
> > > > > What if we just rename _raw to _raw_nocheck, and _raw_notrace to _raw 
> > > > > ?
> > > > 
> > > > That would also mean changing 160 usages of _raw to _raw_nocheck in the
> > > > kernel :-/.
> > > > 
> > > > The tracing usage of _raw_notrace is only like 2 or 3 users. Can we 
> > > > just call
> > > > rcu_check_sparse directly in the calling code for those and eliminate 
> > > > the APIs?
> > > > 
> > > > I wonder what Paul thinks about the matter as well.
> > > 
> > > My thought is that it is likely that a goodly number of the current uses
> > > of _raw should really be some form of _check, with lockdep expressions
> > > spelled out.  Not that working out what exactly those lockdep expressions
> > > should be is necessarily a trivial undertaking.  ;-)
> > 
> > Yes, currently where I am a bit stuck is the rcu_dereference_raw()
> > cannot possibly know what SRCU domain it is under, so lockdep cannot check 
> > if
> > an SRCU lock is held without the user also passing along the SRCU domain. I
> > am trying to change lockdep to see if it can check if *any* srcu domain lock
> > is held (regardless of which one) and complain if none are. This is at least
> > better than no check at all.
> > 
> > However, I think it gets tricky for mutexes. If you have something like:
> > mutex_lock(some_mutex);
> > p = rcu_dereference_raw(gp);
> > mutex_unlock(some_mutex);
> > 
> > This might be a perfectly valid invocation of _raw, however my checks (patch
> > is still cooking) trigger a lockdep warning because _raw cannot know that 
> > this
> > is Ok. lockdep thinks it is not in a reader section. This then gets into the
> > territory of a new rcu_dereference_raw_protected(gp, assert_held(some_mutex))
> > which sucks because it's yet another API. To circumvent this issue, can we
> > just have callers of rcu_dereference_raw ensure that they call
> > rcu_read_lock() if they are protecting dereferences by a mutex? That would
> > make things a lot easier and also may be Ok since rcu_read_lock is quite
> > cheap.
> 
> Why not just rcu_dereference_protected(lockdep_is_held(some_mutex))?
> The API is already there, and no need for spurious readers.

Hmm, so I gave a bad example, here is a better example:

fib_get_table calls hlist_for_each_entry_rcu()
hlist_for_each_entry_rcu calls rcu_dereference_raw().

This is perfectly Ok to be called under rtnl_mutex. However rcu_dereference_raw
in hlist_for_each_entry_rcu has no way of knowing that the rtnl_mutex held is
sufficient for the protection since it is not directly called by the caller.

I am almost sure I saw other examples of rcu_dereference_raw being called
this way as well.

I was trying to make an "automatic" lockdep check for all this, but it is
quite hard to do so without passing lockdep expressions down a call
chain thus complicating all such callchains.

Further I don't think code can trivially be converted from
rcu_dereference_raw to rcu_dereference_protected even if the protection being
offered is known, since the former does not do sparse checking and the latter
might trigger false sparse checks in case the pointer in concern is protected
both by RCU and non-RCU methods. I believe this is why you removed sparse
checking from rcu_dereference_raw as well:

http://lists.infradead.org/pipermail/linux-afs/2016-July/001016.html

> > > That aside, if we are going to change the name of an API that is
> > > used 160 places throughout the tree, we would need to have a pretty
> > > good justification.  Without such a justification, it will just look
> > > like 

Re: [PATCH v4 3/3] kselftest: Extend vDSO selftest to clock_getres

2019-05-28 Thread Vincenzo Frascino
Hi Christophe,

On 28/05/2019 18:01, Christophe Leroy wrote:
> Vincenzo Frascino  a écrit :
> 
>> Hi Michael,
>>
>> thank you for your reply.
>>
>> On 28/05/2019 07:19, Michael Ellerman wrote:
>>> Vincenzo Frascino  writes:
>>>
 The current version of the multiarch vDSO selftest verifies only
 gettimeofday.

 Extend the vDSO selftest to clock_getres, to verify that the
 syscall and the vDSO library function return the same information.

 The extension has been used to verify the hrtimer_resolution fix.
>>>
>>> This is passing for me even without patch 1 applied, shouldn't it fail
>>> without the fix? What am I missing?
>>>
>>
>> This is correct, because during the refactoring process I missed an "n" :)
>>
>> if ((x.tv_sec != y.tv_sec) || (x.tv_sec != y.tv_sec))
>>
>> Should be:
>>
>> if ((x.tv_sec != y.tv_sec) || (x.tv_nsec != y.tv_nsec))
> 
> Maybe you'd better use timercmp() from sys/time.h
> 

timercmp() takes "struct timeval" not "struct timespec".

> Christophe
> 
>>
>> My mistake, I am going to fix the test and re-post v5 of this set.
>>
>> Without my patch if you pass "highres=off" to the kernel (as a command line
>> parameter) it leads to a broken implementation of clock_getres since the value
>> of CLOCK_REALTIME_RES does not change at runtime.
>>
>> Expected result (with highres=off):
>>
>> # uname -r
>> 5.2.0-rc2
>> # ./vdso_clock_getres
>> clock_id: CLOCK_REALTIME [FAIL]
>> clock_id: CLOCK_BOOTTIME [PASS]
>> clock_id: CLOCK_TAI [PASS]
>> clock_id: CLOCK_REALTIME_COARSE [PASS]
>> clock_id: CLOCK_MONOTONIC [FAIL]
>> clock_id: CLOCK_MONOTONIC_RAW [PASS]
>> clock_id: CLOCK_MONOTONIC_COARSE [PASS]
>>
>> The reason for this behavior is that the only clocks supported by getres on
>> powerpc are CLOCK_REALTIME and CLOCK_MONOTONIC; the rest of the clocks
>> always use syscalls.
>>
>>> # uname -r
>>> 5.2.0-rc2-gcc-8.2.0
>>>
>>> # ./vdso_clock_getres
>>> clock_id: CLOCK_REALTIME [PASS]
>>> clock_id: CLOCK_BOOTTIME [PASS]
>>> clock_id: CLOCK_TAI [PASS]
>>> clock_id: CLOCK_REALTIME_COARSE [PASS]
>>> clock_id: CLOCK_MONOTONIC [PASS]
>>> clock_id: CLOCK_MONOTONIC_RAW [PASS]
>>> clock_id: CLOCK_MONOTONIC_COARSE [PASS]
>>>
>>> cheers
>>>
 Cc: Shuah Khan 
 Signed-off-by: Vincenzo Frascino 
 ---

 Note: This patch is independent from the others in this series, hence it
 can be merged singularly by the kselftest maintainers.

  tools/testing/selftests/vDSO/Makefile |   2 +
  .../selftests/vDSO/vdso_clock_getres.c| 124 ++
  2 files changed, 126 insertions(+)
  create mode 100644 tools/testing/selftests/vDSO/vdso_clock_getres.c
>>
>> --
>> Regards,
>> Vincenzo
> 
> 

-- 
Regards,
Vincenzo


Re: [PATCH v3 14/16] powerpc/32: implement fast entry for syscalls on BOOKE

2019-05-28 Thread Christophe Leroy

Michael Ellerman  a écrit :


Christophe Leroy  writes:

Le 23/05/2019 à 09:00, Christophe Leroy a écrit :

[...]


arch/powerpc/kernel/head_fsl_booke.o: In function `SystemCall':
arch/powerpc/kernel/head_fsl_booke.S:416: undefined reference to
`kvmppc_handler_BOOKE_INTERRUPT_SYSCALL_SPRN_SRR1'
Makefile:1052: recipe for target 'vmlinux' failed


+.macro SYSCALL_ENTRY trapno intno
+    mfspr    r10, SPRN_SPRG_THREAD
+#ifdef CONFIG_KVM_BOOKE_HV
+BEGIN_FTR_SECTION
+    mtspr    SPRN_SPRG_WSCRATCH0, r10
+    stw    r11, THREAD_NORMSAVE(0)(r10)
+    stw    r13, THREAD_NORMSAVE(2)(r10)
+    mfcr    r13    /* save CR in r13 for now   */
+    mfspr    r11, SPRN_SRR1
+    mtocrf    0x80, r11    /* check MSR[GS] without clobbering reg */
+    bf    3, 1975f
+    b    kvmppc_handler_BOOKE_INTERRUPT_\intno\()_SPRN_SRR1


It seems to me that the "_SPRN_SRR1" on the end of this line
isn't meant to be there...  However, it still fails to link with that
removed.


It looks like I missed the macro expansion.

The called function should be kvmppc_handler_8_0x01B

Seems like kisskb doesn't build any config like this.


I thought we did, ie:

http://kisskb.ellerman.id.au/kisskb/buildresult/13817941/


That's a ppc64 config it seems. The problem was on booke32.

Christophe



But clearly something is missing to trigger the bug.

cheers





Re: [PATCH v4 3/3] kselftest: Extend vDSO selftest to clock_getres

2019-05-28 Thread Christophe Leroy

Vincenzo Frascino  a écrit :


Hi Michael,

thank you for your reply.

On 28/05/2019 07:19, Michael Ellerman wrote:

Vincenzo Frascino  writes:


The current version of the multiarch vDSO selftest verifies only
gettimeofday.

Extend the vDSO selftest to clock_getres, to verify that the
syscall and the vDSO library function return the same information.

The extension has been used to verify the hrtimer_resolution fix.


This is passing for me even without patch 1 applied, shouldn't it fail
without the fix? What am I missing?



This is correct, because during the refactoring process I missed an "n" :)

if ((x.tv_sec != y.tv_sec) || (x.tv_sec != y.tv_sec))

Should be:

if ((x.tv_sec != y.tv_sec) || (x.tv_nsec != y.tv_nsec))


Maybe you'd better use timercmp() from sys/time.h

Christophe



My mistake, I am going to fix the test and re-post v5 of this set.

Without my patch if you pass "highres=off" to the kernel (as a command line
parameter) it leads to a broken implementation of clock_getres since the value
of CLOCK_REALTIME_RES does not change at runtime.

Expected result (with highres=off):

# uname -r
5.2.0-rc2
# ./vdso_clock_getres
clock_id: CLOCK_REALTIME [FAIL]
clock_id: CLOCK_BOOTTIME [PASS]
clock_id: CLOCK_TAI [PASS]
clock_id: CLOCK_REALTIME_COARSE [PASS]
clock_id: CLOCK_MONOTONIC [FAIL]
clock_id: CLOCK_MONOTONIC_RAW [PASS]
clock_id: CLOCK_MONOTONIC_COARSE [PASS]

The reason for this behavior is that the only clocks supported by getres on
powerpc are CLOCK_REALTIME and CLOCK_MONOTONIC; the rest of the clocks
always use syscalls.


# uname -r
5.2.0-rc2-gcc-8.2.0

# ./vdso_clock_getres
clock_id: CLOCK_REALTIME [PASS]
clock_id: CLOCK_BOOTTIME [PASS]
clock_id: CLOCK_TAI [PASS]
clock_id: CLOCK_REALTIME_COARSE [PASS]
clock_id: CLOCK_MONOTONIC [PASS]
clock_id: CLOCK_MONOTONIC_RAW [PASS]
clock_id: CLOCK_MONOTONIC_COARSE [PASS]

cheers


Cc: Shuah Khan 
Signed-off-by: Vincenzo Frascino 
---

Note: This patch is independent from the others in this series, hence it
can be merged singularly by the kselftest maintainers.

 tools/testing/selftests/vDSO/Makefile |   2 +
 .../selftests/vDSO/vdso_clock_getres.c| 124 ++
 2 files changed, 126 insertions(+)
 create mode 100644 tools/testing/selftests/vDSO/vdso_clock_getres.c


--
Regards,
Vincenzo





Re: [PATCH v1 00/15] Fixing selftests failure on Talitos driver

2019-05-28 Thread Christophe Leroy

Horia Geanta  a écrit :


On 5/21/2019 4:34 PM, Christophe Leroy wrote:

Several test failures have popped up following recent changes to crypto
selftests.

This series fixes (most of) them.

The last three patches are trivial cleanups.


Thanks Christophe.

For the series:
Reviewed-by: Horia Geantă 

Have you validated the changes also on SEC 2.x+?
Asking since IIRC you mentioned having only HW with SEC 1 and  
changes in patch

"crypto: talitos - fix AEAD processing." look quite complex.


When I ported the driver to SEC1 some years ago I only had a SEC 1.2  
(mpc885) but I now have also a board with a mpc8321E which embeds a  
SEC 2.2 so I also tested the changes on it.


Christophe



Thanks,
Horia





Re: [PATCH] dlpar: Fix a missing-check bug in dlpar_parse_cc_property()

2019-05-28 Thread Nathan Lynch
Gen Zhang  writes:
> In dlpar_parse_cc_property(), 'prop->name' is allocated by kstrdup().
> kstrdup() may return NULL, so it should be checked and handle error.
> And prop should be freed if 'prop->name' is NULL.
>
> Signed-off-by: Gen Zhang 
> ---
> diff --git a/arch/powerpc/platforms/pseries/dlpar.c 
> b/arch/powerpc/platforms/pseries/dlpar.c
> index 1795804..c852024 100644
> --- a/arch/powerpc/platforms/pseries/dlpar.c
> +++ b/arch/powerpc/platforms/pseries/dlpar.c
> @@ -61,6 +61,10 @@ static struct property *dlpar_parse_cc_property(struct 
> cc_workarea *ccwa)
>  
>   name = (char *)ccwa + be32_to_cpu(ccwa->name_offset);
>   prop->name = kstrdup(name, GFP_KERNEL);
> + if (!prop->name) {
> + dlpar_free_cc_property(prop);
> + return NULL;
> + }

Acked-by: Nathan Lynch 



Re: [PATCH v2] powerpc/32: sstep: Move variable `rc` within CONFIG_PPC64 sentinels

2019-05-28 Thread Mathieu Malaterre
On Tue, May 28, 2019 at 1:40 PM Michael Ellerman  wrote:
>
> Mathieu Malaterre  writes:
>
> > Fix warnings treated as errors with W=1:
> >
> >   arch/powerpc/lib/sstep.c:1172:31: error: variable 'rc' set but not used 
> > [-Werror=unused-but-set-variable]
> >
> > Suggested-by: Christophe Leroy 
> > Signed-off-by: Mathieu Malaterre 
> > ---
> > v2: as suggested prefer CONFIG_PPC64 sentinel instead of unused keyword
>
> I'd rather avoid adding more ifdefs if we can.
>
> I think this works?

It does ! ;)

Reviewed-by: Mathieu Malaterre 

> cheers
>
> diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c
> index 3d33fb509ef4..600b036ddfda 100644
> --- a/arch/powerpc/lib/sstep.c
> +++ b/arch/powerpc/lib/sstep.c
> @@ -1169,7 +1169,7 @@ static nokprobe_inline int trap_compare(long v1, long 
> v2)
>  int analyse_instr(struct instruction_op *op, const struct pt_regs *regs,
>   unsigned int instr)
>  {
> -   unsigned int opcode, ra, rb, rc, rd, spr, u;
> +   unsigned int opcode, ra, rb, rd, spr, u;
> unsigned long int imm;
> unsigned long int val, val2;
> unsigned int mb, me, sh;
> @@ -1292,7 +1292,6 @@ int analyse_instr(struct instruction_op *op, const 
> struct pt_regs *regs,
> rd = (instr >> 21) & 0x1f;
> ra = (instr >> 16) & 0x1f;
> rb = (instr >> 11) & 0x1f;
> -   rc = (instr >> 6) & 0x1f;
>
> switch (opcode) {
>  #ifdef __powerpc64__
> @@ -1307,10 +1306,14 @@ int analyse_instr(struct instruction_op *op, const 
> struct pt_regs *regs,
> return 1;
>
>  #ifdef __powerpc64__
> -   case 4:
> +   case 4: {
> +   unsigned int rc;
> +
> if (!cpu_has_feature(CPU_FTR_ARCH_300))
> return -1;
>
> +   rc = (instr >> 6) & 0x1f;
> +
> switch (instr & 0x3f) {
> case 48:/* maddhd */
> asm volatile(PPC_MADDHD(%0, %1, %2, %3) :
> @@ -1336,6 +1339,7 @@ int analyse_instr(struct instruction_op *op, const 
> struct pt_regs *regs,
>  * primary opcode which do not have emulation support yet.
>  */
> return -1;
> +   }
>  #endif
>
> case 7: /* mulli */


Re: [PATCH v2] powerpc/power: Expose pfn_is_nosave prototype

2019-05-28 Thread Rafael J. Wysocki
On Tuesday, May 28, 2019 3:16:30 AM CEST Michael Ellerman wrote:
> "Rafael J. Wysocki"  writes:
> > On Friday, May 24, 2019 12:44:18 PM CEST Mathieu Malaterre wrote:
> >> The declaration for pfn_is_nosave is only available in
> >> kernel/power/power.h. Since this function can be overridden by arch code,
> >> expose it globally. Having a prototype will make sure to avoid warnings
> >> (sometimes treated as errors with W=1) such as:
> >> 
> >>   arch/powerpc/kernel/suspend.c:18:5: error: no previous prototype for 
> >> 'pfn_is_nosave' [-Werror=missing-prototypes]
> >> 
> >> This moves the declaration into a globally visible header file and add
> >> missing include to avoid a warning on powerpc. Also remove the
> >> duplicated prototypes since not required anymore.
> >> 
> >> Cc: Christophe Leroy 
> >> Signed-off-by: Mathieu Malaterre 
> >> ---
> >> v2: As suggested by Christophe, remove duplicate prototypes
> >> 
> >>  arch/powerpc/kernel/suspend.c | 1 +
> >>  arch/s390/kernel/entry.h  | 1 -
> >>  include/linux/suspend.h   | 1 +
> >>  kernel/power/power.h  | 2 --
> >>  4 files changed, 2 insertions(+), 3 deletions(-)
> >> 
> >> diff --git a/kernel/power/power.h b/kernel/power/power.h
> >> index 9e58bdc8a562..44bee462ff57 100644
> >> --- a/kernel/power/power.h
> >> +++ b/kernel/power/power.h
> >> @@ -75,8 +75,6 @@ static inline void hibernate_reserved_size_init(void) {}
> >>  static inline void hibernate_image_size_init(void) {}
> >>  #endif /* !CONFIG_HIBERNATION */
> >>  
> >> -extern int pfn_is_nosave(unsigned long);
> >> -
> >>  #define power_attr(_name) \
> >>  static struct kobj_attribute _name##_attr = { \
> >>.attr   = { \
> >> 
> >
> > With an ACK from the powerpc maintainers, I could apply this one.
> 
> Sent.

Thanks!





Re: [PATCH v2] mm: add account_locked_vm utility function

2019-05-28 Thread Daniel Jordan
On Sat, May 25, 2019 at 02:51:18PM -0700, Andrew Morton wrote:
> On Fri, 24 May 2019 13:50:45 -0400 Daniel Jordan  
> wrote:
> 
> > locked_vm accounting is done roughly the same way in five places, so
> > unify them in a helper.  Standardize the debug prints, which vary
> > slightly, but include the helper's caller to disambiguate between
> > callsites.
> > 
> > Error codes stay the same, so user-visible behavior does too.  The one
> > exception is that the -EPERM case in tce_account_locked_vm is removed
> > because Alexey has never seen it triggered.
> > 
> > ...
> >
> > --- a/include/linux/mm.h
> > +++ b/include/linux/mm.h
> > @@ -1564,6 +1564,25 @@ long get_user_pages_unlocked(unsigned long start, 
> > unsigned long nr_pages,
> >  int get_user_pages_fast(unsigned long start, int nr_pages,
> > unsigned int gup_flags, struct page **pages);
> >  
> > +int __account_locked_vm(struct mm_struct *mm, unsigned long pages, bool 
> > inc,
> > +   struct task_struct *task, bool bypass_rlim);
> > +
> > +static inline int account_locked_vm(struct mm_struct *mm, unsigned long 
> > pages,
> > +   bool inc)
> > +{
> > +   int ret;
> > +
> > +   if (pages == 0 || !mm)
> > +   return 0;
> > +
> > +   down_write(&mm->mmap_sem);
> > +   ret = __account_locked_vm(mm, pages, inc, current,
> > + capable(CAP_IPC_LOCK));
> > +   up_write(&mm->mmap_sem);
> > +
> > +   return ret;
> > +}
> 
> That's quite a mouthful for an inlined function.  How about uninlining
> the whole thing and fiddling drivers/vfio/vfio_iommu_type1.c to suit. 
> I wonder why it does down_write_killable and whether it really needs
> to...

Sure, I can uninline it.  vfio changelogs don't show a particular reason for
_killable[1].  Maybe Alex has something to add.  Otherwise I'll respin without
it since the simplification seems worth removing _killable.

[1] 0cfef2b7410b ("vfio/type1: Remove locked page accounting workqueue")


Re: [PATCH v10 12/12] ima: Store the measurement again when appraising a modsig

2019-05-28 Thread Mimi Zohar
Hi Thiago,

On Thu, 2019-04-18 at 00:51 -0300, Thiago Jung Bauermann wrote:
> If the IMA template contains the "modsig" or "d-modsig" field, then the
> modsig should be added to the measurement list when the file is appraised.
> 
> And that is what normally happens, but if a measurement rule caused a file
> containing a modsig to be measured before a different rule causes it to be
> appraised, the resulting measurement entry will not contain the modsig
> because it is only fetched during appraisal. When the appraisal rule
> triggers, it won't store a new measurement containing the modsig because
> the file was already measured.
> 
> We need to detect that situation and store an additional measurement with
> the modsig. This is done by adding an IMA_MEASURE action flag if we read a
> modsig and the IMA template contains a modsig field.

With the new per policy rule "template" support being added, this
patch needs to be modified so that the per policy "template" format is
checked.  ima_template_has_modsig() should be called with the
template_desc being used.

thanks,

Mimi


> diff --git a/security/integrity/ima/ima_main.c 
> b/security/integrity/ima/ima_main.c
> index 8e6475854351..f91ed4189f98 100644
> --- a/security/integrity/ima/ima_main.c
> +++ b/security/integrity/ima/ima_main.c
> @@ -282,9 +282,17 @@ static int process_measurement(struct file *file, const 
> struct cred *cred,
>   /* read 'security.ima' */
>   xattr_len = ima_read_xattr(file_dentry(file), &xattr_value);
>  
> - /* Read the appended modsig if allowed by the policy. */
> - if (iint->flags & IMA_MODSIG_ALLOWED)
> - ima_read_modsig(func, buf, size, &modsig);
> + /*
> +  * Read the appended modsig, if allowed by the policy, and allow
> +  * an additional measurement list entry, if needed, based on the
> +  * template format.
> +  */
> + if (iint->flags & IMA_MODSIG_ALLOWED) {
> + rc = ima_read_modsig(func, buf, size, &modsig);
> +
> + if (!rc && ima_template_has_modsig())
> + action |= IMA_MEASURE;
> + }
> 



Re: [PATCH v7 1/1] iommu: enhance IOMMU dma mode build options

2019-05-28 Thread Leizhen (ThunderTown)



On 2019/5/27 22:21, Joerg Roedel wrote:
> Hi Zhen Lei,
> 
> On Mon, May 20, 2019 at 09:59:47PM +0800, Zhen Lei wrote:
>>  arch/ia64/kernel/pci-dma.c|  2 +-
>>  arch/powerpc/platforms/powernv/pci-ioda.c |  3 ++-
>>  arch/s390/pci/pci_dma.c   |  2 +-
>>  arch/x86/kernel/pci-dma.c |  7 ++---
>>  drivers/iommu/Kconfig | 44 
>> ++-
>>  drivers/iommu/amd_iommu_init.c|  3 ++-
>>  drivers/iommu/intel-iommu.c   |  2 +-
>>  drivers/iommu/iommu.c |  3 ++-
>>  8 files changed, 48 insertions(+), 18 deletions(-)
> 
> This needs Acks from the arch maintainers of ia64, powerpc, s390 and
> x86, at least.
> 
> It is easier for them if you split it up into the Kconfig change and
> separate patches per arch and per iommu driver. Then collect the Acks on
> the individual patches.

OK, thanks. I will do it tomorrow.

> 
> Thanks,
> 
>   Joerg
> 
> .
> 

-- 
Thanks!
BestRegards



Re: [alsa-devel] [PATCH] ASoC: fsl: sai: Fix clock source for mclk0

2019-05-28 Thread Daniel Baluta
On Sun, Apr 21, 2019 at 11:26 AM Nicolin Chen  wrote:
>
> On Sun, Apr 21, 2019 at 01:04:39AM -0700, Nicolin Chen wrote:
> > On Sun, Apr 21, 2019 at 10:26:40AM +0300, Daniel Baluta wrote:
> > > > Firstly, according to your commit message, neither imx8qm nor
> > > > imx6sx has an "mclk0" clock in the clock list. Either of them
> > > > starts with "mclk1". So, before you change the driver, I don't
> > > > think it's even a right thing to define an "mclk0" in the DT.
> > >
> > > From what I understand mclk0 means option 00b of MSEL bits which is:
> > > * busclk for i.MX8
> > > * mclk1 for i.MX6/7.
> >
> > MSEL bit is used for an internal clock MUX to select four clock
> > inputs. However,  these four clock inputs aren't exactly 1:1 of
> > SAI's inputs. As far as I can tell, SAI only has one bus clock
> > and three MCLK[1-3]; the internal clock MUX maps the bus clock
> > or MCLK1 to its input0, and then linearly maps MCLK[1-3] to its
> > inputs[1-3]. So it doesn't sound right to me that you define an
> > "MCLK0" in the DT, as it's supposed to describe input clocks of
> > SAI block, other than its internal clock MUX's.
>
> Daniel, I think I'm saying this too confidently, though I do feel
> so :) But if you can prove me wrong and justify that there is an
> "MCLK0" as an external input of the SAI block, I will agree with
> this change.

Looking inside the RTL for SAI on i.MX8 I found that there
is a MUX with 4 inputs exactly as RM says:
- bus
- master clock 1
- master clock 2
- master clock 3

My point is that the DT is modelling the internal clock MUX
used for SAI to select its clock source.

thanks,
Daniel.


Re: [PATCH RFC 0/5] Remove some notrace RCU APIs

2019-05-28 Thread Paul E. McKenney
On Sat, May 25, 2019 at 02:14:07PM -0400, Joel Fernandes wrote:
> On Sat, May 25, 2019 at 08:50:35AM -0700, Paul E. McKenney wrote:
> > On Sat, May 25, 2019 at 10:19:54AM -0400, Joel Fernandes wrote:
> > > On Sat, May 25, 2019 at 07:08:26AM -0400, Steven Rostedt wrote:
> > > > On Sat, 25 May 2019 04:14:44 -0400
> > > > Joel Fernandes  wrote:
> > > > 
> > > > > > I guess the difference between the _raw_notrace and just _raw 
> > > > > > variants
> > > > > > is that _notrace ones do a rcu_check_sparse(). Don't we want to keep
> > > > > > that check?  
> > > > > 
> > > > > This is true.
> > > > > 
> > > > > Since the users of _raw_notrace are very few, is it worth keeping 
> > > > > this API
> > > > > just for sparse checking? The API naming is also confusing. I was 
> > > > > expecting
> > > > > _raw_notrace to do fewer checks than _raw, instead of more. Honestly, 
> > > > > I just
> > > > > want to nuke _raw_notrace as done in this series and later we can 
> > > > > introduce a
> > > > > sparse checking version of _raw if need-be. The other option could be 
> > > > > to
> > > > > always do sparse checking for _raw however that used to be the case 
> > > > > and got
> > > > > changed in 
> > > > > http://lists.infradead.org/pipermail/linux-afs/2016-July/001016.html
> > > > 
> > > > What if we just rename _raw to _raw_nocheck, and _raw_notrace to _raw ?
> > > 
> > > That would also mean changing 160 usages of _raw to _raw_nocheck in the
> > > kernel :-/.
> > > 
> > > The tracing usage of _raw_notrace is only like 2 or 3 users. Can we just 
> > > call
> > > rcu_check_sparse directly in the calling code for those and eliminate the 
> > > APIs?
> > > 
> > > I wonder what Paul thinks about the matter as well.
> > 
> > My thought is that it is likely that a goodly number of the current uses
> > of _raw should really be some form of _check, with lockdep expressions
> > spelled out.  Not that working out what exactly those lockdep expressions
> > should be is necessarily a trivial undertaking.  ;-)
> 
> Yes, currently where I am a bit stuck is the rcu_dereference_raw()
> cannot possibly know what SRCU domain it is under, so lockdep cannot check if
> an SRCU lock is held without the user also passing along the SRCU domain. I
> am trying to change lockdep to see if it can check if *any* srcu domain lock
> is held (regardless of which one) and complain if none are. This is at least
> better than no check at all.
> 
> However, I think it gets tricky for mutexes. If you have something like:
> mutex_lock(some_mutex);
> p = rcu_dereference_raw(gp);
> mutex_unlock(some_mutex);
> 
> This might be a perfectly valid invocation of _raw, however my checks (patch
> is still cooking) trigger a lockdep warning because _raw cannot know that this
> is Ok. lockdep thinks it is not in a reader section. This then gets into the
> territory of a new rcu_dereference_raw_protected(gp, assert_held(some_mutex))
> which sucks because it's yet another API. To circumvent this issue, can we
> just have callers of rcu_dereference_raw ensure that they call
> rcu_read_lock() if they are protecting dereferences by a mutex? That would
> make things a lot easier and also may be Ok since rcu_read_lock is quite
> cheap.

Why not just rcu_dereference_protected(lockdep_is_held(some_mutex))?
The API is already there, and no need for spurious readers.

> > That aside, if we are going to change the name of an API that is
> > used 160 places throughout the tree, we would need to have a pretty
> > good justification.  Without such a justification, it will just look
> > like pointless churn to the various developers and maintainers on the
> > receiving end of the patches.
> 
> Actually, the API name change is not something I want to do, it is Steven
> suggestion. My suggestion is let us just delete _raw_notrace and just use the
> _raw API for tracing, since _raw doesn't do any tracing anyway. Steve pointed
> that _raw_notrace does sparse checking unlike _raw, but I think that isn't an
> issue since _raw doesn't do such checking at the moment anyway.. (if possible
> check my cover letter again for details/motivation of this series).

Understood, but regardless of who suggested it, if we are to go through
with it, good justification will be required.  ;-)

Thanx, Paul

> thanks!
> 
>  - Joel
> 
> > Thanx, Paul
> > 
> > > thanks, Steven!
> > > 
> > 
> 



Re: [PATCH] powerpc/configs: Rename foo_basic_defconfig to foo_base.config

2019-05-28 Thread Christoph Hellwig
On Tue, May 28, 2019 at 06:16:14PM +1000, Michael Ellerman wrote:
> We have several "defconfigs" that are not actually full defconfigs
> they are just a base set of options which are then merged with other
> fragments to produce a working defconfig.
> 
> The most obvious example is corenet_basic_defconfig which only
> contains one symbol CONFIG_CORENET_GENERIC=y. But there is also
> mpc85xx_base_defconfig which doesn't actually enable CONFIG_PPC_85xx.
> 
> To avoid confusion, rename these config fragments to "foo_base.config"
> to make it clearer that they are not full defconfigs.

Adding linux-kbuild, maybe we can make the handling of these fragments
generic and actually document it..

>
> Reported-by: Christophe Leroy 
> Signed-off-by: Michael Ellerman 
> ---
>  arch/powerpc/Makefile| 12 ++--
>  .../{corenet_basic_defconfig => corenet_base.config} |  0
>  .../{mpc85xx_basic_defconfig => mpc85xx_base.config} |  0
>  .../{mpc86xx_basic_defconfig => mpc86xx_base.config} |  0
>  4 files changed, 6 insertions(+), 6 deletions(-)
>  rename arch/powerpc/configs/{corenet_basic_defconfig => corenet_base.config} 
> (100%)
>  rename arch/powerpc/configs/{mpc85xx_basic_defconfig => mpc85xx_base.config} 
> (100%)
>  rename arch/powerpc/configs/{mpc86xx_basic_defconfig => mpc86xx_base.config} 
> (100%)
> 
> diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile
> index c345b79414a9..94f735db2229 100644
> --- a/arch/powerpc/Makefile
> +++ b/arch/powerpc/Makefile
> @@ -333,32 +333,32 @@ PHONY += powernv_be_defconfig
>  
>  PHONY += mpc85xx_defconfig
>  mpc85xx_defconfig:
> - $(call merge_into_defconfig,mpc85xx_basic_defconfig,\
> + $(call merge_into_defconfig,mpc85xx_base.config,\
>   85xx-32bit 85xx-hw fsl-emb-nonhw)
>  
>  PHONY += mpc85xx_smp_defconfig
>  mpc85xx_smp_defconfig:
> - $(call merge_into_defconfig,mpc85xx_basic_defconfig,\
> + $(call merge_into_defconfig,mpc85xx_base.config,\
>   85xx-32bit 85xx-smp 85xx-hw fsl-emb-nonhw)
>  
>  PHONY += corenet32_smp_defconfig
>  corenet32_smp_defconfig:
> - $(call merge_into_defconfig,corenet_basic_defconfig,\
> + $(call merge_into_defconfig,corenet_base.config,\
>   85xx-32bit 85xx-smp 85xx-hw fsl-emb-nonhw dpaa)
>  
>  PHONY += corenet64_smp_defconfig
>  corenet64_smp_defconfig:
> - $(call merge_into_defconfig,corenet_basic_defconfig,\
> + $(call merge_into_defconfig,corenet_base.config,\
>   85xx-64bit 85xx-smp altivec 85xx-hw fsl-emb-nonhw dpaa)
>  
>  PHONY += mpc86xx_defconfig
>  mpc86xx_defconfig:
> - $(call merge_into_defconfig,mpc86xx_basic_defconfig,\
> + $(call merge_into_defconfig,mpc86xx_base.config,\
>   86xx-hw fsl-emb-nonhw)
>  
>  PHONY += mpc86xx_smp_defconfig
>  mpc86xx_smp_defconfig:
> - $(call merge_into_defconfig,mpc86xx_basic_defconfig,\
> + $(call merge_into_defconfig,mpc86xx_base.config,\
>   86xx-smp 86xx-hw fsl-emb-nonhw)
>  
>  PHONY += ppc32_allmodconfig
> diff --git a/arch/powerpc/configs/corenet_basic_defconfig 
> b/arch/powerpc/configs/corenet_base.config
> similarity index 100%
> rename from arch/powerpc/configs/corenet_basic_defconfig
> rename to arch/powerpc/configs/corenet_base.config
> diff --git a/arch/powerpc/configs/mpc85xx_basic_defconfig 
> b/arch/powerpc/configs/mpc85xx_base.config
> similarity index 100%
> rename from arch/powerpc/configs/mpc85xx_basic_defconfig
> rename to arch/powerpc/configs/mpc85xx_base.config
> diff --git a/arch/powerpc/configs/mpc86xx_basic_defconfig 
> b/arch/powerpc/configs/mpc86xx_base.config
> similarity index 100%
> rename from arch/powerpc/configs/mpc86xx_basic_defconfig
> rename to arch/powerpc/configs/mpc86xx_base.config
> -- 
> 2.20.1
> 
---end quoted text---


[PATCH v5 3/3] kselftest: Extend vDSO selftest to clock_getres

2019-05-28 Thread Vincenzo Frascino
The current version of the multiarch vDSO selftest verifies only
gettimeofday.

Extend the vDSO selftest to clock_getres, to verify that the
syscall and the vDSO library function return the same information.

The extension has been used to verify the hrtimer_resolution fix.

Cc: Shuah Khan 
Signed-off-by: Vincenzo Frascino 
---

Note: This patch is independent from the others in this series, hence it
can be merged singularly by the kselftest maintainers.

 tools/testing/selftests/vDSO/Makefile |   2 +
 .../selftests/vDSO/vdso_clock_getres.c| 124 ++
 2 files changed, 126 insertions(+)
 create mode 100644 tools/testing/selftests/vDSO/vdso_clock_getres.c

diff --git a/tools/testing/selftests/vDSO/Makefile 
b/tools/testing/selftests/vDSO/Makefile
index 9e03d61f52fd..d5c5bfdf1ac1 100644
--- a/tools/testing/selftests/vDSO/Makefile
+++ b/tools/testing/selftests/vDSO/Makefile
@@ -5,6 +5,7 @@ uname_M := $(shell uname -m 2>/dev/null || echo not)
 ARCH ?= $(shell echo $(uname_M) | sed -e s/i.86/x86/ -e s/x86_64/x86/)
 
 TEST_GEN_PROGS := $(OUTPUT)/vdso_test
+TEST_GEN_PROGS += $(OUTPUT)/vdso_clock_getres
 ifeq ($(ARCH),x86)
 TEST_GEN_PROGS += $(OUTPUT)/vdso_standalone_test_x86
 endif
@@ -18,6 +19,7 @@ endif
 
 all: $(TEST_GEN_PROGS)
 $(OUTPUT)/vdso_test: parse_vdso.c vdso_test.c
+$(OUTPUT)/vdso_clock_getres: vdso_clock_getres.c
 $(OUTPUT)/vdso_standalone_test_x86: vdso_standalone_test_x86.c parse_vdso.c
$(CC) $(CFLAGS) $(CFLAGS_vdso_standalone_test_x86) \
vdso_standalone_test_x86.c parse_vdso.c \
diff --git a/tools/testing/selftests/vDSO/vdso_clock_getres.c 
b/tools/testing/selftests/vDSO/vdso_clock_getres.c
new file mode 100644
index ..15dcee16ff72
--- /dev/null
+++ b/tools/testing/selftests/vDSO/vdso_clock_getres.c
@@ -0,0 +1,124 @@
+// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
+/*
+ * vdso_clock_getres.c: Sample code to test clock_getres.
+ * Copyright (c) 2019 Arm Ltd.
+ *
+ * Compile with:
+ * gcc -std=gnu99 vdso_clock_getres.c
+ *
+ * Tested on ARM, ARM64, MIPS32, x86 (32-bit and 64-bit),
+ * Power (32-bit and 64-bit), S390x (32-bit and 64-bit).
+ * Might work on other architectures.
+ */
+
+#define _GNU_SOURCE
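+#include <elf.h>
+#include <err.h>
+#include <fcntl.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <time.h>
+#include <sys/auxv.h>
+#include <sys/mman.h>
+#include <sys/time.h>
+#include <unistd.h>
+#include <sys/syscall.h>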
+
+#include "../kselftest.h"
+
+static long syscall_clock_getres(clockid_t _clkid, struct timespec *_ts)
+{
+   long ret;
+
+   ret = syscall(SYS_clock_getres, _clkid, _ts);
+
+   return ret;
+}
+
+const char *vdso_clock_name[12] = {
+   "CLOCK_REALTIME",
+   "CLOCK_MONOTONIC",
+   "CLOCK_PROCESS_CPUTIME_ID",
+   "CLOCK_THREAD_CPUTIME_ID",
+   "CLOCK_MONOTONIC_RAW",
+   "CLOCK_REALTIME_COARSE",
+   "CLOCK_MONOTONIC_COARSE",
+   "CLOCK_BOOTTIME",
+   "CLOCK_REALTIME_ALARM",
+   "CLOCK_BOOTTIME_ALARM",
+   "CLOCK_SGI_CYCLE",
+   "CLOCK_TAI",
+};
+
+/*
+ * This function calls clock_getres in vdso and by system call
+ * with different values for clock_id.
+ *
+ * Example of output:
+ *
+ * clock_id: CLOCK_REALTIME [PASS]
+ * clock_id: CLOCK_BOOTTIME [PASS]
+ * clock_id: CLOCK_TAI [PASS]
+ * clock_id: CLOCK_REALTIME_COARSE [PASS]
+ * clock_id: CLOCK_MONOTONIC [PASS]
+ * clock_id: CLOCK_MONOTONIC_RAW [PASS]
+ * clock_id: CLOCK_MONOTONIC_COARSE [PASS]
+ */
+static inline int vdso_test_clock(unsigned int clock_id)
+{
+   struct timespec x, y;
+
+   printf("clock_id: %s", vdso_clock_name[clock_id]);
+   clock_getres(clock_id, &x);
+   syscall_clock_getres(clock_id, &y);
+
+   if ((x.tv_sec != y.tv_sec) || (x.tv_nsec != y.tv_nsec)) {
+   printf(" [FAIL]\n");
+   return KSFT_FAIL;
+   }
+
+   printf(" [PASS]\n");
+   return KSFT_PASS;
+}
+
+int main(int argc, char **argv)
+{
+   int ret;
+
+#if _POSIX_TIMERS > 0
+
+#ifdef CLOCK_REALTIME
+   ret = vdso_test_clock(CLOCK_REALTIME);
+#endif
+
+#ifdef CLOCK_BOOTTIME
+   ret += vdso_test_clock(CLOCK_BOOTTIME);
+#endif
+
+#ifdef CLOCK_TAI
+   ret += vdso_test_clock(CLOCK_TAI);
+#endif
+
+#ifdef CLOCK_REALTIME_COARSE
+   ret += vdso_test_clock(CLOCK_REALTIME_COARSE);
+#endif
+
+#ifdef CLOCK_MONOTONIC
+   ret += vdso_test_clock(CLOCK_MONOTONIC);
+#endif
+
+#ifdef CLOCK_MONOTONIC_RAW
+   ret += vdso_test_clock(CLOCK_MONOTONIC_RAW);
+#endif
+
+#ifdef CLOCK_MONOTONIC_COARSE
+   ret += vdso_test_clock(CLOCK_MONOTONIC_COARSE);
+#endif
+
+#endif
+   if (ret > 0)
+   return KSFT_FAIL;
+
+   return KSFT_PASS;
+}
-- 
2.21.0



[PATCH v5 2/3] s390: Fix vDSO clock_getres()

2019-05-28 Thread Vincenzo Frascino
clock_getres in the vDSO library has to preserve the same behaviour
of posix_get_hrtimer_res().

In particular, posix_get_hrtimer_res() does:
sec = 0;
ns = hrtimer_resolution;
and hrtimer_resolution depends on the enablement of the high
resolution timers that can happen either at compile or at run time.

Fix the s390 vdso implementation of clock_getres keeping a copy of
hrtimer_resolution in vdso data and using that directly.

Cc: Martin Schwidefsky 
Cc: Heiko Carstens 
Signed-off-by: Vincenzo Frascino 
Acked-by: Martin Schwidefsky 
---

Note: This patch is independent from the others in this series, hence it
can be merged singularly by the s390 maintainers.

 arch/s390/include/asm/vdso.h   |  1 +
 arch/s390/kernel/asm-offsets.c |  2 +-
 arch/s390/kernel/time.c|  1 +
 arch/s390/kernel/vdso32/clock_getres.S | 12 +++-
 arch/s390/kernel/vdso64/clock_getres.S | 10 +-
 5 files changed, 15 insertions(+), 11 deletions(-)

diff --git a/arch/s390/include/asm/vdso.h b/arch/s390/include/asm/vdso.h
index 169d7604eb80..f3ba84fa9bd1 100644
--- a/arch/s390/include/asm/vdso.h
+++ b/arch/s390/include/asm/vdso.h
@@ -36,6 +36,7 @@ struct vdso_data {
__u32 tk_shift; /* Shift used for xtime_nsec 0x60 */
__u32 ts_dir;   /* TOD steering direction   0x64 */
__u64 ts_end;   /* TOD steering end 0x68 */
+   __u32 hrtimer_res;  /* hrtimer resolution   0x70 */
 };
 
 struct vdso_per_cpu_data {
diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c
index 41ac4ad21311..4a229a60b24a 100644
--- a/arch/s390/kernel/asm-offsets.c
+++ b/arch/s390/kernel/asm-offsets.c
@@ -76,6 +76,7 @@ int main(void)
OFFSET(__VDSO_TK_SHIFT, vdso_data, tk_shift);
OFFSET(__VDSO_TS_DIR, vdso_data, ts_dir);
OFFSET(__VDSO_TS_END, vdso_data, ts_end);
+   OFFSET(__VDSO_CLOCK_REALTIME_RES, vdso_data, hrtimer_res);
OFFSET(__VDSO_ECTG_BASE, vdso_per_cpu_data, ectg_timer_base);
OFFSET(__VDSO_ECTG_USER, vdso_per_cpu_data, ectg_user_time);
OFFSET(__VDSO_CPU_NR, vdso_per_cpu_data, cpu_nr);
@@ -87,7 +88,6 @@ int main(void)
DEFINE(__CLOCK_REALTIME_COARSE, CLOCK_REALTIME_COARSE);
DEFINE(__CLOCK_MONOTONIC_COARSE, CLOCK_MONOTONIC_COARSE);
DEFINE(__CLOCK_THREAD_CPUTIME_ID, CLOCK_THREAD_CPUTIME_ID);
-   DEFINE(__CLOCK_REALTIME_RES, MONOTONIC_RES_NSEC);
DEFINE(__CLOCK_COARSE_RES, LOW_RES_NSEC);
BLANK();
/* idle data offsets */
diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c
index e8766beee5ad..8ea9db599d38 100644
--- a/arch/s390/kernel/time.c
+++ b/arch/s390/kernel/time.c
@@ -310,6 +310,7 @@ void update_vsyscall(struct timekeeper *tk)
 
vdso_data->tk_mult = tk->tkr_mono.mult;
vdso_data->tk_shift = tk->tkr_mono.shift;
+   vdso_data->hrtimer_res = hrtimer_resolution;
smp_wmb();
++vdso_data->tb_update_count;
 }
diff --git a/arch/s390/kernel/vdso32/clock_getres.S 
b/arch/s390/kernel/vdso32/clock_getres.S
index eaf9cf1417f6..fecd7684c645 100644
--- a/arch/s390/kernel/vdso32/clock_getres.S
+++ b/arch/s390/kernel/vdso32/clock_getres.S
@@ -18,20 +18,22 @@
 __kernel_clock_getres:
CFI_STARTPROC
basr %r1,0
-   la  %r1,4f-.(%r1)
+10:al  %r1,4f-10b(%r1)
+   l   %r0,__VDSO_CLOCK_REALTIME_RES(%r1)
chi %r2,__CLOCK_REALTIME
je  0f
chi %r2,__CLOCK_MONOTONIC
je  0f
-   la  %r1,5f-4f(%r1)
+   basr %r1,0
+   la  %r1,5f-.(%r1)
+   l   %r0,0(%r1)
chi %r2,__CLOCK_REALTIME_COARSE
je  0f
chi %r2,__CLOCK_MONOTONIC_COARSE
jne 3f
 0: ltr %r3,%r3
jz  2f  /* res == NULL */
-1: l   %r0,0(%r1)
-   xc  0(4,%r3),0(%r3) /* set tp->tv_sec to zero */
+1: xc  0(4,%r3),0(%r3) /* set tp->tv_sec to zero */
st  %r0,4(%r3)  /* store tp->tv_usec */
 2: lhi %r2,0
br  %r14
@@ -39,6 +41,6 @@ __kernel_clock_getres:
svc 0
br  %r14
CFI_ENDPROC
-4: .long   __CLOCK_REALTIME_RES
+4: .long   _vdso_data - 10b
 5: .long   __CLOCK_COARSE_RES
.size   __kernel_clock_getres,.-__kernel_clock_getres
diff --git a/arch/s390/kernel/vdso64/clock_getres.S 
b/arch/s390/kernel/vdso64/clock_getres.S
index 081435398e0a..022b58c980db 100644
--- a/arch/s390/kernel/vdso64/clock_getres.S
+++ b/arch/s390/kernel/vdso64/clock_getres.S
@@ -17,12 +17,14 @@
.type  __kernel_clock_getres,@function
 __kernel_clock_getres:
CFI_STARTPROC
-   larl %r1,4f
+   larl %r1,3f
+   lg  %r0,0(%r1)
cghi %r2,__CLOCK_REALTIME_COARSE
je  0f
cghi %r2,__CLOCK_MONOTONIC_COARSE
je  0f
-

[PATCH v5 1/3] powerpc: Fix vDSO clock_getres()

2019-05-28 Thread Vincenzo Frascino
clock_getres in the vDSO library has to preserve the same behaviour
of posix_get_hrtimer_res().

In particular, posix_get_hrtimer_res() does:
sec = 0;
ns = hrtimer_resolution;
and hrtimer_resolution depends on the enablement of the high
resolution timers that can happen either at compile or at run time.

Fix the powerpc vdso implementation of clock_getres keeping a copy of
hrtimer_resolution in vdso data and using that directly.

Fixes: a7f290dad32e ("[PATCH] powerpc: Merge vdso's and add vdso support
to 32 bits kernel")
Cc: sta...@vger.kernel.org
Cc: Benjamin Herrenschmidt 
Cc: Paul Mackerras 
Cc: Michael Ellerman 
Signed-off-by: Vincenzo Frascino 
Reviewed-by: Christophe Leroy 
---

Note: This patch is independent from the others in this series, hence it
can be merged singularly by the powerpc maintainers.

 arch/powerpc/include/asm/vdso_datapage.h  | 2 ++
 arch/powerpc/kernel/asm-offsets.c | 2 +-
 arch/powerpc/kernel/time.c| 1 +
 arch/powerpc/kernel/vdso32/gettimeofday.S | 7 +--
 arch/powerpc/kernel/vdso64/gettimeofday.S | 7 +--
 5 files changed, 14 insertions(+), 5 deletions(-)

diff --git a/arch/powerpc/include/asm/vdso_datapage.h 
b/arch/powerpc/include/asm/vdso_datapage.h
index bbc06bd72b1f..4333b9a473dc 100644
--- a/arch/powerpc/include/asm/vdso_datapage.h
+++ b/arch/powerpc/include/asm/vdso_datapage.h
@@ -86,6 +86,7 @@ struct vdso_data {
__s32 wtom_clock_nsec;  /* Wall to monotonic clock nsec 
*/
__s64 wtom_clock_sec;   /* Wall to monotonic clock sec 
*/
struct timespec stamp_xtime;/* xtime as at tb_orig_stamp */
+   __u32 hrtimer_res;  /* hrtimer resolution */
__u32 syscall_map_64[SYSCALL_MAP_SIZE]; /* map of syscalls  */
__u32 syscall_map_32[SYSCALL_MAP_SIZE]; /* map of syscalls */
 };
@@ -107,6 +108,7 @@ struct vdso_data {
__s32 wtom_clock_nsec;
struct timespec stamp_xtime;/* xtime as at tb_orig_stamp */
__u32 stamp_sec_fraction;   /* fractional seconds of stamp_xtime */
+   __u32 hrtimer_res;  /* hrtimer resolution */
__u32 syscall_map_32[SYSCALL_MAP_SIZE]; /* map of syscalls */
__u32 dcache_block_size;/* L1 d-cache block size */
__u32 icache_block_size;/* L1 i-cache block size */
diff --git a/arch/powerpc/kernel/asm-offsets.c 
b/arch/powerpc/kernel/asm-offsets.c
index 8e02444e9d3d..dfc40f29f2b9 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -389,6 +389,7 @@ int main(void)
OFFSET(WTOM_CLOCK_NSEC, vdso_data, wtom_clock_nsec);
OFFSET(STAMP_XTIME, vdso_data, stamp_xtime);
OFFSET(STAMP_SEC_FRAC, vdso_data, stamp_sec_fraction);
+   OFFSET(CLOCK_REALTIME_RES, vdso_data, hrtimer_res);
OFFSET(CFG_ICACHE_BLOCKSZ, vdso_data, icache_block_size);
OFFSET(CFG_DCACHE_BLOCKSZ, vdso_data, dcache_block_size);
OFFSET(CFG_ICACHE_LOGBLOCKSZ, vdso_data, icache_log_block_size);
@@ -419,7 +420,6 @@ int main(void)
DEFINE(CLOCK_REALTIME_COARSE, CLOCK_REALTIME_COARSE);
DEFINE(CLOCK_MONOTONIC_COARSE, CLOCK_MONOTONIC_COARSE);
DEFINE(NSEC_PER_SEC, NSEC_PER_SEC);
-   DEFINE(CLOCK_REALTIME_RES, MONOTONIC_RES_NSEC);
 
 #ifdef CONFIG_BUG
DEFINE(BUG_ENTRY_SIZE, sizeof(struct bug_entry));
diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c
index 325d60633dfa..4ea4e9d7a58e 100644
--- a/arch/powerpc/kernel/time.c
+++ b/arch/powerpc/kernel/time.c
@@ -963,6 +963,7 @@ void update_vsyscall(struct timekeeper *tk)
vdso_data->wtom_clock_nsec = tk->wall_to_monotonic.tv_nsec;
vdso_data->stamp_xtime = xt;
vdso_data->stamp_sec_fraction = frac_sec;
+   vdso_data->hrtimer_res = hrtimer_resolution;
smp_wmb();
++(vdso_data->tb_update_count);
 }
diff --git a/arch/powerpc/kernel/vdso32/gettimeofday.S 
b/arch/powerpc/kernel/vdso32/gettimeofday.S
index afd516b572f8..2b5f9e83c610 100644
--- a/arch/powerpc/kernel/vdso32/gettimeofday.S
+++ b/arch/powerpc/kernel/vdso32/gettimeofday.S
@@ -160,12 +160,15 @@ V_FUNCTION_BEGIN(__kernel_clock_getres)
cror cr0*4+eq,cr0*4+eq,cr1*4+eq
bne cr0,99f
 
+   mflr r12
+  .cfi_register lr,r12
+   bl  __get_datapage@local
+   lwz r5,CLOCK_REALTIME_RES(r3)
+   mtlr r12
li  r3,0
cmpli   cr0,r4,0
crclr   cr0*4+so
beqlr
-   lis r5,CLOCK_REALTIME_RES@h
-   ori r5,r5,CLOCK_REALTIME_RES@l
stw r3,TSPC32_TV_SEC(r4)
stw r5,TSPC32_TV_NSEC(r4)
blr
diff --git a/arch/powerpc/kernel/vdso64/gettimeofday.S 
b/arch/powerpc/kernel/vdso64/gettimeofday.S
index 1f324c28705b..f07730f73d5e 100644
--- a/arch/powerpc/kernel/vdso64/gettimeofday.S
+++ b/arch/powerpc/kernel/vdso64/gettimeofday.S
@@ -190,12 +190,15 @@ V_FUNCTION_BEGIN(__kernel_clock_getres)
cror

[PATCH v5 0/3] Fix vDSO clock_getres()

2019-05-28 Thread Vincenzo Frascino
clock_getres in the vDSO library has to preserve the same behaviour
of posix_get_hrtimer_res().

In particular, posix_get_hrtimer_res() does:
sec = 0;
ns = hrtimer_resolution;
and hrtimer_resolution depends on the enablement of the high
resolution timers that can happen either at compile or at run time.

A possible fix is to change the vdso implementation of clock_getres,
keeping a copy of hrtimer_resolution in vdso data and using that
directly [1].

This patchset implements the proposed fix for arm64, powerpc, s390,
nds32 and adds a test to verify that the syscall and the vdso library
implementation of clock_getres return the same values.

Even if these patches are unified by the same topic, there is no
dependency between them, hence they can be merged singularly by each
arch maintainer.

Note: arm64 and nds32 respective fixes have been merged in 5.2-rc1,
hence they have been removed from this series.

[1] https://marc.info/?l=linux-arm-kernel&m=155110381930196&w=2

Changes:

v5:
  - Rebased on 5.2-rc2
  - Fixed a bug in kselftest.
v4:
  - Address review comments.
v3:
  - Rebased on 5.2-rc1.
  - Address review comments.
v2:
  - Rebased on 5.1-rc5.
  - Addressed review comments.

Cc: Christophe Leroy 
Cc: Benjamin Herrenschmidt 
Cc: Paul Mackerras 
Cc: Michael Ellerman 
Cc: Martin Schwidefsky 
Cc: Heiko Carstens 
Cc: Shuah Khan 
Cc: Thomas Gleixner 
Cc: Arnd Bergmann 
Signed-off-by: Vincenzo Frascino 

Vincenzo Frascino (3):
  powerpc: Fix vDSO clock_getres()
  s390: Fix vDSO clock_getres()
  kselftest: Extend vDSO selftest to clock_getres

 arch/powerpc/include/asm/vdso_datapage.h  |   2 +
 arch/powerpc/kernel/asm-offsets.c |   2 +-
 arch/powerpc/kernel/time.c|   1 +
 arch/powerpc/kernel/vdso32/gettimeofday.S |   7 +-
 arch/powerpc/kernel/vdso64/gettimeofday.S |   7 +-
 arch/s390/include/asm/vdso.h  |   1 +
 arch/s390/kernel/asm-offsets.c|   2 +-
 arch/s390/kernel/time.c   |   1 +
 arch/s390/kernel/vdso32/clock_getres.S|  12 +-
 arch/s390/kernel/vdso64/clock_getres.S|  10 +-
 tools/testing/selftests/vDSO/Makefile |   2 +
 .../selftests/vDSO/vdso_clock_getres.c| 124 ++
 12 files changed, 155 insertions(+), 16 deletions(-)
 create mode 100644 tools/testing/selftests/vDSO/vdso_clock_getres.c

-- 
2.21.0



Re: [PATCH v4 3/3] kselftest: Extend vDSO selftest to clock_getres

2019-05-28 Thread Vincenzo Frascino
Hi Michael,

thank you for your reply.

On 28/05/2019 07:19, Michael Ellerman wrote:
> Vincenzo Frascino  writes:
> 
>> The current version of the multiarch vDSO selftest verifies only
>> gettimeofday.
>>
>> Extend the vDSO selftest to clock_getres, to verify that the
>> syscall and the vDSO library function return the same information.
>>
>> The extension has been used to verify the hrtimer_resolution fix.
> 
> This is passing for me even without patch 1 applied, shouldn't it fail
> without the fix? What am I missing?
> 

This is correct, because during the refactoring process I missed an "n" :)

if ((x.tv_sec != y.tv_sec) || (x.tv_sec != y.tv_sec))

Should be:

if ((x.tv_sec != y.tv_sec) || (x.tv_nsec != y.tv_nsec))

My mistake, I am going to fix the test and re-post v5 of this set.

Without my patch if you pass "highres=off" to the kernel (as a command line
parameter) it leads to a broken implementation of clock_getres since the value
of CLOCK_REALTIME_RES does not change at runtime.

Expected result (with highres=off):

# uname -r
5.2.0-rc2
# ./vdso_clock_getres
clock_id: CLOCK_REALTIME [FAIL]
clock_id: CLOCK_BOOTTIME [PASS]
clock_id: CLOCK_TAI [PASS]
clock_id: CLOCK_REALTIME_COARSE [PASS]
clock_id: CLOCK_MONOTONIC [FAIL]
clock_id: CLOCK_MONOTONIC_RAW [PASS]
clock_id: CLOCK_MONOTONIC_COARSE [PASS]

The reason for this behavior is that the only clocks supported by getres on
powerpc are CLOCK_REALTIME and CLOCK_MONOTONIC; the rest of the clocks always
use syscalls.

> # uname -r
> 5.2.0-rc2-gcc-8.2.0
> 
> # ./vdso_clock_getres
> clock_id: CLOCK_REALTIME [PASS]
> clock_id: CLOCK_BOOTTIME [PASS]
> clock_id: CLOCK_TAI [PASS]
> clock_id: CLOCK_REALTIME_COARSE [PASS]
> clock_id: CLOCK_MONOTONIC [PASS]
> clock_id: CLOCK_MONOTONIC_RAW [PASS]
> clock_id: CLOCK_MONOTONIC_COARSE [PASS]
> 
> cheers
> 
>> Cc: Shuah Khan 
>> Signed-off-by: Vincenzo Frascino 
>> ---
>>
>> Note: This patch is independent from the others in this series, hence it
>> can be merged singularly by the kselftest maintainers.
>>
>>  tools/testing/selftests/vDSO/Makefile |   2 +
>>  .../selftests/vDSO/vdso_clock_getres.c| 124 ++
>>  2 files changed, 126 insertions(+)
>>  create mode 100644 tools/testing/selftests/vDSO/vdso_clock_getres.c

-- 
Regards,
Vincenzo


Re: [PATCH v2] powerpc/32: sstep: Move variable `rc` within CONFIG_PPC64 sentinels

2019-05-28 Thread Michael Ellerman
Mathieu Malaterre  writes:

> Fix warnings treated as errors with W=1:
>
>   arch/powerpc/lib/sstep.c:1172:31: error: variable 'rc' set but not used 
> [-Werror=unused-but-set-variable]
>
> Suggested-by: Christophe Leroy 
> Signed-off-by: Mathieu Malaterre 
> ---
> v2: as suggested prefer CONFIG_PPC64 sentinel instead of unused keyword

I'd rather avoid adding more ifdefs if we can.

I think this works?

cheers

diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c
index 3d33fb509ef4..600b036ddfda 100644
--- a/arch/powerpc/lib/sstep.c
+++ b/arch/powerpc/lib/sstep.c
@@ -1169,7 +1169,7 @@ static nokprobe_inline int trap_compare(long v1, long v2)
 int analyse_instr(struct instruction_op *op, const struct pt_regs *regs,
  unsigned int instr)
 {
-   unsigned int opcode, ra, rb, rc, rd, spr, u;
+   unsigned int opcode, ra, rb, rd, spr, u;
unsigned long int imm;
unsigned long int val, val2;
unsigned int mb, me, sh;
@@ -1292,7 +1292,6 @@ int analyse_instr(struct instruction_op *op, const struct 
pt_regs *regs,
rd = (instr >> 21) & 0x1f;
ra = (instr >> 16) & 0x1f;
rb = (instr >> 11) & 0x1f;
-   rc = (instr >> 6) & 0x1f;
 
switch (opcode) {
 #ifdef __powerpc64__
@@ -1307,10 +1306,14 @@ int analyse_instr(struct instruction_op *op, const 
struct pt_regs *regs,
return 1;
 
 #ifdef __powerpc64__
-   case 4:
+   case 4: {
+   unsigned int rc;
+
if (!cpu_has_feature(CPU_FTR_ARCH_300))
return -1;
 
+   rc = (instr >> 6) & 0x1f;
+
switch (instr & 0x3f) {
case 48:/* maddhd */
asm volatile(PPC_MADDHD(%0, %1, %2, %3) :
@@ -1336,6 +1339,7 @@ int analyse_instr(struct instruction_op *op, const struct 
pt_regs *regs,
 * primary opcode which do not have emulation support yet.
 */
return -1;
+   }
 #endif
 
case 7: /* mulli */


Re: [PATCH] [RFC] Remove bdflush syscall stub

2019-05-28 Thread Florian Weimer
* Cyril Hrubis:

> Hi!
>> > I've tested the patch on i386. Before the patch calling bdflush() with
>> > attempt to tune a variable returned 0 and after the patch the syscall
>> > fails with EINVAL.
>> 
>> Should be ENOSYS, doesn't it?
>
> My bad, the LTP syscall wrapper handles ENOSYS and produces skipped
> results based on that.
>
> EINVAL is what you get for not yet implemented syscalls, i.e. new
> syscall on old kernel.

EINVAL?  Is that a bdflush-specific thing, test-specific, or is it more
general?

glibc has fallback paths that test for ENOSYS only.  EINVAL will be
passed to the application, skipping fallback.  For missing system calls,
this is not what we want.

Thanks,
Florian


Re: [PATCH] [RFC] Remove bdflush syscall stub

2019-05-28 Thread Andreas Schwab
On Mai 28 2019, Cyril Hrubis  wrote:

> I've tested the patch on i386. Before the patch calling bdflush() with
> attempt to tune a variable returned 0 and after the patch the syscall
> fails with EINVAL.

Should be ENOSYS, doesn't it?

Andreas.

-- 
Andreas Schwab, sch...@linux-m68k.org
GPG Key fingerprint = 7578 EB47 D4E5 4D69 2510  2552 DF73 E780 A9DA AEC1
"And now for something completely different."


Re: [PATCH v2 2/2] tests: add close_range() tests

2019-05-28 Thread Christian Brauner
On Tue, May 28, 2019 at 12:33:41PM +1000, Michael Ellerman wrote:
> Christian Brauner  writes:
> > This adds basic tests for the new close_range() syscall.
> > - test that no invalid flags can be passed
> > - test that a range of file descriptors is correctly closed
> > - test that a range of file descriptors is correctly closed if there
> >   are already closed file descriptors in the range
> > - test that max_fd is correctly capped to the current fdtable maximum
> >
> > Signed-off-by: Christian Brauner 
> > Cc: Arnd Bergmann 
> > Cc: Jann Horn 
> > Cc: David Howells 
> > Cc: Dmitry V. Levin 
> > Cc: Oleg Nesterov 
> > Cc: Linus Torvalds 
> > Cc: Florian Weimer 
> > Cc: linux-...@vger.kernel.org
> > ---
> > v1: unchanged
> > v2:
> > - Christian Brauner :
> >   - verify that close_range() correctly closes a single file descriptor
> > ---
> >  tools/testing/selftests/Makefile  |   1 +
> >  tools/testing/selftests/core/.gitignore   |   1 +
> >  tools/testing/selftests/core/Makefile |   6 +
> >  .../testing/selftests/core/close_range_test.c | 142 ++
> >  4 files changed, 150 insertions(+)
> >  create mode 100644 tools/testing/selftests/core/.gitignore
> >  create mode 100644 tools/testing/selftests/core/Makefile
> >  create mode 100644 tools/testing/selftests/core/close_range_test.c
> >
> > diff --git a/tools/testing/selftests/core/.gitignore 
> > b/tools/testing/selftests/core/.gitignore
> > new file mode 100644
> > index ..6e6712ce5817
> > --- /dev/null
> > +++ b/tools/testing/selftests/core/.gitignore
> > @@ -0,0 +1 @@
> > +close_range_test
> > diff --git a/tools/testing/selftests/core/Makefile 
> > b/tools/testing/selftests/core/Makefile
> > new file mode 100644
> > index ..de3ae68aa345
> > --- /dev/null
> > +++ b/tools/testing/selftests/core/Makefile
> > @@ -0,0 +1,6 @@
> > +CFLAGS += -g -I../../../../usr/include/ -I../../../../include
> 
> Your second -I pulls the unexported kernel headers in, userspace
> programs shouldn't include unexported kernel headers.
> 
> It breaks the build on powerpc with eg:
> 
>   powerpc64le-linux-gnu-gcc -g -I../../../../usr/include/ 
> -I../../../../includeclose_range_test.c  -o 
> /output/kselftest/core/close_range_test
>   In file included from 
> /usr/powerpc64le-linux-gnu/include/bits/fcntl-linux.h:346,
>from /usr/powerpc64le-linux-gnu/include/bits/fcntl.h:62,
>from /usr/powerpc64le-linux-gnu/include/fcntl.h:35,
>from close_range_test.c:5:
>   ../../../../include/linux/falloc.h:13:2: error: unknown type name '__s16'
> __s16  l_type;
> ^
> 
> 
> Did you do that on purpose or just copy it from one of the other
> Makefiles? :)

I originally did that on purpose because checkpatch was yammering on
about me not having used ARRAY_SIZE(). But that include can go, you are
right.

Christian


Re: [PATCH 1/2] perf ioctl: Add check for the sample_period value

2019-05-28 Thread Michael Ellerman
Ravi Bangoria  writes:
> On 5/13/19 2:26 PM, Peter Zijlstra wrote:
>> On Mon, May 13, 2019 at 09:42:13AM +0200, Peter Zijlstra wrote:
>>> On Sat, May 11, 2019 at 08:12:16AM +0530, Ravi Bangoria wrote:
 Add a check for sample_period value sent from userspace. Negative
 value does not make sense. And in powerpc arch code this could cause
 a recursive PMI leading to a hang (reported when running perf-fuzzer).

 Signed-off-by: Ravi Bangoria 
 ---
  kernel/events/core.c | 3 +++
  1 file changed, 3 insertions(+)

 diff --git a/kernel/events/core.c b/kernel/events/core.c
 index abbd4b3b96c2..e44c90378940 100644
 --- a/kernel/events/core.c
 +++ b/kernel/events/core.c
 @@ -5005,6 +5005,9 @@ static int perf_event_period(struct perf_event 
 *event, u64 __user *arg)
if (perf_event_check_period(event, value))
return -EINVAL;
  
 +  if (!event->attr.freq && (value & (1ULL << 63)))
 +  return -EINVAL;
>>>
>>> Well, perf_event_attr::sample_period is __u64. Would not be the site
>>> using it as signed be the one in error?
>> 
>> You forgot to mention commit: 0819b2e30ccb9, so I guess this just makes
>> it consistent and is fine.
>> 
>
> Yeah, I was about to reply :)

I've taken patch 2. You should probably do a v2 of patch 1 with an
updated change log that explains things fully?

cheers


Re: [PATCH v2] mm: hwpoison: disable memory error handling on 1GB hugepage

2019-05-28 Thread Wanpeng Li
Cc Paolo,
Hi all,
On Wed, 14 Feb 2018 at 06:34, Mike Kravetz  wrote:
>
> On 02/12/2018 06:48 PM, Michael Ellerman wrote:
> > Andrew Morton  writes:
> >
> >> On Thu, 08 Feb 2018 12:30:45 + Punit Agrawal  
> >> wrote:
> >>
> 
>  So I don't think that the above test result means that errors are 
>  properly
>  handled, and the proposed patch should help for arm64.
> >>>
> >>> Although, the deviation of pud_huge() avoids a kernel crash the code
> >>> would be easier to maintain and reason about if arm64 helpers are
> >>> consistent with expectations by core code.
> >>>
> >>> I'll look to update the arm64 helpers once this patch gets merged. But
> >>> it would be helpful if there was a clear expression of semantics for
> >>> pud_huge() for various cases. Is there any version that can be used as
> >>> reference?
> >>
> >> Is that an ack or tested-by?
> >>
> >> Mike keeps plaintively asking the powerpc developers to take a look,
> >> but they remain steadfastly in hiding.
> >
> > Cc'ing linuxppc-dev is always a good idea :)
> >
>
> Thanks Michael,
>
> I was mostly concerned about use cases for soft/hard offline of huge pages
> larger than PMD_SIZE on powerpc.  I know that powerpc supports PGD_SIZE
> huge pages, and soft/hard offline support was specifically added for this.
> See, 94310cbcaa3c "mm/madvise: enable (soft|hard) offline of HugeTLB pages
> at PGD level"
>
> This patch will disable that functionality.  So, at a minimum this is a
> 'heads up'.  If there are actual use cases that depend on this, then more
> work/discussions will need to happen.  From the e-mail thread on PGD_SIZE
> support, I can not tell if there is a real use case or this is just a
> 'nice to have'.

1GB hugetlbfs pages are used by DPDK and VMs in cloud deployment, we
encounter gup_pud_range() panic several times in product environment.
Is there any plan to reenable and fix arch codes?

In addition, 
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/arch/x86/kvm/mmu.c#n3213
The memory in guest can be 1GB/2MB/4K, though the host-backed memory
are 1GB hugetlbfs pages, after above PUD panic is fixed,
try_to_unmap() which is called in MCA recovery path will mark the PUD
hwpoison entry. The guest will vmexit and retry endlessly when
accessing any memory in the guest which is backed by this 1GB poisoned
hugetlbfs page. We have a plan to split this 1GB hugetlbfs page by 2MB
hugetlbfs pages/4KB pages, maybe file remap to a virtual address range
which is 2MB/4KB page granularity, also split the KVM MMU 1GB SPTE
into 2MB/4KB and mark the offensive SPTE w/ a hwpoison flag, a sigbus
will be delivered to VM at page fault next time for the offensive
SPTE. Is this proposal acceptable?

Regards,
Wanpeng Li


Re: [PATCH v2] mm: Move MAP_SYNC to asm-generic/mman-common.h

2019-05-28 Thread Jan Kara
On Tue 28-05-19 14:41:20, Aneesh Kumar K.V wrote:
> This enables support for synchronous DAX fault on powerpc
> 
> The generic changes are added as part of
> commit b6fb293f2497 ("mm: Define MAP_SYNC and VM_SYNC flags")
> 
> Without this, mmap returns EOPNOTSUPP for MAP_SYNC with MAP_SHARED_VALIDATE
> 
> Instead of adding MAP_SYNC with same value to
> arch/powerpc/include/uapi/asm/mman.h, I am moving the #define to
> asm-generic/mman-common.h. Two architectures using mman-common.h directly are
> sparc and powerpc. We should be able to consolidate more #defines to
> mman-common.h. That can be done as a separate patch.
> 
> Signed-off-by: Aneesh Kumar K.V 

Looks good to me FWIW (I don't have much experience with mmap flags and
their peculiarities). So feel free to add:

Reviewed-by: Jan Kara 

Honza

> ---
> Changes from V1:
> * Move #define to mman-common.h instead of powerpc specific mman.h change
> 
> 
>  include/uapi/asm-generic/mman-common.h | 3 ++-
>  include/uapi/asm-generic/mman.h| 1 -
>  2 files changed, 2 insertions(+), 2 deletions(-)
> 
> diff --git a/include/uapi/asm-generic/mman-common.h 
> b/include/uapi/asm-generic/mman-common.h
> index abd238d0f7a4..bea0278f65ab 100644
> --- a/include/uapi/asm-generic/mman-common.h
> +++ b/include/uapi/asm-generic/mman-common.h
> @@ -25,7 +25,8 @@
>  # define MAP_UNINITIALIZED 0x0   /* Don't support this flag */
>  #endif
>  
> -/* 0x0100 - 0x8 flags are defined in asm-generic/mman.h */
> +/* 0x0100 - 0x4 flags are defined in asm-generic/mman.h */
> +#define MAP_SYNC 0x08 /* perform synchronous page faults for 
> the mapping */
>  #define MAP_FIXED_NOREPLACE  0x10/* MAP_FIXED which doesn't 
> unmap underlying mapping */
>  
>  /*
> diff --git a/include/uapi/asm-generic/mman.h b/include/uapi/asm-generic/mman.h
> index 653687d9771b..2dffcbf705b3 100644
> --- a/include/uapi/asm-generic/mman.h
> +++ b/include/uapi/asm-generic/mman.h
> @@ -13,7 +13,6 @@
>  #define MAP_NONBLOCK 0x1 /* do not block on IO */
>  #define MAP_STACK0x2 /* give out an address that is best 
> suited for process/thread stacks */
>  #define MAP_HUGETLB  0x4 /* create a huge page mapping */
> -#define MAP_SYNC 0x8 /* perform synchronous page faults for 
> the mapping */
>  
>  /* Bits [26:31] are reserved, see mman-common.h for MAP_HUGETLB usage */
>  
> -- 
> 2.21.0
> 
-- 
Jan Kara 
SUSE Labs, CR


Re: [PATCH v1 00/15] Fixing selftests failure on Talitos driver

2019-05-28 Thread Horia Geanta
On 5/21/2019 4:34 PM, Christophe Leroy wrote:
> Several test failures have popped up following recent changes to crypto
> selftests.
> 
> This series fixes (most of) them.
> 
> The last three patches are trivial cleanups.
> 
Thanks Christophe.

For the series:
Reviewed-by: Horia Geantă 

Have you validated the changes also on SEC 2.x+?
Asking since IIRC you mentioned having only HW with SEC 1 and changes in patch
"crypto: talitos - fix AEAD processing." look quite complex.

Thanks,
Horia



[PATCH v2] mm: Move MAP_SYNC to asm-generic/mman-common.h

2019-05-28 Thread Aneesh Kumar K.V
This enables support for synchronous DAX fault on powerpc

The generic changes are added as part of
commit b6fb293f2497 ("mm: Define MAP_SYNC and VM_SYNC flags")

Without this, mmap returns EOPNOTSUPP for MAP_SYNC with MAP_SHARED_VALIDATE

Instead of adding MAP_SYNC with same value to
arch/powerpc/include/uapi/asm/mman.h, I am moving the #define to
asm-generic/mman-common.h. Two architectures using mman-common.h directly are
sparc and powerpc. We should be able to consolidate more #defines to
mman-common.h. That can be done as a separate patch.

Signed-off-by: Aneesh Kumar K.V 
---
Changes from V1:
* Move #define to mman-common.h instead of powerpc specific mman.h change


 include/uapi/asm-generic/mman-common.h | 3 ++-
 include/uapi/asm-generic/mman.h| 1 -
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/include/uapi/asm-generic/mman-common.h 
b/include/uapi/asm-generic/mman-common.h
index abd238d0f7a4..bea0278f65ab 100644
--- a/include/uapi/asm-generic/mman-common.h
+++ b/include/uapi/asm-generic/mman-common.h
@@ -25,7 +25,8 @@
 # define MAP_UNINITIALIZED 0x0 /* Don't support this flag */
 #endif
 
-/* 0x0100 - 0x8 flags are defined in asm-generic/mman.h */
+/* 0x0100 - 0x4 flags are defined in asm-generic/mman.h */
+#define MAP_SYNC   0x08 /* perform synchronous page faults for 
the mapping */
 #define MAP_FIXED_NOREPLACE0x10/* MAP_FIXED which doesn't 
unmap underlying mapping */
 
 /*
diff --git a/include/uapi/asm-generic/mman.h b/include/uapi/asm-generic/mman.h
index 653687d9771b..2dffcbf705b3 100644
--- a/include/uapi/asm-generic/mman.h
+++ b/include/uapi/asm-generic/mman.h
@@ -13,7 +13,6 @@
 #define MAP_NONBLOCK   0x1 /* do not block on IO */
 #define MAP_STACK  0x2 /* give out an address that is best 
suited for process/thread stacks */
 #define MAP_HUGETLB0x4 /* create a huge page mapping */
-#define MAP_SYNC   0x8 /* perform synchronous page faults for 
the mapping */
 
 /* Bits [26:31] are reserved, see mman-common.h for MAP_HUGETLB usage */
 
-- 
2.21.0



Re: [PATCH v1 08/15] crypto: talitos - Do not modify req->cryptlen on decryption.

2019-05-28 Thread Horia Geanta
On 5/21/2019 4:34 PM, Christophe Leroy wrote:
> For decrypt, req->cryptlen includes the size of the authentication
> part while all functions of the driver expect cryptlen to be
> the size of the encrypted data.
> 
> As it is not expected to change req->cryptlen, this patch
> implements local calculation of cryptlen.
> 
An alternative would be to restore req->cryptlen in the *_done() callback.
It would be easier to implement, though probably less intuitive.

Horia


[PATCH] powerpc/configs: Rename foo_basic_defconfig to foo_base.config

2019-05-28 Thread Michael Ellerman
We have several "defconfigs" that are not actually full defconfigs
they are just a base set of options which are then merged with other
fragments to produce a working defconfig.

The most obvious example is corenet_basic_defconfig which only
contains one symbol CONFIG_CORENET_GENERIC=y. But there is also
mpc85xx_base_defconfig which doesn't actually enable CONFIG_PPC_85xx.

To avoid confusion, rename these config fragments to "foo_base.config"
to make it clearer that they are not full defconfigs.

Reported-by: Christophe Leroy 
Signed-off-by: Michael Ellerman 
---
 arch/powerpc/Makefile| 12 ++--
 .../{corenet_basic_defconfig => corenet_base.config} |  0
 .../{mpc85xx_basic_defconfig => mpc85xx_base.config} |  0
 .../{mpc86xx_basic_defconfig => mpc86xx_base.config} |  0
 4 files changed, 6 insertions(+), 6 deletions(-)
 rename arch/powerpc/configs/{corenet_basic_defconfig => corenet_base.config} 
(100%)
 rename arch/powerpc/configs/{mpc85xx_basic_defconfig => mpc85xx_base.config} 
(100%)
 rename arch/powerpc/configs/{mpc86xx_basic_defconfig => mpc86xx_base.config} 
(100%)

diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile
index c345b79414a9..94f735db2229 100644
--- a/arch/powerpc/Makefile
+++ b/arch/powerpc/Makefile
@@ -333,32 +333,32 @@ PHONY += powernv_be_defconfig
 
 PHONY += mpc85xx_defconfig
 mpc85xx_defconfig:
-   $(call merge_into_defconfig,mpc85xx_basic_defconfig,\
+   $(call merge_into_defconfig,mpc85xx_base.config,\
85xx-32bit 85xx-hw fsl-emb-nonhw)
 
 PHONY += mpc85xx_smp_defconfig
 mpc85xx_smp_defconfig:
-   $(call merge_into_defconfig,mpc85xx_basic_defconfig,\
+   $(call merge_into_defconfig,mpc85xx_base.config,\
85xx-32bit 85xx-smp 85xx-hw fsl-emb-nonhw)
 
 PHONY += corenet32_smp_defconfig
 corenet32_smp_defconfig:
-   $(call merge_into_defconfig,corenet_basic_defconfig,\
+   $(call merge_into_defconfig,corenet_base.config,\
85xx-32bit 85xx-smp 85xx-hw fsl-emb-nonhw dpaa)
 
 PHONY += corenet64_smp_defconfig
 corenet64_smp_defconfig:
-   $(call merge_into_defconfig,corenet_basic_defconfig,\
+   $(call merge_into_defconfig,corenet_base.config,\
85xx-64bit 85xx-smp altivec 85xx-hw fsl-emb-nonhw dpaa)
 
 PHONY += mpc86xx_defconfig
 mpc86xx_defconfig:
-   $(call merge_into_defconfig,mpc86xx_basic_defconfig,\
+   $(call merge_into_defconfig,mpc86xx_base.config,\
86xx-hw fsl-emb-nonhw)
 
 PHONY += mpc86xx_smp_defconfig
 mpc86xx_smp_defconfig:
-   $(call merge_into_defconfig,mpc86xx_basic_defconfig,\
+   $(call merge_into_defconfig,mpc86xx_base.config,\
86xx-smp 86xx-hw fsl-emb-nonhw)
 
 PHONY += ppc32_allmodconfig
diff --git a/arch/powerpc/configs/corenet_basic_defconfig 
b/arch/powerpc/configs/corenet_base.config
similarity index 100%
rename from arch/powerpc/configs/corenet_basic_defconfig
rename to arch/powerpc/configs/corenet_base.config
diff --git a/arch/powerpc/configs/mpc85xx_basic_defconfig 
b/arch/powerpc/configs/mpc85xx_base.config
similarity index 100%
rename from arch/powerpc/configs/mpc85xx_basic_defconfig
rename to arch/powerpc/configs/mpc85xx_base.config
diff --git a/arch/powerpc/configs/mpc86xx_basic_defconfig 
b/arch/powerpc/configs/mpc86xx_base.config
similarity index 100%
rename from arch/powerpc/configs/mpc86xx_basic_defconfig
rename to arch/powerpc/configs/mpc86xx_base.config
-- 
2.20.1



Re: ppc85xx_basic_defconfig is buggy ?

2019-05-28 Thread Michael Ellerman
Christophe Leroy  writes:
> ppc85xx_basic_defconfig does not select CONFIG_PPC_85xx.

You mean arch/powerpc/configs/mpc85xx_basic_defconfig presumably.

> Is that expected ?

Yeah it is expected.

It's not intended to be a full defconfig, it's used as a fragment and
merged with other configs, see arch/powerpc/Makefile:

  PHONY += mpc85xx_defconfig
  mpc85xx_defconfig:
$(call merge_into_defconfig,mpc85xx_basic_defconfig,\
85xx-32bit 85xx-hw fsl-emb-nonhw)

Where 85xx-32bit.config is:

  CONFIG_HIGHMEM=y
  CONFIG_KEXEC=y
  CONFIG_PPC_85xx=y
  CONFIG_PROC_KCORE=y
  CONFIG_PHYS_64BIT=y

So that's where PPC_85xx gets set.

But it's confusing that mpc85xx_basic_defconfig is named "foo_defconfig"
but is not actually a proper defconfig. We should rename it to
mpc85xx_basic.config to make it clearer that it's a fragment. I'll do a
patch.

cheers


[PATCH 3/3][V2] lib: re-introduce new match_string() helper/macro

2019-05-28 Thread Alexandru Ardelean
This change re-introduces `match_string()` as a macro that uses
ARRAY_SIZE() to compute the size of the array.

After this change, work can start on migrating subsystems to use this new
helper. Since the original helper is pretty widely used, migrating to this new one
will take a while, and will be reviewed by each subsystem.

Signed-off-by: Alexandru Ardelean 
---
 include/linux/string.h | 9 +
 1 file changed, 9 insertions(+)

diff --git a/include/linux/string.h b/include/linux/string.h
index 7149fcdf62df..34491b075449 100644
--- a/include/linux/string.h
+++ b/include/linux/string.h
@@ -198,6 +198,15 @@ static inline int strtobool(const char *s, bool *res)
 int __match_string(const char * const *array, size_t n, const char *string);
 int __sysfs_match_string(const char * const *array, size_t n, const char *s);
 
+/**
+ * match_string - matches given string in an array
+ * @_a: array of strings
+ * @_s: string to match with
+ *
+ * Helper for __match_string(). Calculates the size of @a automatically.
+ */
+#define match_string(_a, _s) __match_string(_a, ARRAY_SIZE(_a), _s)
+
 /**
  * sysfs_match_string - matches given string in an array
  * @_a: array of strings
-- 
2.20.1



[PATCH 2/3][V2] treewide: rename match_string() -> __match_string()

2019-05-28 Thread Alexandru Ardelean
This change does a rename of match_string() -> __match_string().

There are a few parts to the intention here (with this change):
1. Align with sysfs_match_string()/__sysfs_match_string()
2. This helps to group users of `match_string()`:
   a. those that use ARRAY_SIZE(_a) to specify the number of elements
   b. those that use -1 to pass a NULL terminated array of strings
   c. special users, which (after eliminating 1 & 2) are not that many

This change is done treewide. Updates to the new match_string() helper will
be done on a per-subsystem basis, as the cadence of each subsystem differs.

Signed-off-by: Alexandru Ardelean 
---
 arch/powerpc/xmon/xmon.c |  2 +-
 arch/x86/kernel/cpu/mtrr/if.c|  2 +-
 drivers/ata/pata_hpt366.c|  2 +-
 drivers/ata/pata_hpt37x.c|  2 +-
 drivers/base/devcon.c|  2 +-
 drivers/base/property.c  |  2 +-
 drivers/clk/bcm/clk-bcm2835.c|  6 +++---
 drivers/clk/rockchip/clk.c   |  4 ++--
 drivers/cpufreq/intel_pstate.c   |  2 +-
 drivers/gpio/gpiolib-of.c|  2 +-
 drivers/gpu/drm/drm_edid_load.c  |  2 +-
 drivers/gpu/drm/drm_panel_orientation_quirks.c   |  2 +-
 drivers/gpu/drm/i915/intel_pipe_crc.c|  2 +-
 drivers/ide/hpt366.c |  2 +-
 drivers/mfd/omap-usb-host.c  |  2 +-
 drivers/mmc/host/sdhci-xenon-phy.c   |  2 +-
 drivers/net/wireless/intel/iwlwifi/mvm/debugfs.c |  2 +-
 drivers/pci/pcie/aer.c   |  2 +-
 drivers/phy/tegra/xusb.c |  4 ++--
 drivers/pinctrl/mvebu/pinctrl-armada-37xx.c  |  4 ++--
 drivers/pinctrl/pinmux.c |  2 +-
 drivers/power/supply/ab8500_btemp.c  |  2 +-
 drivers/power/supply/ab8500_charger.c|  2 +-
 drivers/power/supply/ab8500_fg.c |  2 +-
 drivers/power/supply/abx500_chargalg.c   |  2 +-
 drivers/power/supply/charger-manager.c   |  4 ++--
 drivers/staging/gdm724x/gdm_tty.c|  4 ++--
 drivers/usb/common/common.c  |  4 ++--
 drivers/usb/typec/class.c| 10 +-
 drivers/usb/typec/tps6598x.c |  2 +-
 drivers/vfio/vfio.c  |  6 +++---
 drivers/video/fbdev/pxafb.c  |  2 +-
 fs/ubifs/auth.c  |  4 ++--
 include/linux/string.h   |  2 +-
 kernel/cgroup/rdma.c |  2 +-
 kernel/sched/debug.c |  2 +-
 kernel/trace/trace.c |  2 +-
 lib/string.c |  8 
 mm/mempolicy.c   |  2 +-
 mm/vmpressure.c  |  4 ++--
 security/apparmor/lsm.c  |  4 ++--
 security/integrity/ima/ima_main.c|  2 +-
 sound/firewire/oxfw/oxfw.c   |  2 +-
 sound/pci/oxygen/oxygen_mixer.c  |  2 +-
 sound/soc/codecs/max98088.c  |  2 +-
 sound/soc/codecs/max98095.c  |  2 +-
 sound/soc/soc-dapm.c |  2 +-
 47 files changed, 67 insertions(+), 67 deletions(-)

diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c
index 1b0149b2bb6c..8039759a9e82 100644
--- a/arch/powerpc/xmon/xmon.c
+++ b/arch/powerpc/xmon/xmon.c
@@ -3264,7 +3264,7 @@ scanhex(unsigned long *vp)
regname[i] = c;
}
regname[i] = 0;
-   i = match_string(regnames, N_PTREGS, regname);
+   i = __match_string(regnames, N_PTREGS, regname);
if (i < 0) {
printf("invalid register name '%%%s'\n", regname);
return 0;
diff --git a/arch/x86/kernel/cpu/mtrr/if.c b/arch/x86/kernel/cpu/mtrr/if.c
index 4d36dcc1cf87..4ec7a5f7b94c 100644
--- a/arch/x86/kernel/cpu/mtrr/if.c
+++ b/arch/x86/kernel/cpu/mtrr/if.c
@@ -142,7 +142,7 @@ mtrr_write(struct file *file, const char __user *buf, 
size_t len, loff_t * ppos)
return -EINVAL;
ptr = skip_spaces(ptr + 5);
 
-   i = match_string(mtrr_strings, MTRR_NUM_TYPES, ptr);
+   i = __match_string(mtrr_strings, MTRR_NUM_TYPES, ptr);
if (i < 0)
return i;
 
diff --git a/drivers/ata/pata_hpt366.c b/drivers/ata/pata_hpt366.c
index 2574d6fbb1ad..a23ec26cc95f 100644
--- a/drivers/ata/pata_hpt366.c
+++ b/drivers/ata/pata_hpt366.c
@@ -181,7 +181,7 @@ static int hpt_dma_blacklisted(const struct ata_device 
*dev, char *modestr,
 
ata_id_c_string(dev->id, model_num, ATA_ID_PROD, sizeof(model_num));
 
-   i = match_string(list, -1, model_num);
+   i = __match_string(list, -1, 

[PATCH 1/3][V2] lib: fix match_string() helper on -1 array size

2019-05-28 Thread Alexandru Ardelean
The documentation of the `match_string()` helper mentions that `n`
should be:
 * @n: number of strings in the array or -1 for NULL terminated arrays

The behavior of the function is different, in the sense that it exits on
the first NULL element in the array, regardless of whether `n` is -1 or a
positive number.

This patch changes the behavior, to exit the loop when a NULL element is
found and n == -1. Essentially, this aligns the behavior with the
doc-string.

There are currently many users of `match_string()`, and so, in order to go
through them, the next patches in the series will focus on doing some
cosmetic changes, which are aimed at grouping the users of
`match_string()`.

Signed-off-by: Alexandru Ardelean 
---

Changelog v1 -> v2:
* split the initial series into just 3 patches that fix the
  `match_string()` helper and start introducing a new version of this
  helper, which computes array-size of static arrays

 lib/string.c | 5 -
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/lib/string.c b/lib/string.c
index 6016eb3ac73d..e2cf5acc83bd 100644
--- a/lib/string.c
+++ b/lib/string.c
@@ -681,8 +681,11 @@ int match_string(const char * const *array, size_t n, 
const char *string)
 
for (index = 0; index < n; index++) {
item = array[index];
-   if (!item)
+   if (!item) {
+   if (n != (size_t)-1)
+   continue;
break;
+   }
if (!strcmp(item, string))
return index;
}
-- 
2.20.1



Re: [PATCH v3 1/3] PCI: Introduce pcibios_ignore_alignment_request

2019-05-28 Thread Shawn Anastasio




On 5/28/19 1:27 AM, Alexey Kardashevskiy wrote:



On 28/05/2019 15:36, Oliver wrote:

On Tue, May 28, 2019 at 2:03 PM Shawn Anastasio  wrote:


Introduce a new pcibios function pcibios_ignore_alignment_request
which allows the PCI core to defer to platform-specific code to
determine whether or not to ignore alignment requests for PCI resources.

The existing behavior is to simply ignore alignment requests when
PCI_PROBE_ONLY is set. This behavior is maintained by the
default implementation of pcibios_ignore_alignment_request.

Signed-off-by: Shawn Anastasio 
---
  drivers/pci/pci.c   | 9 +++--
  include/linux/pci.h | 1 +
  2 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index 8abc843b1615..8207a09085d1 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -5882,6 +5882,11 @@ resource_size_t __weak pcibios_default_alignment(void)
 return 0;
  }

+int __weak pcibios_ignore_alignment_request(void)
+{
+   return pci_has_flag(PCI_PROBE_ONLY);
+}
+
  #define RESOURCE_ALIGNMENT_PARAM_SIZE COMMAND_LINE_SIZE
  static char resource_alignment_param[RESOURCE_ALIGNMENT_PARAM_SIZE] = {0};
  static DEFINE_SPINLOCK(resource_alignment_lock);
@@ -5906,9 +5911,9 @@ static resource_size_t 
pci_specified_resource_alignment(struct pci_dev *dev,
 p = resource_alignment_param;
 if (!*p && !align)
 goto out;
-   if (pci_has_flag(PCI_PROBE_ONLY)) {
+   if (pcibios_ignore_alignment_request()) {
 align = 0;
-   pr_info_once("PCI: Ignoring requested alignments 
(PCI_PROBE_ONLY)\n");
+   pr_info_once("PCI: Ignoring requested alignments\n");
 goto out;
 }


I think the logic here is questionable to begin with. If the user has
explicitly requested re-aligning a resource via the command line then
we should probably do it even if PCI_PROBE_ONLY is set. When it breaks
they get to keep the pieces.

That said, the real issue here is that PCI_PROBE_ONLY probably
shouldn't be set under qemu/kvm. Under the other hypervisor (PowerVM)
hotplugged devices are configured by firmware before it's passed to
the guest and we need to keep the FW assignments otherwise things
break. QEMU however doesn't do any BAR assignments and relies on that
being handled by the guest. At boot time this is done by SLOF, but
Linux only keeps SLOF around until it's extracted the device-tree.
Once that's done SLOF gets blown away and the kernel needs to do its
own BAR assignments. I'm guessing there's a hack in there to make it
work today, but it's a little surprising that it works at all...



The hack is to run a modified qemu-aware "/usr/sbin/rtas_errd" in the
guest which receives an event from qemu (RAS_EPOW from
/proc/interrupts), fetches device tree chunks (and as I understand it -
they come with BARs from phyp but without from qemu) and writes "1" to
"/sys/bus/pci/rescan" which calls pci_assign_resource() eventually:


Interesting. Does this mean that the PHYP hotplug path doesn't
call pci_assign_resource? If so it means the patch may not
break that platform after all, though it still may not be
the correct way of doing things.



[c6e6f960] [c05f62d4] pci_assign_resource+0x44/0x360

[c6e6fa10] [c05f8b54]
assign_requested_resources_sorted+0x84/0x110
[c6e6fa60] [c05f9540] __assign_resources_sorted+0xd0/0x750
[c6e6fb40] [c05fb2e0]
__pci_bus_assign_resources+0x80/0x280
[c6e6fc00] [c05fb95c]
pci_assign_unassigned_bus_resources+0xbc/0x100
[c6e6fc60] [c05e3d74] pci_rescan_bus+0x34/0x60

[c6e6fc90] [c05f1ef4] rescan_store+0x84/0xc0

[c6e6fcd0] [c068060c] bus_attr_store+0x3c/0x60

[c6e6fcf0] [c037853c] sysfs_kf_write+0x5c/0x80







IIRC Sam Bobroff was looking at hotplug under pseries recently so he
might have something to add. He's sick at the moment, but I'll ask him
to take a look at this once he's back among the living


diff --git a/include/linux/pci.h b/include/linux/pci.h
index 4a5a84d7bdd4..47471dcdbaf9 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -1990,6 +1990,7 @@ static inline void pcibios_penalize_isa_irq(int irq, int 
active) {}
  int pcibios_alloc_irq(struct pci_dev *dev);
  void pcibios_free_irq(struct pci_dev *dev);
  resource_size_t pcibios_default_alignment(void);
+int pcibios_ignore_alignment_request(void);

  #ifdef CONFIG_HIBERNATE_CALLBACKS
  extern struct dev_pm_ops pcibios_pm_ops;
--
2.20.1





[RFC PATCH v4 6/6] kvmppc: Support reset of secure guest

2019-05-28 Thread Bharata B Rao
Add support for reset of secure guest via a new ioctl KVM_PPC_SVM_OFF.
This ioctl will be issued by QEMU during reset and in this ioctl,
we ask UV to terminate the guest via UV_SVM_TERMINATE ucall,
reinitialize guest's partitioned scoped page tables and release all
HMM pages of the secure guest.

After these steps, guest is ready to issue UV_ESM call once again
to switch to secure mode.

Signed-off-by: Bharata B Rao 
Signed-off-by: Sukadev Bhattiprolu 
[Implementation of uv_svm_terminate() and its call from
guest shutdown path]
---
 arch/powerpc/include/asm/kvm_host.h   |  6 ++
 arch/powerpc/include/asm/kvm_ppc.h|  4 ++
 arch/powerpc/include/asm/ultravisor-api.h |  1 +
 arch/powerpc/include/asm/ultravisor.h |  7 ++
 arch/powerpc/kvm/book3s_hv.c  | 23 +++
 arch/powerpc/kvm/book3s_hv_hmm.c  | 83 +++
 arch/powerpc/kvm/powerpc.c| 12 
 include/uapi/linux/kvm.h  |  1 +
 tools/include/uapi/linux/kvm.h|  1 +
 9 files changed, 138 insertions(+)

diff --git a/arch/powerpc/include/asm/kvm_host.h 
b/arch/powerpc/include/asm/kvm_host.h
index 63d56f7a357e..c220bcfe7726 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -868,6 +868,7 @@ extern void kvmppc_hmm_free(void);
 extern void kvmppc_hmm_release_pfns(struct kvm_memory_slot *free);
 extern bool kvmppc_is_guest_secure(struct kvm *kvm);
 extern int kvmppc_send_page_to_uv(struct kvm *kvm, unsigned long gpa);
+extern int kvmppc_hmm_svm_off(struct kvm *kvm);
 #else
 static inline int kvmppc_hmm_init(void)
 {
@@ -887,6 +888,11 @@ static inline int kvmppc_send_page_to_uv(struct kvm *kvm, 
unsigned long gpa)
 {
return -EFAULT;
 }
+
+static inline int kvmppc_hmm_svm_off(struct kvm *kvm)
+{
+   return 0;
+}
 #endif /* CONFIG_PPC_UV */
 
 #endif /* __POWERPC_KVM_HOST_H__ */
diff --git a/arch/powerpc/include/asm/kvm_ppc.h 
b/arch/powerpc/include/asm/kvm_ppc.h
index bc892380e6cd..aee4b81e9558 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -188,6 +188,7 @@ extern void kvm_spapr_tce_release_iommu_group(struct kvm 
*kvm,
 extern int kvmppc_switch_mmu_to_hpt(struct kvm *kvm);
 extern int kvmppc_switch_mmu_to_radix(struct kvm *kvm);
 extern void kvmppc_setup_partition_table(struct kvm *kvm);
+extern int kvmppc_reinit_partition_table(struct kvm *kvm);
 
 extern long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm,
struct kvm_create_spapr_tce_64 *args);
@@ -332,6 +333,7 @@ struct kvmppc_ops {
   int size);
int (*store_to_eaddr)(struct kvm_vcpu *vcpu, ulong *eaddr, void *ptr,
  int size);
+   int (*svm_off)(struct kvm *kvm);
 };
 
 extern struct kvmppc_ops *kvmppc_hv_ops;
@@ -961,5 +963,7 @@ static inline ulong kvmppc_get_ea_indexed(struct kvm_vcpu 
*vcpu, int ra, int rb)
 }
 
 extern void xics_wake_cpu(int cpu);
+extern void kvmppc_hmm_free_memslot_pfns(struct kvm *kvm,
+struct kvm_memslots *slots);
 
 #endif /* __POWERPC_KVM_PPC_H__ */
diff --git a/arch/powerpc/include/asm/ultravisor-api.h 
b/arch/powerpc/include/asm/ultravisor-api.h
index eaca65ea2070..6d59c64f30ce 100644
--- a/arch/powerpc/include/asm/ultravisor-api.h
+++ b/arch/powerpc/include/asm/ultravisor-api.h
@@ -25,5 +25,6 @@
 #define UV_PAGE_IN 0xF128
 #define UV_PAGE_OUT0xF12C
 #define UV_PAGE_INVAL  0xF138
+#define UV_SVM_TERMINATE   0xF13C
 
 #endif /* _ASM_POWERPC_ULTRAVISOR_API_H */
diff --git a/arch/powerpc/include/asm/ultravisor.h 
b/arch/powerpc/include/asm/ultravisor.h
index 28dbc0f0eddb..1c60b7328f09 100644
--- a/arch/powerpc/include/asm/ultravisor.h
+++ b/arch/powerpc/include/asm/ultravisor.h
@@ -84,6 +84,13 @@ static inline int uv_page_inval(u64 lpid, u64 gpa, u64 
page_shift)
 
return ucall(UV_PAGE_INVAL, retbuf, lpid, gpa, page_shift);
 }
+
+static inline int uv_svm_terminate(u64 lpid)
+{
+   unsigned long retbuf[UCALL_BUFSIZE];
+
+   return ucall(UV_SVM_TERMINATE, retbuf, lpid);
+}
 #endif /* !__ASSEMBLY__ */
 
 #endif /* _ASM_POWERPC_ULTRAVISOR_H */
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 5ef35e230453..4a423c92fb18 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -4571,6 +4571,22 @@ void kvmppc_setup_partition_table(struct kvm *kvm)
kvmhv_set_ptbl_entry(kvm->arch.lpid, dw0, dw1);
 }
 
+/*
+ * Called from KVM_PPC_SVM_OFF ioctl at guest reset time when secure
+ * guest is converted back to normal guest.
+ */
+int kvmppc_reinit_partition_table(struct kvm *kvm)
+{
+   int ret;
+
+   ret = kvmppc_init_vm_radix(kvm);
+   if (ret)
+   return ret;
+
+   kvmppc_setup_partition_table(kvm);
+   return 0;
+}
+
 /*
  * Set up HPT (hashed page table) and RMA (real-mode area).

[RFC PATCH v4 5/6] kvmppc: Radix changes for secure guest

2019-05-28 Thread Bharata B Rao
- After the guest becomes secure, when we handle a page fault of a page
  belonging to SVM in HV, send that page to UV via UV_PAGE_IN.
- Whenever a page is unmapped on the HV side, inform UV via UV_PAGE_INVAL.

Signed-off-by: Bharata B Rao 
---
 arch/powerpc/include/asm/kvm_host.h   | 13 +
 arch/powerpc/include/asm/ultravisor-api.h |  1 +
 arch/powerpc/include/asm/ultravisor.h |  7 +++
 arch/powerpc/kvm/book3s_64_mmu_radix.c| 19 +++
 arch/powerpc/kvm/book3s_hv_hmm.c  | 23 +++
 5 files changed, 63 insertions(+)

diff --git a/arch/powerpc/include/asm/kvm_host.h 
b/arch/powerpc/include/asm/kvm_host.h
index 845fd2a73506..63d56f7a357e 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -866,6 +866,8 @@ static inline void kvm_arch_vcpu_block_finish(struct 
kvm_vcpu *vcpu) {}
 extern int kvmppc_hmm_init(void);
 extern void kvmppc_hmm_free(void);
 extern void kvmppc_hmm_release_pfns(struct kvm_memory_slot *free);
+extern bool kvmppc_is_guest_secure(struct kvm *kvm);
+extern int kvmppc_send_page_to_uv(struct kvm *kvm, unsigned long gpa);
 #else
 static inline int kvmppc_hmm_init(void)
 {
@@ -874,6 +876,17 @@ static inline int kvmppc_hmm_init(void)
 
 static inline void kvmppc_hmm_free(void) {}
 static inline void kvmppc_hmm_release_pfns(struct kvm_memory_slot *free) {}
+
+
+static inline bool kvmppc_is_guest_secure(struct kvm *kvm)
+{
+   return false;
+}
+
+static inline int kvmppc_send_page_to_uv(struct kvm *kvm, unsigned long gpa)
+{
+   return -EFAULT;
+}
 #endif /* CONFIG_PPC_UV */
 
 #endif /* __POWERPC_KVM_HOST_H__ */
diff --git a/arch/powerpc/include/asm/ultravisor-api.h 
b/arch/powerpc/include/asm/ultravisor-api.h
index 35b71e01177d..eaca65ea2070 100644
--- a/arch/powerpc/include/asm/ultravisor-api.h
+++ b/arch/powerpc/include/asm/ultravisor-api.h
@@ -24,5 +24,6 @@
 #define UV_UNREGISTER_MEM_SLOT 0xF124
 #define UV_PAGE_IN 0xF128
 #define UV_PAGE_OUT0xF12C
+#define UV_PAGE_INVAL  0xF138
 
 #endif /* _ASM_POWERPC_ULTRAVISOR_API_H */
diff --git a/arch/powerpc/include/asm/ultravisor.h 
b/arch/powerpc/include/asm/ultravisor.h
index 5113457c4743..28dbc0f0eddb 100644
--- a/arch/powerpc/include/asm/ultravisor.h
+++ b/arch/powerpc/include/asm/ultravisor.h
@@ -77,6 +77,13 @@ static inline int uv_unregister_mem_slot(u64 lpid, u64 
slotid)
 
return ucall(UV_UNREGISTER_MEM_SLOT, retbuf, lpid, slotid);
 }
+
+static inline int uv_page_inval(u64 lpid, u64 gpa, u64 page_shift)
+{
+   unsigned long retbuf[UCALL_BUFSIZE];
+
+   return ucall(UV_PAGE_INVAL, retbuf, lpid, gpa, page_shift);
+}
 #endif /* !__ASSEMBLY__ */
 
 #endif /* _ASM_POWERPC_ULTRAVISOR_H */
diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c 
b/arch/powerpc/kvm/book3s_64_mmu_radix.c
index f55ef071883f..e5d63449ad77 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_radix.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c
@@ -21,6 +21,8 @@
 #include 
 #include 
 #include 
+#include 
+#include 
 
 /*
  * Supported radix tree geometry.
@@ -923,6 +925,9 @@ int kvmppc_book3s_radix_page_fault(struct kvm_run *run, 
struct kvm_vcpu *vcpu,
if (!(dsisr & DSISR_PRTABLE_FAULT))
gpa |= ea & 0xfff;
 
+   if (kvmppc_is_guest_secure(kvm))
+   return kvmppc_send_page_to_uv(kvm, gpa & PAGE_MASK);
+
/* Get the corresponding memslot */
memslot = gfn_to_memslot(kvm, gfn);
 
@@ -980,6 +985,11 @@ int kvm_unmap_radix(struct kvm *kvm, struct 
kvm_memory_slot *memslot,
unsigned long gpa = gfn << PAGE_SHIFT;
unsigned int shift;
 
+   if (kvmppc_is_guest_secure(kvm)) {
+   uv_page_inval(kvm->arch.lpid, gpa, PAGE_SIZE);
+   return 0;
+   }
+
ptep = __find_linux_pte(kvm->arch.pgtable, gpa, NULL, &shift);
if (ptep && pte_present(*ptep))
kvmppc_unmap_pte(kvm, ptep, gpa, shift, memslot,
@@ -997,6 +1007,9 @@ int kvm_age_radix(struct kvm *kvm, struct kvm_memory_slot 
*memslot,
int ref = 0;
unsigned long old, *rmapp;
 
+   if (kvmppc_is_guest_secure(kvm))
+   return ref;
+
ptep = __find_linux_pte(kvm->arch.pgtable, gpa, NULL, &shift);
if (ptep && pte_present(*ptep) && pte_young(*ptep)) {
old = kvmppc_radix_update_pte(kvm, ptep, _PAGE_ACCESSED, 0,
@@ -1021,6 +1034,9 @@ int kvm_test_age_radix(struct kvm *kvm, struct 
kvm_memory_slot *memslot,
unsigned int shift;
int ref = 0;
 
+   if (kvmppc_is_guest_secure(kvm))
+   return ref;
+
ptep = __find_linux_pte(kvm->arch.pgtable, gpa, NULL, &shift);
if (ptep && pte_present(*ptep) && pte_young(*ptep))
ref = 1;
@@ -1038,6 +1054,9 @@ static int kvm_radix_test_clear_dirty(struct kvm *kvm,
int ret = 0;
unsigned long old, *rmapp;
 
+   if (kvmppc_is_guest_secure(kvm))
+   return ret;
+

[PATCH v4 4/6] kvmppc: Handle memory plug/unplug to secure VM

2019-05-28 Thread Bharata B Rao
Register the new memslot with UV during plug and unregister
the memslot during unplug.

Signed-off-by: Bharata B Rao 
---
 arch/powerpc/include/asm/ultravisor-api.h |  1 +
 arch/powerpc/include/asm/ultravisor.h |  7 +++
 arch/powerpc/kvm/book3s_hv.c  | 19 +++
 3 files changed, 27 insertions(+)

diff --git a/arch/powerpc/include/asm/ultravisor-api.h 
b/arch/powerpc/include/asm/ultravisor-api.h
index 05b17f4351f4..35b71e01177d 100644
--- a/arch/powerpc/include/asm/ultravisor-api.h
+++ b/arch/powerpc/include/asm/ultravisor-api.h
@@ -21,6 +21,7 @@
 #define UV_WRITE_PATE  0xF104
 #define UV_RETURN  0xF11C
 #define UV_REGISTER_MEM_SLOT   0xF120
+#define UV_UNREGISTER_MEM_SLOT 0xF124
 #define UV_PAGE_IN 0xF128
 #define UV_PAGE_OUT0xF12C
 
diff --git a/arch/powerpc/include/asm/ultravisor.h 
b/arch/powerpc/include/asm/ultravisor.h
index 9befa6fea8db..5113457c4743 100644
--- a/arch/powerpc/include/asm/ultravisor.h
+++ b/arch/powerpc/include/asm/ultravisor.h
@@ -70,6 +70,13 @@ static inline int uv_register_mem_slot(u64 lpid, u64 
start_gpa, u64 size,
return ucall(UV_REGISTER_MEM_SLOT, retbuf, lpid, start_gpa,
 size, flags, slotid);
 }
+
+static inline int uv_unregister_mem_slot(u64 lpid, u64 slotid)
+{
+   unsigned long retbuf[UCALL_BUFSIZE];
+
+   return ucall(UV_UNREGISTER_MEM_SLOT, retbuf, lpid, slotid);
+}
 #endif /* !__ASSEMBLY__ */
 
 #endif /* _ASM_POWERPC_ULTRAVISOR_H */
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 3683e517541f..5ef35e230453 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -78,6 +78,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include "book3s.h"
 
@@ -4498,6 +4499,24 @@ static void kvmppc_core_commit_memory_region_hv(struct 
kvm *kvm,
if (change == KVM_MR_FLAGS_ONLY && kvm_is_radix(kvm) &&
((new->flags ^ old->flags) & KVM_MEM_LOG_DIRTY_PAGES))
kvmppc_radix_flush_memslot(kvm, old);
+   /*
+* If UV hasn't yet called H_SVM_INIT_START, don't register memslots.
+*/
+   if (!kvm->arch.secure_guest)
+   return;
+
+   /*
+* TODO: Handle KVM_MR_MOVE
+*/
+   if (change == KVM_MR_CREATE) {
+   uv_register_mem_slot(kvm->arch.lpid,
+  new->base_gfn << PAGE_SHIFT,
+  new->npages * PAGE_SIZE,
+  0,
+  new->id);
+   } else if (change == KVM_MR_DELETE) {
+   uv_unregister_mem_slot(kvm->arch.lpid, old->id);
+   }
 }
 
 /*
-- 
2.17.1



[PATCH v4 3/6] kvmppc: H_SVM_INIT_START and H_SVM_INIT_DONE hcalls

2019-05-28 Thread Bharata B Rao
H_SVM_INIT_START: Initiate securing a VM
H_SVM_INIT_DONE: Conclude securing a VM

As part of H_SVM_INIT_START register all existing memslots with the UV.
H_SVM_INIT_DONE call by UV informs HV that transition of the guest
to secure mode is complete.

Signed-off-by: Bharata B Rao 
---
 arch/powerpc/include/asm/hvcall.h |  2 ++
 arch/powerpc/include/asm/kvm_book3s_hmm.h | 12 
 arch/powerpc/include/asm/kvm_host.h   |  4 +++
 arch/powerpc/include/asm/ultravisor-api.h |  1 +
 arch/powerpc/include/asm/ultravisor.h |  9 ++
 arch/powerpc/kvm/book3s_hv.c  |  7 +
 arch/powerpc/kvm/book3s_hv_hmm.c  | 34 +++
 7 files changed, 69 insertions(+)

diff --git a/arch/powerpc/include/asm/hvcall.h 
b/arch/powerpc/include/asm/hvcall.h
index 05b8536f6653..fa7695928e30 100644
--- a/arch/powerpc/include/asm/hvcall.h
+++ b/arch/powerpc/include/asm/hvcall.h
@@ -343,6 +343,8 @@
 /* Platform-specific hcalls used by the Ultravisor */
 #define H_SVM_PAGE_IN  0xEF00
 #define H_SVM_PAGE_OUT 0xEF04
+#define H_SVM_INIT_START   0xEF08
+#define H_SVM_INIT_DONE0xEF0C
 
 /* Values for 2nd argument to H_SET_MODE */
 #define H_SET_MODE_RESOURCE_SET_CIABR  1
diff --git a/arch/powerpc/include/asm/kvm_book3s_hmm.h 
b/arch/powerpc/include/asm/kvm_book3s_hmm.h
index 21f3de5f2acb..3e13dab7f690 100644
--- a/arch/powerpc/include/asm/kvm_book3s_hmm.h
+++ b/arch/powerpc/include/asm/kvm_book3s_hmm.h
@@ -11,6 +11,8 @@ extern unsigned long kvmppc_h_svm_page_out(struct kvm *kvm,
  unsigned long gra,
  unsigned long flags,
  unsigned long page_shift);
+extern unsigned long kvmppc_h_svm_init_start(struct kvm *kvm);
+extern unsigned long kvmppc_h_svm_init_done(struct kvm *kvm);
 #else
 static inline unsigned long
 kvmppc_h_svm_page_in(struct kvm *kvm, unsigned long gra,
@@ -25,5 +27,15 @@ kvmppc_h_svm_page_out(struct kvm *kvm, unsigned long gra,
 {
return H_UNSUPPORTED;
 }
+
+static inline unsigned long kvmppc_h_svm_init_start(struct kvm *kvm)
+{
+   return H_UNSUPPORTED;
+}
+
+static inline unsigned long kvmppc_h_svm_init_done(struct kvm *kvm)
+{
+   return H_UNSUPPORTED;
+}
 #endif /* CONFIG_PPC_UV */
 #endif /* __POWERPC_KVM_PPC_HMM_H__ */
diff --git a/arch/powerpc/include/asm/kvm_host.h 
b/arch/powerpc/include/asm/kvm_host.h
index c0c9c3455ac4..845fd2a73506 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -272,6 +272,10 @@ struct kvm_hpt_info {
 
 struct kvm_resize_hpt;
 
+/* Flag values for kvm_arch.secure_guest */
+#define KVMPPC_SECURE_INIT_START   0x1 /* H_SVM_INIT_START has been called 
*/
+#define KVMPPC_SECURE_INIT_DONE0x2 /* H_SVM_INIT_DONE 
completed */
+
 struct kvm_arch {
unsigned int lpid;
unsigned int smt_mode;  /* # vcpus per virtual core */
diff --git a/arch/powerpc/include/asm/ultravisor-api.h 
b/arch/powerpc/include/asm/ultravisor-api.h
index 51c4e0b5d197..05b17f4351f4 100644
--- a/arch/powerpc/include/asm/ultravisor-api.h
+++ b/arch/powerpc/include/asm/ultravisor-api.h
@@ -20,6 +20,7 @@
 /* opcodes */
 #define UV_WRITE_PATE  0xF104
 #define UV_RETURN  0xF11C
+#define UV_REGISTER_MEM_SLOT   0xF120
 #define UV_PAGE_IN 0xF128
 #define UV_PAGE_OUT0xF12C
 
diff --git a/arch/powerpc/include/asm/ultravisor.h 
b/arch/powerpc/include/asm/ultravisor.h
index 1e4c51799b43..9befa6fea8db 100644
--- a/arch/powerpc/include/asm/ultravisor.h
+++ b/arch/powerpc/include/asm/ultravisor.h
@@ -61,6 +61,15 @@ static inline int uv_page_out(u64 lpid, u64 dst_ra, u64 
src_gpa, u64 flags,
return ucall(UV_PAGE_OUT, retbuf, lpid, dst_ra, src_gpa, flags,
 page_shift);
 }
+
+static inline int uv_register_mem_slot(u64 lpid, u64 start_gpa, u64 size,
+  u64 flags, u64 slotid)
+{
+   unsigned long retbuf[UCALL_BUFSIZE];
+
+   return ucall(UV_REGISTER_MEM_SLOT, retbuf, lpid, start_gpa,
+size, flags, slotid);
+}
 #endif /* !__ASSEMBLY__ */
 
 #endif /* _ASM_POWERPC_ULTRAVISOR_H */
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 2918616198de..3683e517541f 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -1098,6 +1098,13 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
kvmppc_get_gpr(vcpu, 5),
kvmppc_get_gpr(vcpu, 6));
break;
+   case H_SVM_INIT_START:
+   ret = kvmppc_h_svm_init_start(vcpu->kvm);
+   break;
+   case H_SVM_INIT_DONE:
+   ret = kvmppc_h_svm_init_done(vcpu->kvm);
+   break;
+
default:
return RESUME_HOST;

[PATCH v4 2/6] kvmppc: Shared pages support for secure guests

2019-05-28 Thread Bharata B Rao
A secure guest will share some of its pages with the hypervisor (e.g. virtio
bounce buffers). Support shared pages in the HMM driver.

Signed-off-by: Bharata B Rao 
---
 arch/powerpc/include/asm/hvcall.h |  3 ++
 arch/powerpc/kvm/book3s_hv_hmm.c  | 58 +--
 2 files changed, 58 insertions(+), 3 deletions(-)

diff --git a/arch/powerpc/include/asm/hvcall.h 
b/arch/powerpc/include/asm/hvcall.h
index 2f6b952deb0f..05b8536f6653 100644
--- a/arch/powerpc/include/asm/hvcall.h
+++ b/arch/powerpc/include/asm/hvcall.h
@@ -337,6 +337,9 @@
 #define H_TLB_INVALIDATE   0xF808
 #define H_COPY_TOFROM_GUEST0xF80C
 
+/* Flags for H_SVM_PAGE_IN */
+#define H_PAGE_IN_SHARED0x1
+
 /* Platform-specific hcalls used by the Ultravisor */
 #define H_SVM_PAGE_IN  0xEF00
 #define H_SVM_PAGE_OUT 0xEF04
diff --git a/arch/powerpc/kvm/book3s_hv_hmm.c b/arch/powerpc/kvm/book3s_hv_hmm.c
index 713806003da3..333829682f59 100644
--- a/arch/powerpc/kvm/book3s_hv_hmm.c
+++ b/arch/powerpc/kvm/book3s_hv_hmm.c
@@ -45,6 +45,7 @@ struct kvmppc_hmm_page_pvt {
unsigned long *rmap;
unsigned int lpid;
unsigned long gpa;
+   bool skip_page_out;
 };
 
 struct kvmppc_hmm_migrate_args {
@@ -212,6 +213,45 @@ static const struct migrate_vma_ops kvmppc_hmm_migrate_ops 
= {
.finalize_and_map = kvmppc_hmm_migrate_finalize_and_map,
 };
 
+/*
+ * Shares the page with HV, thus making it a normal page.
+ *
+ * - If the page is already secure, then provision a new page and share
+ * - If the page is a normal page, share the existing page
+ *
+ * In the former case, uses the HMM fault handler to release the HMM page.
+ */
+static unsigned long
+kvmppc_share_page(struct kvm *kvm, unsigned long *rmap, unsigned long gpa,
+ unsigned long addr, unsigned long page_shift)
+{
+
+   int ret;
+   unsigned int lpid = kvm->arch.lpid;
+   struct page *hmm_page;
+   struct kvmppc_hmm_page_pvt *pvt;
+   unsigned long pfn;
+   int srcu_idx;
+
+   if (kvmppc_is_hmm_pfn(*rmap)) {
+   hmm_page = pfn_to_page(*rmap & ~KVMPPC_PFN_HMM);
+   pvt = (struct kvmppc_hmm_page_pvt *)
+   hmm_devmem_page_get_drvdata(hmm_page);
+   pvt->skip_page_out = true;
+   }
+
+   srcu_idx = srcu_read_lock(&kvm->srcu);
+   pfn = gfn_to_pfn(kvm, gpa >> page_shift);
+   srcu_read_unlock(&kvm->srcu, srcu_idx);
+   if (is_error_noslot_pfn(pfn))
+   return H_PARAMETER;
+
+   ret = uv_page_in(lpid, pfn << page_shift, gpa, 0, page_shift);
+   kvm_release_pfn_clean(pfn);
+
+   return (ret == U_SUCCESS) ? H_SUCCESS : H_PARAMETER;
+}
+
 /*
  * Move page from normal memory to secure memory.
  */
@@ -242,9 +282,12 @@ kvmppc_h_svm_page_in(struct kvm *kvm, unsigned long gpa,
 
end = addr + (1UL << page_shift);
 
-   if (flags)
+   if (flags & ~H_PAGE_IN_SHARED)
return H_P2;
 
+   if (flags & H_PAGE_IN_SHARED)
+   return kvmppc_share_page(kvm, rmap, gpa, addr, page_shift);
+
args.rmap = rmap;
args.lpid = kvm->arch.lpid;
args.gpa = gpa;
@@ -291,8 +334,17 @@ kvmppc_hmm_fault_migrate_alloc_and_copy(struct 
vm_area_struct *vma,
   hmm_devmem_page_get_drvdata(spage);
 
pfn = page_to_pfn(dpage);
-   ret = uv_page_out(pvt->lpid, pfn << PAGE_SHIFT,
- pvt->gpa, 0, PAGE_SHIFT);
+
+   /*
+* This same alloc_and_copy() callback is used in two cases:
+* - When HV touches a secure page, for which we do page-out
+* - When a secure page is converted to shared page, we touch
+*   the page to essentially discard the HMM page. In this case we
+*   skip page-out.
+*/
+   if (!pvt->skip_page_out)
+   ret = uv_page_out(pvt->lpid, pfn << PAGE_SHIFT,
+ pvt->gpa, 0, PAGE_SHIFT);
if (ret == U_SUCCESS)
*dst_pfn = migrate_pfn(pfn) | MIGRATE_PFN_LOCKED;
 }
-- 
2.17.1



[PATCH v4 1/6] kvmppc: HMM backend driver to manage pages of secure guest

2019-05-28 Thread Bharata B Rao
HMM driver for KVM PPC to manage page transitions of
secure guest via H_SVM_PAGE_IN and H_SVM_PAGE_OUT hcalls.

H_SVM_PAGE_IN: Move the content of a normal page to secure page
H_SVM_PAGE_OUT: Move the content of a secure page to normal page

Signed-off-by: Bharata B Rao 
---
 arch/powerpc/include/asm/hvcall.h |   4 +
 arch/powerpc/include/asm/kvm_book3s_hmm.h |  29 ++
 arch/powerpc/include/asm/kvm_host.h   |  14 +
 arch/powerpc/include/asm/ultravisor-api.h |   2 +
 arch/powerpc/include/asm/ultravisor.h |  17 +
 arch/powerpc/kvm/Makefile |   3 +
 arch/powerpc/kvm/book3s_hv.c  |  20 +
 arch/powerpc/kvm/book3s_hv_hmm.c  | 474 ++
 8 files changed, 563 insertions(+)
 create mode 100644 arch/powerpc/include/asm/kvm_book3s_hmm.h
 create mode 100644 arch/powerpc/kvm/book3s_hv_hmm.c

diff --git a/arch/powerpc/include/asm/hvcall.h 
b/arch/powerpc/include/asm/hvcall.h
index 463c63a9fcf1..2f6b952deb0f 100644
--- a/arch/powerpc/include/asm/hvcall.h
+++ b/arch/powerpc/include/asm/hvcall.h
@@ -337,6 +337,10 @@
 #define H_TLB_INVALIDATE   0xF808
 #define H_COPY_TOFROM_GUEST0xF80C
 
+/* Platform-specific hcalls used by the Ultravisor */
+#define H_SVM_PAGE_IN  0xEF00
+#define H_SVM_PAGE_OUT 0xEF04
+
 /* Values for 2nd argument to H_SET_MODE */
 #define H_SET_MODE_RESOURCE_SET_CIABR  1
 #define H_SET_MODE_RESOURCE_SET_DAWR   2
diff --git a/arch/powerpc/include/asm/kvm_book3s_hmm.h 
b/arch/powerpc/include/asm/kvm_book3s_hmm.h
new file mode 100644
index ..21f3de5f2acb
--- /dev/null
+++ b/arch/powerpc/include/asm/kvm_book3s_hmm.h
@@ -0,0 +1,29 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __POWERPC_KVM_PPC_HMM_H__
+#define __POWERPC_KVM_PPC_HMM_H__
+
+#ifdef CONFIG_PPC_UV
+extern unsigned long kvmppc_h_svm_page_in(struct kvm *kvm,
+ unsigned long gra,
+ unsigned long flags,
+ unsigned long page_shift);
+extern unsigned long kvmppc_h_svm_page_out(struct kvm *kvm,
+ unsigned long gra,
+ unsigned long flags,
+ unsigned long page_shift);
+#else
+static inline unsigned long
+kvmppc_h_svm_page_in(struct kvm *kvm, unsigned long gra,
+unsigned long flags, unsigned long page_shift)
+{
+   return H_UNSUPPORTED;
+}
+
+static inline unsigned long
+kvmppc_h_svm_page_out(struct kvm *kvm, unsigned long gra,
+ unsigned long flags, unsigned long page_shift)
+{
+   return H_UNSUPPORTED;
+}
+#endif /* CONFIG_PPC_UV */
+#endif /* __POWERPC_KVM_PPC_HMM_H__ */
diff --git a/arch/powerpc/include/asm/kvm_host.h 
b/arch/powerpc/include/asm/kvm_host.h
index 184becb62ea4..c0c9c3455ac4 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -858,4 +858,18 @@ static inline void kvm_arch_vcpu_blocking(struct kvm_vcpu 
*vcpu) {}
 static inline void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu) {}
 static inline void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu) {}
 
+#ifdef CONFIG_PPC_UV
+extern int kvmppc_hmm_init(void);
+extern void kvmppc_hmm_free(void);
+extern void kvmppc_hmm_release_pfns(struct kvm_memory_slot *free);
+#else
+static inline int kvmppc_hmm_init(void)
+{
+   return 0;
+}
+
+static inline void kvmppc_hmm_free(void) {}
+static inline void kvmppc_hmm_release_pfns(struct kvm_memory_slot *free) {}
+#endif /* CONFIG_PPC_UV */
+
 #endif /* __POWERPC_KVM_HOST_H__ */
diff --git a/arch/powerpc/include/asm/ultravisor-api.h 
b/arch/powerpc/include/asm/ultravisor-api.h
index 15e6ce77a131..51c4e0b5d197 100644
--- a/arch/powerpc/include/asm/ultravisor-api.h
+++ b/arch/powerpc/include/asm/ultravisor-api.h
@@ -20,5 +20,7 @@
 /* opcodes */
 #define UV_WRITE_PATE  0xF104
 #define UV_RETURN  0xF11C
+#define UV_PAGE_IN 0xF128
+#define UV_PAGE_OUT0xF12C
 
 #endif /* _ASM_POWERPC_ULTRAVISOR_API_H */
diff --git a/arch/powerpc/include/asm/ultravisor.h 
b/arch/powerpc/include/asm/ultravisor.h
index 4ffec7a36acd..1e4c51799b43 100644
--- a/arch/powerpc/include/asm/ultravisor.h
+++ b/arch/powerpc/include/asm/ultravisor.h
@@ -44,6 +44,23 @@ static inline int uv_register_pate(u64 lpid, u64 dw0, u64 
dw1)
return ucall(UV_WRITE_PATE, retbuf, lpid, dw0, dw1);
 }
 
+static inline int uv_page_in(u64 lpid, u64 src_ra, u64 dst_gpa, u64 flags,
+u64 page_shift)
+{
+   unsigned long retbuf[UCALL_BUFSIZE];
+
+   return ucall(UV_PAGE_IN, retbuf, lpid, src_ra, dst_gpa, flags,
+page_shift);
+}
+
+static inline int uv_page_out(u64 lpid, u64 dst_ra, u64 src_gpa, u64 flags,
+ u64 page_shift)
+{
+   unsigned long retbuf[UCALL_BUFSIZE];
+
+   return 

[PATCH v4 0/6] kvmppc: HMM driver to manage pages of secure guest

2019-05-28 Thread Bharata B Rao
Hi,

A pseries guest can be run as a secure guest on Ultravisor-enabled
POWER platforms. On such platforms, this driver will be used to manage
the movement of guest pages between the normal memory managed by
hypervisor (HV) and secure memory managed by Ultravisor (UV).

Private ZONE_DEVICE memory equal to the amount of secure memory
available in the platform for running secure guests is created
via a HMM device. The movement of pages between normal and secure
memory is done by ->alloc_and_copy() callback routine of migrate_vma().

The page-in or page-out requests from UV will come to HV as hcalls and
HV will call back into UV via uvcalls to satisfy these page requests.

These patches apply and work on top of the base Ultravisor patches
posted by Claudio Carvalho at:
https://lists.ozlabs.org/pipermail/linuxppc-dev/2019-May/190694.html

In this version, the last two patches are the new additions.

Changes in v4
=============
- Handling HV side page invalidations by issuing UV_PAGE_INVAL ucall
- Handling HV side radix page faults by sending the page to UV
- Support for rebooting a secure guest
- Some cleanups and code reorgs

v3: https://lists.ozlabs.org/pipermail/linuxppc-dev/2019-January/184731.html

Bharata B Rao (6):
  kvmppc: HMM backend driver to manage pages of secure guest
  kvmppc: Shared pages support for secure guests
  kvmppc: H_SVM_INIT_START and H_SVM_INIT_DONE hcalls
  kvmppc: Handle memory plug/unplug to secure VM
  kvmppc: Radix changes for secure guest
  kvmppc: Support reset of secure guest

 arch/powerpc/include/asm/hvcall.h |   9 +
 arch/powerpc/include/asm/kvm_book3s_hmm.h |  41 ++
 arch/powerpc/include/asm/kvm_host.h   |  37 ++
 arch/powerpc/include/asm/kvm_ppc.h|   4 +
 arch/powerpc/include/asm/ultravisor-api.h |   6 +
 arch/powerpc/include/asm/ultravisor.h |  47 ++
 arch/powerpc/kvm/Makefile |   3 +
 arch/powerpc/kvm/book3s_64_mmu_radix.c|  19 +
 arch/powerpc/kvm/book3s_hv.c  |  69 +++
 arch/powerpc/kvm/book3s_hv_hmm.c  | 666 ++
 arch/powerpc/kvm/powerpc.c|  12 +
 include/uapi/linux/kvm.h  |   1 +
 tools/include/uapi/linux/kvm.h|   1 +
 13 files changed, 915 insertions(+)
 create mode 100644 arch/powerpc/include/asm/kvm_book3s_hmm.h
 create mode 100644 arch/powerpc/kvm/book3s_hv_hmm.c

-- 
2.17.1



Re: [PATCH v3 1/3] PCI: Introduce pcibios_ignore_alignment_request

2019-05-28 Thread Alexey Kardashevskiy



On 28/05/2019 15:36, Oliver wrote:
> On Tue, May 28, 2019 at 2:03 PM Shawn Anastasio  wrote:
>>
>> Introduce a new pcibios function pcibios_ignore_alignment_request
>> which allows the PCI core to defer to platform-specific code to
>> determine whether or not to ignore alignment requests for PCI resources.
>>
>> The existing behavior is to simply ignore alignment requests when
>> PCI_PROBE_ONLY is set. This behavior is maintained by the
>> default implementation of pcibios_ignore_alignment_request.
>>
>> Signed-off-by: Shawn Anastasio 
>> ---
>>  drivers/pci/pci.c   | 9 +++--
>>  include/linux/pci.h | 1 +
>>  2 files changed, 8 insertions(+), 2 deletions(-)
>>
>> diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
>> index 8abc843b1615..8207a09085d1 100644
>> --- a/drivers/pci/pci.c
>> +++ b/drivers/pci/pci.c
>> @@ -5882,6 +5882,11 @@ resource_size_t __weak pcibios_default_alignment(void)
>> return 0;
>>  }
>>
>> +int __weak pcibios_ignore_alignment_request(void)
>> +{
>> +   return pci_has_flag(PCI_PROBE_ONLY);
>> +}
>> +
>>  #define RESOURCE_ALIGNMENT_PARAM_SIZE COMMAND_LINE_SIZE
>>  static char resource_alignment_param[RESOURCE_ALIGNMENT_PARAM_SIZE] = {0};
>>  static DEFINE_SPINLOCK(resource_alignment_lock);
>> @@ -5906,9 +5911,9 @@ static resource_size_t 
>> pci_specified_resource_alignment(struct pci_dev *dev,
>> p = resource_alignment_param;
>> if (!*p && !align)
>> goto out;
>> -   if (pci_has_flag(PCI_PROBE_ONLY)) {
>> +   if (pcibios_ignore_alignment_request()) {
>> align = 0;
>> -   pr_info_once("PCI: Ignoring requested alignments 
>> (PCI_PROBE_ONLY)\n");
>> +   pr_info_once("PCI: Ignoring requested alignments\n");
>> goto out;
>> }
> 
> I think the logic here is questionable to begin with. If the user has
> explicitly requested re-aligning a resource via the command line then
> we should probably do it even if PCI_PROBE_ONLY is set. When it breaks
> they get to keep the pieces.
> 
> That said, the real issue here is that PCI_PROBE_ONLY probably
> shouldn't be set under qemu/kvm. Under the other hypervisor (PowerVM)
> hotplugged devices are configured by firmware before they are passed to
> the guest and we need to keep the FW assignments otherwise things
> break. QEMU however doesn't do any BAR assignments and relies on that
> being handled by the guest. At boot time this is done by SLOF, but
> Linux only keeps SLOF around until it's extracted the device-tree.
> Once that's done SLOF gets blown away and the kernel needs to do its
> own BAR assignments. I'm guessing there's a hack in there to make it
> work today, but it's a little surprising that it works at all...


The hack is to run a modified qemu-aware "/usr/sbin/rtas_errd" in the
guest which receives an event from qemu (RAS_EPOW from
/proc/interrupts), fetches device tree chunks (as I understand it, they
come with BARs assigned under phyp but without them under qemu) and writes "1" to
"/sys/bus/pci/rescan" which calls pci_assign_resource() eventually:

[c6e6f960] [c05f62d4] pci_assign_resource+0x44/0x360

[c6e6fa10] [c05f8b54]
assign_requested_resources_sorted+0x84/0x110
[c6e6fa60] [c05f9540] __assign_resources_sorted+0xd0/0x750
[c6e6fb40] [c05fb2e0]
__pci_bus_assign_resources+0x80/0x280
[c6e6fc00] [c05fb95c]
pci_assign_unassigned_bus_resources+0xbc/0x100
[c6e6fc60] [c05e3d74] pci_rescan_bus+0x34/0x60

[c6e6fc90] [c05f1ef4] rescan_store+0x84/0xc0

[c6e6fcd0] [c068060c] bus_attr_store+0x3c/0x60

[c6e6fcf0] [c037853c] sysfs_kf_write+0x5c/0x80





> 
> IIRC Sam Bobroff was looking at hotplug under pseries recently so he
> might have something to add. He's sick at the moment, but I'll ask him
> to take a look at this once he's back among the living
> 
>> diff --git a/include/linux/pci.h b/include/linux/pci.h
>> index 4a5a84d7bdd4..47471dcdbaf9 100644
>> --- a/include/linux/pci.h
>> +++ b/include/linux/pci.h
>> @@ -1990,6 +1990,7 @@ static inline void pcibios_penalize_isa_irq(int irq, 
>> int active) {}
>>  int pcibios_alloc_irq(struct pci_dev *dev);
>>  void pcibios_free_irq(struct pci_dev *dev);
>>  resource_size_t pcibios_default_alignment(void);
>> +int pcibios_ignore_alignment_request(void);
>>
>>  #ifdef CONFIG_HIBERNATE_CALLBACKS
>>  extern struct dev_pm_ops pcibios_pm_ops;
>> --
>> 2.20.1
>>

-- 
Alexey


Re: [PATCH v4 3/3] kselftest: Extend vDSO selftest to clock_getres

2019-05-28 Thread Michael Ellerman
Vincenzo Frascino  writes:

> The current version of the multiarch vDSO selftest verifies only
> gettimeofday.
>
> Extend the vDSO selftest to clock_getres, to verify that the
> syscall and the vDSO library function return the same information.
>
> The extension has been used to verify the hrtimer_resolution fix.

This is passing for me even without patch 1 applied, shouldn't it fail
without the fix? What am I missing?

# uname -r
5.2.0-rc2-gcc-8.2.0

# ./vdso_clock_getres
clock_id: CLOCK_REALTIME [PASS]
clock_id: CLOCK_BOOTTIME [PASS]
clock_id: CLOCK_TAI [PASS]
clock_id: CLOCK_REALTIME_COARSE [PASS]
clock_id: CLOCK_MONOTONIC [PASS]
clock_id: CLOCK_MONOTONIC_RAW [PASS]
clock_id: CLOCK_MONOTONIC_COARSE [PASS]

cheers

> Cc: Shuah Khan 
> Signed-off-by: Vincenzo Frascino 
> ---
>
> Note: This patch is independent from the others in this series, hence it
> can be merged singularly by the kselftest maintainers.
>
>  tools/testing/selftests/vDSO/Makefile |   2 +
>  .../selftests/vDSO/vdso_clock_getres.c| 124 ++
>  2 files changed, 126 insertions(+)
>  create mode 100644 tools/testing/selftests/vDSO/vdso_clock_getres.c