[PATCH net-next] mlx5: Restore err assignment in mlx5_mdev_init

2020-05-29 Thread Nathan Chancellor
Clang warns:

drivers/net/ethernet/mellanox/mlx5/core/main.c:1278:6: warning: variable
'err' is used uninitialized whenever 'if' condition is true
[-Wsometimes-uninitialized]
if (!priv->dbg_root) {
^~~
drivers/net/ethernet/mellanox/mlx5/core/main.c:1303:9: note:
uninitialized use occurs here
return err;
   ^~~
drivers/net/ethernet/mellanox/mlx5/core/main.c:1278:2: note: remove the
'if' if its condition is always false
if (!priv->dbg_root) {
^~
drivers/net/ethernet/mellanox/mlx5/core/main.c:1259:9: note: initialize
the variable 'err' to silence this warning
int err;
   ^
= 0
1 warning generated.

This path previously returned -ENOMEM, restore that error code so that
it is not uninitialized.

Fixes: 810cbb25549b ("net/mlx5: Add missing mutex destroy")
Link: https://github.com/ClangBuiltLinux/linux/issues/1042
Signed-off-by: Nathan Chancellor 
---
 drivers/net/ethernet/mellanox/mlx5/core/main.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c 
b/drivers/net/ethernet/mellanox/mlx5/core/main.c
index df46b1fce3a7..ac68445fde2d 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
@@ -1277,6 +1277,7 @@ static int mlx5_mdev_init(struct mlx5_core_dev *dev, int 
profile_idx)
mlx5_debugfs_root);
if (!priv->dbg_root) {
dev_err(dev->device, "mlx5_core: error, Cannot create debugfs 
dir, aborting\n");
+   err = -ENOMEM;
goto err_dbg_root;
}
 

base-commit: c0cc73b79123e67b212bd537a7af88e52c9fbeac
-- 
2.27.0.rc0



Re: [PATCH v8 14/18] EFI: Introduce the new AMD Memory Encryption GUID.

2020-05-29 Thread Ashish Kalra
Hello Steve,

On Fri, May 29, 2020 at 07:07:56PM -0700, Steve Rutherford wrote:
> On Tue, May 5, 2020 at 2:20 PM Ashish Kalra  wrote:
> >
> > From: Ashish Kalra 
> >
> > Introduce a new AMD Memory Encryption GUID which is currently
> > used for defining a new UEFI enviroment variable which indicates
> > UEFI/OVMF support for the SEV live migration feature. This variable
> > is setup when UEFI/OVMF detects host/hypervisor support for SEV
> > live migration and later this variable is read by the kernel using
> > EFI runtime services to verify if OVMF supports the live migration
> > feature.
> >
> > Signed-off-by: Ashish Kalra 
> > ---
> >  include/linux/efi.h | 1 +
> >  1 file changed, 1 insertion(+)
> >
> > diff --git a/include/linux/efi.h b/include/linux/efi.h
> > index 251f1f783cdf..2efb42ccf3a8 100644
> > --- a/include/linux/efi.h
> > +++ b/include/linux/efi.h
> > @@ -358,6 +358,7 @@ void efi_native_runtime_setup(void);
> >
> >  /* OEM GUIDs */
> >  #define DELLEMC_EFI_RCI2_TABLE_GUIDEFI_GUID(0x2d9f28a2, 
> > 0xa886, 0x456a,  0x97, 0xa8, 0xf1, 0x1e, 0xf2, 0x4f, 0xf4, 0x55)
> > +#define MEM_ENCRYPT_GUID   EFI_GUID(0x0cf29b71, 
> > 0x9e51, 0x433a,  0xa3, 0xb7, 0x81, 0xf3, 0xab, 0x16, 0xb8, 0x75)
> >
> >  typedef struct {
> > efi_guid_t guid;
> > --
> > 2.17.1
> >
> Have you gotten this GUID upstreamed into edk2?
> 

Not yet.

This patch and the other OVMF patches are ready to be sent for
upstreaming, i was waiting for this kernel patch-set to be
accepted and upstreamed. 

> Reviewed-by: Steve Rutherford 


Re: [PATCH v8 12/18] KVM: SVM: Add support for static allocation of unified Page Encryption Bitmap.

2020-05-29 Thread Ashish Kalra
Hello Steve,

On Fri, May 29, 2020 at 07:07:33PM -0700, Steve Rutherford wrote:
> On Tue, May 5, 2020 at 2:18 PM Ashish Kalra  wrote:
> >
> > From: Ashish Kalra 
> >
> > Add support for static allocation of the unified Page encryption bitmap by
> > extending kvm_arch_commit_memory_region() callack to add svm specific 
> > x86_ops
> > which can read the userspace provided memory region/memslots and calculate
> > the amount of guest RAM managed by the KVM and grow the bitmap based
> > on that information, i.e. the highest guest PA that is mapped by a memslot.
> >
> > Signed-off-by: Ashish Kalra 
> > ---
> >  arch/x86/include/asm/kvm_host.h |  1 +
> >  arch/x86/kvm/svm/sev.c  | 35 +
> >  arch/x86/kvm/svm/svm.c  |  1 +
> >  arch/x86/kvm/svm/svm.h  |  1 +
> >  arch/x86/kvm/x86.c  |  5 +
> >  5 files changed, 43 insertions(+)
> >
> > diff --git a/arch/x86/include/asm/kvm_host.h 
> > b/arch/x86/include/asm/kvm_host.h
> > index fc74144d5ab0..b573ea85b57e 100644
> > --- a/arch/x86/include/asm/kvm_host.h
> > +++ b/arch/x86/include/asm/kvm_host.h
> > @@ -1254,6 +1254,7 @@ struct kvm_x86_ops {
> >
> > bool (*apic_init_signal_blocked)(struct kvm_vcpu *vcpu);
> > int (*enable_direct_tlbflush)(struct kvm_vcpu *vcpu);
> > +   void (*commit_memory_region)(struct kvm *kvm, enum kvm_mr_change 
> > change);
> > int (*page_enc_status_hc)(struct kvm *kvm, unsigned long gpa,
> >   unsigned long sz, unsigned long mode);
> > int (*get_page_enc_bitmap)(struct kvm *kvm,
> > diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
> > index 30efc1068707..c0d7043a0627 100644
> > --- a/arch/x86/kvm/svm/sev.c
> > +++ b/arch/x86/kvm/svm/sev.c
> > @@ -1377,6 +1377,41 @@ static int sev_resize_page_enc_bitmap(struct kvm 
> > *kvm, unsigned long new_size)
> > return 0;
> >  }
> >
> > +void svm_commit_memory_region(struct kvm *kvm, enum kvm_mr_change change)
> > +{
> > +   struct kvm_memslots *slots;
> > +   struct kvm_memory_slot *memslot;
> > +   gfn_t start, end = 0;
> > +
> > +   spin_lock(>mmu_lock);
> > +   if (change == KVM_MR_CREATE) {
> > +   slots = kvm_memslots(kvm);
> > +   kvm_for_each_memslot(memslot, slots) {
> > +   start = memslot->base_gfn;
> > +   end = memslot->base_gfn + memslot->npages;
> > +   /*
> > +* KVM memslots is a sorted list, starting with
> > +* the highest mapped guest PA, so pick the topmost
> > +* valid guest PA.
> > +*/
> > +   if (memslot->npages)
> > +   break;
> > +   }
> > +   }
> > +   spin_unlock(>mmu_lock);
> > +
> > +   if (end) {
> > +   /*
> > +* NORE: This callback is invoked in vm ioctl
> > +* set_user_memory_region, hence we can use a
> > +* mutex here.
> > +*/
> > +   mutex_lock(>lock);
> > +   sev_resize_page_enc_bitmap(kvm, end);
> > +   mutex_unlock(>lock);
> > +   }
> > +}
> > +
> >  int svm_page_enc_status_hc(struct kvm *kvm, unsigned long gpa,
> >   unsigned long npages, unsigned long enc)
> >  {
> > diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
> > index 501e82f5593c..442adbbb0641 100644
> > --- a/arch/x86/kvm/svm/svm.c
> > +++ b/arch/x86/kvm/svm/svm.c
> > @@ -4015,6 +4015,7 @@ static struct kvm_x86_ops svm_x86_ops __initdata = {
> >
> > .check_nested_events = svm_check_nested_events,
> >
> > +   .commit_memory_region = svm_commit_memory_region,
> > .page_enc_status_hc = svm_page_enc_status_hc,
> > .get_page_enc_bitmap = svm_get_page_enc_bitmap,
> > .set_page_enc_bitmap = svm_set_page_enc_bitmap,
> > diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
> > index 2ebdcce50312..fd99e0a5417a 100644
> > --- a/arch/x86/kvm/svm/svm.h
> > +++ b/arch/x86/kvm/svm/svm.h
> > @@ -406,6 +406,7 @@ int svm_page_enc_status_hc(struct kvm *kvm, unsigned 
> > long gpa,
> >   unsigned long npages, unsigned long enc);
> >  int svm_get_page_enc_bitmap(struct kvm *kvm, struct kvm_page_enc_bitmap 
> > *bmap);
> >  int svm_set_page_enc_bitmap(struct kvm *kvm, struct kvm_page_enc_bitmap 
> > *bmap);
> > +void svm_commit_memory_region(struct kvm *kvm, enum kvm_mr_change change);
> >
> >  /* avic.c */
> >
> > diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> > index c4166d7a0493..8938de868d42 100644
> > --- a/arch/x86/kvm/x86.c
> > +++ b/arch/x86/kvm/x86.c
> > @@ -10133,6 +10133,11 @@ void kvm_arch_commit_memory_region(struct kvm *kvm,
> > kvm_mmu_change_mmu_pages(kvm,
> > 

Re: [PATCH v2 2/3] seccomp: Introduce addfd ioctl to seccomp user notifier

2020-05-29 Thread Kees Cook
On Sat, May 30, 2020 at 03:58:18AM +, Sargun Dhillon wrote:
> Isn't the "right" way to do this to allocate a bunch of file descriptors,
> and fill up the user buffer with them, and then install the files? This
> seems to like half-install the file descriptors and then error out.
> 
> I know that's the current behaviour, but that seems like a bad idea. Do
> we really want to perpetuate this half-broken state? I guess that some
> userspace programs could be depending on this -- and their recovery
> semantics could rely on this. I mean this is 10+ year old code.

Right -- my instincts on this are to leave the behavior as-is. I've
been burned by so many "nothing could possible rely on THIS" cases. ;)

It might be worth adding a comment (or commit log note) that describes
the alternative you suggest here. But I think building a common helper
that does all of the work (and will get used in three^Wfour places now)
is the correct way to refactor this.

Oh hey! Look at scm_detach_fds_compat(). It needs this too. (And it's
missing the cgroup tracking.) That would fix:

48a87cc26c13 ("net: netprio: fd passed in SCM_RIGHTS datagram not set 
correctly")
d84295067fc7 ("net: net_cls: fd passed in SCM_RIGHTS datagram not set 
correctly")

So, yes, let's get this fixed up. I'd say first fix the missing sock
update in the compat path (so it can be CCed stable). Then fix the missing
sock update in pidfd_getfd() (so it can be CCed stable), then write the
helper with a refactoring of scm_detach_fds(), scm_detach_fds_compat(),
and pidfd_getfd(). And then add the addfd seccomp user_notif ioctl cmd.

-- 
Kees Cook


[PATCH V2] mm, memory_failure: don't send BUS_MCEERR_AO for action required error

2020-05-29 Thread Wetp Zhang
Some processes dont't want to be killed early, but in "Action Required"
case, those also may be killed by BUS_MCEERR_AO when sharing memory
with other which is accessing the fail memory.
And sending SIGBUS with BUS_MCEERR_AO for action required error is
strange, so ignore the non-current processes here.

Suggested-by: Naoya Horiguchi 
Signed-off-by: Wetp Zhang 
---
 mm/memory-failure.c | 15 +--
 1 file changed, 9 insertions(+), 6 deletions(-)

diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index a96364be8ab4..dd3862fcf2e9 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -210,14 +210,17 @@ static int kill_proc(struct to_kill *tk, unsigned long 
pfn, int flags)
 {
struct task_struct *t = tk->tsk;
short addr_lsb = tk->size_shift;
-   int ret;
+   int ret = 0;
 
-   pr_err("Memory failure: %#lx: Sending SIGBUS to %s:%d due to hardware 
memory corruption\n",
-   pfn, t->comm, t->pid);
+   if ((t->mm == current->mm) || !(flags & MF_ACTION_REQUIRED))
+   pr_err("Memory failure: %#lx: Sending SIGBUS to %s:%d due to 
hardware memory corruption\n",
+   pfn, t->comm, t->pid);
 
-   if ((flags & MF_ACTION_REQUIRED) && t->mm == current->mm) {
-   ret = force_sig_mceerr(BUS_MCEERR_AR, (void __user *)tk->addr,
-  addr_lsb);
+   if (flags & MF_ACTION_REQUIRED) {
+   if (t->mm == current->mm)
+   ret = force_sig_mceerr(BUS_MCEERR_AR,
+(void __user *)tk->addr, addr_lsb);
+   /* send no signal to non-current processes */
} else {
/*
 * Don't use force here, it's convenient if the signal
-- 
2.14.3 (Apple Git-98)



Re: [PATCH 9/9] staging: media: atomisp: add PMIC_OPREGION dependency

2020-05-29 Thread Mauro Carvalho Chehab
Em Fri, 29 May 2020 20:11:29 -0700
Nathan Chancellor  escreveu:

> On Fri, May 29, 2020 at 10:00:31PM +0200, Arnd Bergmann wrote:
> > Without that driver, there is a link failure in
> > 
> > ERROR: modpost: "intel_soc_pmic_exec_mipi_pmic_seq_element"
> > [drivers/staging/media/atomisp/pci/atomisp_gmin_platform.ko] undefined!
> > 
> > Add an explicit Kconfig dependency.
> > 
> > Signed-off-by: Arnd Bergmann   
> 
> It'd be interesting to know if this is strictly required for the driver
> to work properly. 

It is. Without OpRegion, the driver can't power on the camera sensors.

> The call to intel_soc_pmic_exec_mipi_pmic_seq_element
> has some error handling after it, maybe that should just be surrounded
> by an #ifdef or IS_ENABLED for PMIC_OPREGION, like some other drivers
> do.
> 
> Regardless of that:
> 
> Reviewed-by: Nathan Chancellor 
> 
> > ---
> >  drivers/staging/media/atomisp/Kconfig | 1 +
> >  1 file changed, 1 insertion(+)
> > 
> > diff --git a/drivers/staging/media/atomisp/Kconfig 
> > b/drivers/staging/media/atomisp/Kconfig
> > index c4f3049b0706..e86311c14329 100644
> > --- a/drivers/staging/media/atomisp/Kconfig
> > +++ b/drivers/staging/media/atomisp/Kconfig
> > @@ -11,6 +11,7 @@ menuconfig INTEL_ATOMISP
> >  config VIDEO_ATOMISP
> > tristate "Intel Atom Image Signal Processor Driver"
> > depends on VIDEO_V4L2 && INTEL_ATOMISP
> > +   depends on PMIC_OPREGION
> > select IOSF_MBI
> > select VIDEOBUF_VMALLOC
> > ---help---
> > -- 
> > 2.26.2
> >   



Thanks,
Mauro


Re: [PATCH v2 2/3] seccomp: Introduce addfd ioctl to seccomp user notifier

2020-05-29 Thread Kees Cook
On Sat, May 30, 2020 at 05:17:24AM +0200, Jann Horn wrote:
> On Sat, May 30, 2020 at 4:43 AM Kees Cook  wrote:
> > I mean, yes, that's certainly better, but it just seems a shame that
> > everyone has to do the get_unused/put_unused dance just because of how
> > SCM_RIGHTS does this weird put_user() in the middle.
> >
> > Can anyone clarify the expected failure mode from SCM_RIGHTS? Can we
> > move the put_user() after instead?
> 
> Honestly, I think trying to remove file descriptors and such after
> -EFAULT is a waste of time. If userspace runs into -EFAULT, userspace
> is beyond saving and can't really do much other than exit immediately.
> There are a bunch of places that will change state and then throw
> -EFAULT at the end if userspace supplied an invalid address, because
> trying to hold locks across userspace accesses just in case userspace
> supplied a bogus address is kinda silly (and often borderline
> impossible).

Logically, I agree. I'm more worried about the behavioral change -- if
we don't remove the fd on failure, the fd is installed with no
indication to the process that it exists (it won't know the close it --
if it keeps running -- and it may survive across exec). Before, it never
entered the file table.

> You can actually see that even scm_detach_fds() currently just
> silently swallows errors if writing some header fields fails at the
> end.

Yeah, and it's a corner case. But it should be possible (trivial, even)
to clean up on failure to retain the original results.

-- 
Kees Cook


Re: [PATCH 2/2] exec: Compute file based creds only once

2020-05-29 Thread Kees Cook
On Fri, May 29, 2020 at 10:28:41PM -0500, Eric W. Biederman wrote:
> The range-diff winds up being:
> 1:  c9258ef4879b ! 1:  a7868323c263 exec: Add a per bprm->file version of 
> per_clear
> @@ Commit message
>  
>  History Tree: 
> git://git.kernel.org/pub/scm/linux/kernel/git/tglx/history.git
>  Fixes: 1bb0fa189c6a ("[PATCH] NX: clean up legacy binary support")
> +Reviewed-by: Kees Cook 
>  Signed-off-by: "Eric W. Biederman" 
>  
>   ## fs/exec.c ##
> @@ include/linux/lsm_hooks.h
>*  transitions between security domains).
>*  The hook must set @bprm->active_secureexec to 1 if AT_SECURE 
> should be set to
>*  request libc enable secure mode.
> -+ *  The hook must set @bprm->pf_per_clear to the personality flags 
> that
> -+ *  should be cleared from current->personality.
> ++ *  The hook must add to @bprm->pf_per_clear any personality flags 
> that
> ++ *  should be cleared from current->personality.
>*  @bprm contains the linux_binprm structure.
>*  Return 0 if the hook is successful and permission is granted.
>* @bprm_check_security:
> 2:  e6f20c69b96e ! 2:  56305aa9b6fa exec: Compute file based creds only once
> @@ Commit message
>  secureity attribute and derive capabilities from the fact the
>  user had uid 0 has been added.
>  
> +Reviewed-by: Kees Cook 
>  Signed-off-by: "Eric W. Biederman" 
>  
>   ## fs/binfmt_misc.c ##
> @@ include/linux/lsm_hooks.h
>  + *  between security domains).
>  + *  The hook must set @bprm->secureexec to 1 if AT_SECURE should be 
> set to
>*  request libc enable secure mode.
> -- *  The hook must set @bprm->pf_per_clear to the personality flags 
> that
> -+ *  The hook must set @bprm->per_clear to the personality flags that
> -  *  should be cleared from current->personality.
> +- *  The hook must add to @bprm->pf_per_clear any personality flags 
> that
> ++ *  The hook must add to @bprm->per_clear any personality flags that
> +  *  should be cleared from current->personality.
>*  @bprm contains the linux_binprm structure.
>*  Return 0 if the hook is successful and permission is granted.

Awesome; thanks!

> > The cap_ambient_invariant_ok() test is needlessly repeated: it doesn't
> > examine securebits, and nonroot_raised_pE appears to have no
> > side-effects.
> >
> > One of those can be dropped, yes?
> 
> That is what it looks like to me.

Okay, cool. I was worried I was missing something in the mess of tiny
helper calls. :)

> I hope that when the dust clears the function can become a
> straightforward implementation of the capability equations.
> We will see.

Yeah, this looks better and better every day! I'm glad you're able to
dig through all of this.

-- 
Kees Cook


Re: [PATCH 1/2] exec: Add a per bprm->file version of per_clear

2020-05-29 Thread Kees Cook
On Fri, May 29, 2020 at 10:23:58PM -0500, Eric W. Biederman wrote:
> Kees Cook  writes:
> > I wish we had more robust execve tests. :(
> 
> I think you have more skill at writing automated tests than I do.  So
> feel free to write some.

Yeah, my limiting factor is available time. No worries; I didn't mean
it as a request to you -- it was more a commiseration. :)

-- 
Kees Cook


Re: [PATCH 17/30] KVM: nSVM: synchronize VMCB controls updated by the processor on every vmexit

2020-05-29 Thread Paolo Bonzini
On 30/05/20 04:06, Krish Sadhukhan wrote:
>>
>>   -    nested_vmcb->control.int_ctl   = vmcb->control.int_ctl;
>> -    nested_vmcb->control.int_vector    = vmcb->control.int_vector;
> 
> 
> While it's not related to this patch, I am wondering if we need the
> following line in enter_svm_guest_mode():
> 
>     svm->vmcb->control.int_vector = nested_vmcb->control.int_vector;
> 
> If int_vector is used only to trigger a #VMEXIT after we have decided to
> inject a virtual interrupt, we probably don't the vector value from the
> nested vmcb.

KVM does not use int_vector, but other hypervisor sometimes trigger
virtual interrupts using int_ctl+int_vector instead of eventinj.
(Unlike KVM they don't intercept EXIT_VINTR).  Hyper-V operates like that.

Paolo



[PATCH v2] locking/Kconfig: Don't forcefully uninline spin_unlock for PREEMPT

2020-05-29 Thread Sultan Alsawaf
From: Sultan Alsawaf 

This change was originally done in 2005 without any justification in
commit bda98685b855 ("[PATCH] x86: inline spin_unlock if
!CONFIG_DEBUG_SPINLOCK and !CONFIG_PREEMPT"). Perhaps the reasoning at
the time was that PREEMPT was still considered unstable and needed extra
debugging; however, this is no longer the case, so remove the artificial
limitation.

Signed-off-by: Sultan Alsawaf 
---
 kernel/Kconfig.locks   | 10 +-
 kernel/Kconfig.preempt |  1 -
 2 files changed, 5 insertions(+), 6 deletions(-)

diff --git a/kernel/Kconfig.locks b/kernel/Kconfig.locks
index 3de8fd11873b..2d992f5c6ec3 100644
--- a/kernel/Kconfig.locks
+++ b/kernel/Kconfig.locks
@@ -139,7 +139,7 @@ config INLINE_SPIN_UNLOCK_BH
 
 config INLINE_SPIN_UNLOCK_IRQ
def_bool y
-   depends on !PREEMPTION || ARCH_INLINE_SPIN_UNLOCK_IRQ
+   depends on ARCH_INLINE_SPIN_UNLOCK_IRQ
 
 config INLINE_SPIN_UNLOCK_IRQRESTORE
def_bool y
@@ -168,7 +168,7 @@ config INLINE_READ_LOCK_IRQSAVE
 
 config INLINE_READ_UNLOCK
def_bool y
-   depends on !PREEMPTION || ARCH_INLINE_READ_UNLOCK
+   depends on ARCH_INLINE_READ_UNLOCK
 
 config INLINE_READ_UNLOCK_BH
def_bool y
@@ -176,7 +176,7 @@ config INLINE_READ_UNLOCK_BH
 
 config INLINE_READ_UNLOCK_IRQ
def_bool y
-   depends on !PREEMPTION || ARCH_INLINE_READ_UNLOCK_IRQ
+   depends on ARCH_INLINE_READ_UNLOCK_IRQ
 
 config INLINE_READ_UNLOCK_IRQRESTORE
def_bool y
@@ -205,7 +205,7 @@ config INLINE_WRITE_LOCK_IRQSAVE
 
 config INLINE_WRITE_UNLOCK
def_bool y
-   depends on !PREEMPTION || ARCH_INLINE_WRITE_UNLOCK
+   depends on ARCH_INLINE_WRITE_UNLOCK
 
 config INLINE_WRITE_UNLOCK_BH
def_bool y
@@ -213,7 +213,7 @@ config INLINE_WRITE_UNLOCK_BH
 
 config INLINE_WRITE_UNLOCK_IRQ
def_bool y
-   depends on !PREEMPTION || ARCH_INLINE_WRITE_UNLOCK_IRQ
+   depends on ARCH_INLINE_WRITE_UNLOCK_IRQ
 
 config INLINE_WRITE_UNLOCK_IRQRESTORE
def_bool y
diff --git a/kernel/Kconfig.preempt b/kernel/Kconfig.preempt
index bf82259cff96..5a9e0409c844 100644
--- a/kernel/Kconfig.preempt
+++ b/kernel/Kconfig.preempt
@@ -39,7 +39,6 @@ config PREEMPT
bool "Preemptible Kernel (Low-Latency Desktop)"
depends on !ARCH_NO_PREEMPT
select PREEMPTION
-   select UNINLINE_SPIN_UNLOCK if !ARCH_INLINE_SPIN_UNLOCK
help
  This option reduces the latency of the kernel by making
  all kernel code (that is not executing in a critical section)
-- 
2.26.2



[PATCH] locking/Kconfig: Don't forcefully uninline spin_unlock for PREEMPT

2020-05-29 Thread Sultan Alsawaf
From: Sultan Alsawaf 

This change was originally done in 2005 without any justification in
commit bda98685b855 ("[PATCH] x86: inline spin_unlock if
!CONFIG_DEBUG_SPINLOCK and !CONFIG_PREEMPT"). Perhaps the reasoning at
the time was that PREEMPT was still considered unstable and needed extra
debugging; however, this is no longer the case, so remove the artificial
limitation.

Signed-off-by: Sultan Alsawaf 
---
 kernel/Kconfig.preempt | 1 -
 1 file changed, 1 deletion(-)

diff --git a/kernel/Kconfig.preempt b/kernel/Kconfig.preempt
index bf82259cff96..5a9e0409c844 100644
--- a/kernel/Kconfig.preempt
+++ b/kernel/Kconfig.preempt
@@ -39,7 +39,6 @@ config PREEMPT
bool "Preemptible Kernel (Low-Latency Desktop)"
depends on !ARCH_NO_PREEMPT
select PREEMPTION
-   select UNINLINE_SPIN_UNLOCK if !ARCH_INLINE_SPIN_UNLOCK
help
  This option reduces the latency of the kernel by making
  all kernel code (that is not executing in a critical section)
-- 
2.26.2



linux-next: Signed-off-by missing for commit in the clk tree

2020-05-29 Thread Stephen Rothwell
Hi all,

Commit

  5f2feacb7639 ("clk: vc5: Add support for IDT VersaClock 5P49V6965")

is missing a Signed-off-by from its committer.

-- 
Cheers,
Stephen Rothwell


pgpRr3mitCWYb.pgp
Description: OpenPGP digital signature


Re: [PATCH] flow_dissector: work around stack frame size warning

2020-05-29 Thread Cong Wang
On Fri, May 29, 2020 at 1:14 PM Arnd Bergmann  wrote:
>
> The fl_flow_key structure is around 500 bytes, so having two of them
> on the stack in one function now exceeds the warning limit after an
> otherwise correct change:
>
> net/sched/cls_flower.c:298:12: error: stack frame size of 1056 bytes in 
> function 'fl_classify' [-Werror,-Wframe-larger-than=]
>
> I suspect the fl_classify function could be reworked to only have one
> of them on the stack and modify it in place, but I could not work out
> how to do that.
>
> As a somewhat hacky workaround, move one of them into an out-of-line
> function to reduce its scope. This does not necessarily reduce the stack
> usage of the outer function, but at least the second copy is removed
> from the stack during most of it and does not add up to whatever is
> called from there.
>
> I now see 552 bytes of stack usage for fl_classify(), plus 528 bytes
> for fl_mask_lookup().
>
> Fixes: 58cff782cc55 ("flow_dissector: Parse multiple MPLS Label Stack 
> Entries")
> Signed-off-by: Arnd Bergmann 

I think this is probably the quickest way to amend this warning,
so:

Acked-by: Cong Wang 

Thanks.


[PATCH][v5] KVM: X86: support APERF/MPERF registers

2020-05-29 Thread Li RongQing
Guest kernel reports a fixed cpu frequency in /proc/cpuinfo,
this is confused to user when turbo is enable, and aperf/mperf
can be used to show current cpu frequency after 7d5905dc14a
"(x86 / CPU: Always show current CPU frequency in /proc/cpuinfo)"
so guest should support aperf/mperf capability

This patch implements aperf/mperf by three mode: none, software
emulation, and pass-through

None: default mode, guest does not support aperf/mperf

Software emulation: the period of aperf/mperf in guest mode are
accumulated as emulated value

Pass-though: it is only suitable for KVM_HINTS_REALTIME, Because
that hint guarantees we have a 1:1 vCPU:CPU binding and guaranteed
no over-commit.

And a per-VM capability is added to configure aperfmperf mode

Signed-off-by: Li RongQing 
Signed-off-by: Chai Wen 
Signed-off-by: Jia Lina 
---
diff v4:
fix maybe-uninitialized warning

diff v3:
fix interception of MSR_IA32_MPERF/APERF in svm

diff v2:
support aperfmperf pass though
move common codes to kvm_get_msr_common

diff v1:
1. support AMD, but not test
2. support per-vm capability to enable

 Documentation/virt/kvm/api.rst  | 10 ++
 arch/x86/include/asm/kvm_host.h | 11 +++
 arch/x86/kvm/cpuid.c| 13 -
 arch/x86/kvm/svm/svm.c  |  8 
 arch/x86/kvm/vmx/vmx.c  |  6 ++
 arch/x86/kvm/x86.c  | 42 +
 arch/x86/kvm/x86.h  | 15 +++
 include/uapi/linux/kvm.h|  1 +
 8 files changed, 105 insertions(+), 1 deletion(-)

diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst
index d871dacb984e..f854f4da6fd8 100644
--- a/Documentation/virt/kvm/api.rst
+++ b/Documentation/virt/kvm/api.rst
@@ -6126,3 +6126,13 @@ KVM can therefore start protected VMs.
 This capability governs the KVM_S390_PV_COMMAND ioctl and the
 KVM_MP_STATE_LOAD MP_STATE. KVM_SET_MP_STATE can fail for protected
 guests when the state change is invalid.
+
+8.23 KVM_CAP_APERFMPERF
+
+
+:Architectures: x86
+:Parameters: args[0] is aperfmperf mode;
+ 0 for not support, 1 for software emulation, 2 for pass-through
+:Returns: 0 on success; -1 on error
+
+This capability indicates that KVM supports APERF and MPERF MSR registers
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index fd78bd44b2d6..14643f8af9c4 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -824,6 +824,9 @@ struct kvm_vcpu_arch {
 
/* AMD MSRC001_0015 Hardware Configuration */
u64 msr_hwcr;
+
+   u64 v_mperf;
+   u64 v_aperf;
 };
 
 struct kvm_lpage_info {
@@ -889,6 +892,12 @@ enum kvm_irqchip_mode {
KVM_IRQCHIP_SPLIT,/* created with KVM_CAP_SPLIT_IRQCHIP */
 };
 
+enum kvm_aperfmperf_mode {
+   KVM_APERFMPERF_NONE,
+   KVM_APERFMPERF_SOFT,  /* software emulate aperfmperf */
+   KVM_APERFMPERF_PT,/* pass-through aperfmperf to guest */
+};
+
 #define APICV_INHIBIT_REASON_DISABLE0
 #define APICV_INHIBIT_REASON_HYPERV 1
 #define APICV_INHIBIT_REASON_NESTED 2
@@ -986,6 +995,8 @@ struct kvm_arch {
 
struct kvm_pmu_event_filter *pmu_event_filter;
struct task_struct *nx_lpage_recovery_thread;
+
+   enum kvm_aperfmperf_mode aperfmperf_mode;
 };
 
 struct kvm_vm_stat {
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index cd708b0b460a..c960dda4251b 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -122,6 +122,14 @@ int kvm_update_cpuid(struct kvm_vcpu *vcpu)
   MSR_IA32_MISC_ENABLE_MWAIT);
}
 
+   best = kvm_find_cpuid_entry(vcpu, 6, 0);
+   if (best) {
+   if (guest_has_aperfmperf(vcpu->kvm) &&
+   boot_cpu_has(X86_FEATURE_APERFMPERF))
+   best->ecx |= 1;
+   else
+   best->ecx &= ~1;
+   }
/* Note, maxphyaddr must be updated before tdp_level. */
vcpu->arch.maxphyaddr = cpuid_query_maxphyaddr(vcpu);
vcpu->arch.tdp_level = kvm_x86_ops.get_tdp_level(vcpu);
@@ -557,7 +565,10 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array 
*array, u32 function)
case 6: /* Thermal management */
entry->eax = 0x4; /* allow ARAT */
entry->ebx = 0;
-   entry->ecx = 0;
+   if (boot_cpu_has(X86_FEATURE_APERFMPERF))
+   entry->ecx = 0x1;
+   else
+   entry->ecx = 0x0;
entry->edx = 0;
break;
/* function 7 has additional index. */
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index e9c0fb68387d..1d38fe3afc0d 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -1200,6 +1200,14 @@ static int svm_create_vcpu(struct kvm_vcpu *vcpu)
svm->msrpm = page_address(msrpm_pages);

[RFC][PATCH] usb: typec: tcpci_rt1711h: Try to avoid screaming irq causing boot hangs

2020-05-29 Thread John Stultz
I've recently (since 5.7-rc1) started noticing very rare hangs
pretty early in bootup on my HiKey960 board.

They have been particularly difficult to debug, as the system
seems to not respond at all to sysrq- commands. However, the
system is alive as I'll occaionally see firmware loading timeout
errors after awhile. Adding changes like initcall_debug and
lockdep weren't informative, as it tended to cause the problem
to hide.

I finally tried to dig in a bit more on this today, and noticed
that the last dmesg output before the hang was usually:
  "random: crng init done"

So I dumped the stack at that point, and saw it was being called
from the pl061 gpio irq, and the hang always occurred when the
crng init finished on cpu 0. Instrumenting that more I could see
that when the issue triggered, we were getting a stream of irqs.

Chasing further, I found the screaming irq was for the rt1711h,
and narrowed down that we were hitting the !chip->tcpci check
which immediately returns IRQ_HANDLED, but does not stop the
irq from triggering immediately afterwards.

This patch slightly reworks the logic, so if we hit the irq
before the chip->tcpci has been assigned, we still read and
write the alert register, but just skip calling tcpci_irq().

With this change, I haven't managed to trip over the problem
(though it hasn't been super long - but I did confirm I hit
the error case and it didn't hang the system).

I still have some concern that I don't know why this cropped
up since 5.7-rc, as there haven't been any changes to the
driver since 5.4 (or before). It may just be the initialization
timing has changed due to something else, and its just exposed
this issue? I'm not sure, and that's not super re-assuring.

Anyway, I'd love to hear your thoughts if this looks like a sane
fix or not.

Cc: Guenter Roeck 
Cc: Heikki Krogerus 
Cc: Greg Kroah-Hartman 
Cc: YongQin Liu 
Cc: linux-...@vger.kernel.org
Signed-off-by: John Stultz 
---
 drivers/usb/typec/tcpm/tcpci_rt1711h.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/usb/typec/tcpm/tcpci_rt1711h.c 
b/drivers/usb/typec/tcpm/tcpci_rt1711h.c
index 017389021b96..530fd2c111ad 100644
--- a/drivers/usb/typec/tcpm/tcpci_rt1711h.c
+++ b/drivers/usb/typec/tcpm/tcpci_rt1711h.c
@@ -159,9 +159,6 @@ static irqreturn_t rt1711h_irq(int irq, void *dev_id)
u8 status;
struct rt1711h_chip *chip = dev_id;
 
-   if (!chip->tcpci)
-   return IRQ_HANDLED;
-
ret = rt1711h_read16(chip, TCPC_ALERT, );
if (ret < 0)
goto out;
@@ -176,6 +173,9 @@ static irqreturn_t rt1711h_irq(int irq, void *dev_id)
}
 
 out:
+   if (!chip->tcpci)
+   return IRQ_HANDLED;
+
return tcpci_irq(chip->tcpci);
 }
 
-- 
2.17.1



Re: [PATCH v2 2/3] seccomp: Introduce addfd ioctl to seccomp user notifier

2020-05-29 Thread Sargun Dhillon
> 
> I mean, yes, that's certainly better, but it just seems a shame that
> everyone has to do the get_unused/put_unused dance just because of how
> SCM_RIGHTS does this weird put_user() in the middle.
> 
> Can anyone clarify the expected failure mode from SCM_RIGHTS? Can we
> move the put_user() after instead? I think cleanup would just be:
> replace_fd(fd, NULL, 0)
> 
> So:
> 
> (updated to skip sock updates on failure; thank you Christian!)
> 
> int file_receive(int fd, unsigned long flags, struct file *file)
> {
>   struct socket *sock;
>   int ret;
> 
>   ret = security_file_receive(file);
>   if (ret)
>   return ret;
> 
>   /* Install the file. */
>   if (fd == -1) {
>   ret = get_unused_fd_flags(flags);
>   if (ret >= 0)
>   fd_install(ret, get_file(file));
>   } else {
>   ret = replace_fd(fd, file, flags);
>   }
> 
>   /* Bump the sock usage counts. */
>   if (ret >= 0) {
>   sock = sock_from_file(addfd->file, );
>   if (sock) {
>   sock_update_netprioidx(>sk->sk_cgrp_data);
>   sock_update_classid(>sk->sk_cgrp_data);
>   }
>   }
> 
>   return ret;
> }
> 
> scm_detach_fds()
>   ...
>   for (i=0, cmfptr=(__force int __user *)CMSG_DATA(cm); i  i++, cmfptr++)
>   {
>   int new_fd;
> 
>   err = file_receive(-1, MSG_CMSG_CLOEXEC & msg->msg_flags
>   ? O_CLOEXEC : 0, fp[i]);
>   if (err < 0)
>   break;
>   new_fd = err;
> 
Isn't the "right" way to do this to allocate a bunch of file descriptors,
and fill up the user buffer with them, and then install the files? This
seems to like half-install the file descriptors and then error out.

I know that's the current behaviour, but that seems like a bad idea. Do
we really want to perpetuate this half-broken state? I guess that some
userspace programs could be depending on this -- and their recovery
semantics could rely on this. I mean this is 10+ year old code.

>   err = put_user(err, cmfptr);
>   if (err) {
>   /*
>* If we can't notify userspace that it got the
>* fd, we need to unwind and remove it again.
>*/
>   replace_fd(new_fd, NULL, 0);
>   break;
>   }
>   }
>   ...
> 
> 
> 
> -- 
> Kees Cook


Re: [PATCH] refperf: work around 64-bit division

2020-05-29 Thread Nathan Chancellor
On Fri, May 29, 2020 at 10:15:51PM +0200, Arnd Bergmann wrote:
> A 64-bit division was introduced in refperf, breaking compilation
> on all 32-bit architectures:
> 
> kernel/rcu/refperf.o: in function `main_func':
> refperf.c:(.text+0x57c): undefined reference to `__aeabi_uldivmod'
> 
> Work it by using div_u64 to mark the expensive operation.
> 
> Fixes: bd5b16d6c88d ("refperf: Allow decimal nanoseconds")
> Signed-off-by: Arnd Bergmann 
> ---
>  kernel/rcu/refperf.c | 9 +++--
>  1 file changed, 7 insertions(+), 2 deletions(-)
> 
> diff --git a/kernel/rcu/refperf.c b/kernel/rcu/refperf.c
> index 47df72c492b3..c2366648981d 100644
> --- a/kernel/rcu/refperf.c
> +++ b/kernel/rcu/refperf.c
> @@ -386,7 +386,7 @@ static int main_func(void *arg)
>   if (torture_must_stop())
>   goto end;
>  
> - result_avg[exp] = 1000 * process_durations(nreaders) / 
> (nreaders * loops);
> + result_avg[exp] = div_u64(1000 * process_durations(nreaders), 
> nreaders * loops);
>   }
>  
>   // Print the average of all experiments
> @@ -397,9 +397,14 @@ static int main_func(void *arg)
>   strcat(buf, "Threads\tTime(ns)\n");
>  
>   for (exp = 0; exp < nruns; exp++) {
> + u64 avg;
> + u32 rem;
> +
>   if (errexit)
>   break;
> - sprintf(buf1, "%d\t%llu.%03d\n", exp + 1, result_avg[exp] / 
> 1000, (int)(result_avg[exp] % 1000));
> +
> + avg = div_s64_rem(result_avg[exp], 1000, );

Shouldn't this be div_u64_rem? result_avg is u64.

> + sprintf(buf1, "%d\t%llu.%03d\n", exp + 1, avg, rem);

Would %03u be the better specifier since rem is u32?

>   strcat(buf, buf1);
>   }
>  
> -- 
> 2.26.2
> 

Cheers,
Nathan


Re: [PATCH 2/2] exec: Compute file based creds only once

2020-05-29 Thread Eric W. Biederman
Kees Cook  writes:

> On Fri, May 29, 2020 at 11:47:29AM -0500, Eric W. Biederman wrote:
>> diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h
>> index cd3dd0afceb5..37bb3df751c6 100644
>> --- a/include/linux/lsm_hooks.h
>> +++ b/include/linux/lsm_hooks.h
>> @@ -44,18 +44,18 @@
>>   *  request libc enable secure mode.
>> - *  The hook must set @bprm->pf_per_clear to the personality flags that
>> + *  The hook must set @bprm->per_clear to the personality flags that
>
> Here and the other per_clear comment have language that doesn't quite
> line up with how hooks should deal with the bits. They should not "set
> it to" the personality flags they want clear, they need to "add the
> bits" they want to see cleared. i.e I don't want something thinking
> they're the only one touching per_clear, so they should never do:
>   bprm->per_clear = PER_CLEAR_ON_SETID;
> but always:
>   bprm->per_clear |= PER_CLEAR_ON_SETID;
>
> How about:
>
> The hook must set @bprm->per_clear with any personality flag bits that

Sounds good:

The range-diff winds up being:
1:  c9258ef4879b ! 1:  a7868323c263 exec: Add a per bprm->file version of 
per_clear
@@ Commit message
 
 History Tree: 
git://git.kernel.org/pub/scm/linux/kernel/git/tglx/history.git
 Fixes: 1bb0fa189c6a ("[PATCH] NX: clean up legacy binary support")
+Reviewed-by: Kees Cook 
 Signed-off-by: "Eric W. Biederman" 
 
  ## fs/exec.c ##
@@ include/linux/lsm_hooks.h
   *transitions between security domains).
   *The hook must set @bprm->active_secureexec to 1 if AT_SECURE 
should be set to
   *request libc enable secure mode.
-+ *The hook must set @bprm->pf_per_clear to the personality flags 
that
-+ *should be cleared from current->personality.
++ *The hook must add to @bprm->pf_per_clear any personality flags 
that
++ *should be cleared from current->personality.
   *@bprm contains the linux_binprm structure.
   *Return 0 if the hook is successful and permission is granted.
   * @bprm_check_security:
2:  e6f20c69b96e ! 2:  56305aa9b6fa exec: Compute file based creds only once
@@ Commit message
 secureity attribute and derive capabilities from the fact the
 user had uid 0 has been added.
 
+Reviewed-by: Kees Cook 
 Signed-off-by: "Eric W. Biederman" 
 
  ## fs/binfmt_misc.c ##
@@ include/linux/lsm_hooks.h
 + *between security domains).
 + *The hook must set @bprm->secureexec to 1 if AT_SECURE should be 
set to
   *request libc enable secure mode.
-- *The hook must set @bprm->pf_per_clear to the personality flags 
that
-+ *The hook must set @bprm->per_clear to the personality flags that
-  *should be cleared from current->personality.
+- *The hook must add to @bprm->pf_per_clear any personality flags 
that
++ *The hook must add to @bprm->per_clear any personality flags that
+  *should be cleared from current->personality.
   *@bprm contains the linux_binprm structure.
   *Return 0 if the hook is successful and permission is granted.
 

>> diff --git a/security/commoncap.c b/security/commoncap.c
>
> Not about this patch, but while looking through this file, I see:
>
> int cap_bprm_set_creds(struct linux_binprm *bprm)
> {
>   ...
>   *capability manipulations*
>
> if (WARN_ON(!cap_ambient_invariant_ok(new)))
> return -EPERM;
>
> if (nonroot_raised_pE(new, old, root_uid, has_fcap)) {
> ret = audit_log_bprm_fcaps(bprm, new, old);
> if (ret < 0)
> return ret;
> }
>
> new->securebits &= ~issecure_mask(SECURE_KEEP_CAPS);
>
> if (WARN_ON(!cap_ambient_invariant_ok(new)))
> return -EPERM;
>   ...
> }
>
> The cap_ambient_invariant_ok() test is needlessly repeated: it doesn't
> examine securebits, and nonroot_raised_pE appears to have no
> side-effects.
>
> One of those can be dropped, yes?

That is what it looks like to me.

I am hoping to take a deep dive into this function after I finish with
bprm_fill_uid (the patches that were dropped).

My brain bends on little details like is_setid not testing if the
excutable was suid or sgid, but instead is testing something close but
unrelated.

I hope that when the dust clears the function can become a
straightforward implementation of the capability equations.
We will see.

Eric



Re: [PATCH 1/2] exec: Add a per bprm->file version of per_clear

2020-05-29 Thread Eric W. Biederman
Kees Cook  writes:

> On Fri, May 29, 2020 at 11:46:40AM -0500, Eric W. Biederman wrote:
>> 
>> There is a small bug in the code that recomputes parts of bprm->cred
>> for every bprm->file.  The code never recomputes the part of
>> clear_dangerous_personality_flags it is responsible for.
>> 
>> Which means that in practice if someone creates a sgid script
>> the interpreter will not be able to use any of:
>>  READ_IMPLIES_EXEC
>>  ADDR_NO_RANDOMIZE
>>  ADDR_COMPAT_LAYOUT
>>  MMAP_PAGE_ZERO.
>> 
>> This accentially clearing of personality flags probably does
>> not matter in practice because no one has complained
>> but it does make the code more difficult to understand.
>> 
>> Further remaining bug compatible prevents the recomputation from being
>> removed and replaced by simply computing bprm->cred once from the
>> final bprm->file.
>> 
>> Making this change removes the last behavior difference between
>> computing bprm->creds from the final file and recomputing
>> bprm->cred several times.  Which allows this behavior change
>> to be justified for it's own reasons, and for any but hunts
>> looking into why the behavior changed to wind up here instead
>> of in the code that will follow that computes bprm->cred
>> from the final bprm->file.
>> 
>> This small logic bug appears to have existed since the code
>> started clearing dangerous personality bits.
>> 
>> History Tree: git://git.kernel.org/pub/scm/linux/kernel/git/tglx/history.git
>> Fixes: 1bb0fa189c6a ("[PATCH] NX: clean up legacy binary support")
>> Signed-off-by: "Eric W. Biederman" 
>
> Yup, this looks good. Pointless nit because it's removed in the next
> patch, but pf_per_clear is following the same behavioral pattern as
> active_secureexec, it could be named active_per_clear, but since this
> already been bikeshed in v1, it's fine! :)

That plus it is very much true that active_ isn't a particularly good
prefix.  pf_ for per_file seems slightly better.

The only time I can imagine this patch seeing the light of day is if
someone happens to discover that this fixes a bug for them and just this
patch is backported.  At which point pf_per_clear pairs with
cap_elevated.  So I don't think it hurts.

*Shrug*

The next patch is my long term solution to the mess.

> Reviewed-by: Kees Cook 
>
> I wish we had more robust execve tests. :(

I think you have more skill at writing automated tests than I do.  So
feel free to write some.

Eric


[PATCH] usb: cdns3: fix possible buffer overflow caused by bad DMA value

2020-05-29 Thread Jia-Ju Bai
In cdns3_ep0_setup_phase():
  struct usb_ctrlrequest *ctrl = priv_dev->setup_buf;

Because priv_dev->setup_buf (allocated in cdns3_gadget_start) is stored 
in DMA memory, and thus ctrl is a DMA value.

cdns3_ep0_setup_phase()
  cdns3_ep0_standard_request(priv_dev, ctrl)
cdns3_req_ep0_get_status(priv_dev, ctrl)
  index = cdns3_ep_addr_to_index(ctrl->wIndex);
  priv_ep = priv_dev->eps[index];

cdns3_ep0_setup_phase()
  cdns3_ep0_standard_request(priv_dev, ctrl)
cdns3_req_ep0_handle_feature(priv_dev, ctrl_req, 0)
  cdns3_ep0_feature_handle_endpoint(priv_dev, ctrl, set)
index = cdns3_ep_addr_to_index(ctrl->wIndex);
priv_ep = priv_dev->eps[index];

In these cases, ctrl->wIndex can be be modified at anytime by malicious
hardware, and thus a buffer overflow can occur when the code
"priv_dev->eps[index]" is executed.

To fix these possible bugs, index is checked before being used.

Signed-off-by: Jia-Ju Bai 
---
 drivers/usb/cdns3/ep0.c | 5 +
 1 file changed, 5 insertions(+)

diff --git a/drivers/usb/cdns3/ep0.c b/drivers/usb/cdns3/ep0.c
index e71240b386b4..0a80c7ade613 100644
--- a/drivers/usb/cdns3/ep0.c
+++ b/drivers/usb/cdns3/ep0.c
@@ -265,6 +265,8 @@ static int cdns3_req_ep0_get_status(struct cdns3_device 
*priv_dev,
return cdns3_ep0_delegate_req(priv_dev, ctrl);
case USB_RECIP_ENDPOINT:
index = cdns3_ep_addr_to_index(ctrl->wIndex);
+   if (index >= CDNS3_ENDPOINTS_MAX_COUNT)
+   return -EINVAL;
priv_ep = priv_dev->eps[index];
 
/* check if endpoint is stalled or stall is pending */
@@ -388,6 +390,9 @@ static int cdns3_ep0_feature_handle_endpoint(struct 
cdns3_device *priv_dev,
return 0;
 
index = cdns3_ep_addr_to_index(ctrl->wIndex);
+   if (index >= CDNS3_ENDPOINTS_MAX_COUNT)
+   return -EINVAL;
+
priv_ep = priv_dev->eps[index];
 
cdns3_select_ep(priv_dev, ctrl->wIndex);
-- 
2.17.1



Re: [PATCH v2 2/3] seccomp: Introduce addfd ioctl to seccomp user notifier

2020-05-29 Thread Jann Horn
On Sat, May 30, 2020 at 4:43 AM Kees Cook  wrote:
> I mean, yes, that's certainly better, but it just seems a shame that
> everyone has to do the get_unused/put_unused dance just because of how
> SCM_RIGHTS does this weird put_user() in the middle.
>
> Can anyone clarify the expected failure mode from SCM_RIGHTS? Can we
> move the put_user() after instead?

Honestly, I think trying to remove file descriptors and such after
-EFAULT is a waste of time. If userspace runs into -EFAULT, userspace
is beyond saving and can't really do much other than exit immediately.
There are a bunch of places that will change state and then throw
-EFAULT at the end if userspace supplied an invalid address, because
trying to hold locks across userspace accesses just in case userspace
supplied a bogus address is kinda silly (and often borderline
impossible).

You can actually see that even scm_detach_fds() currently just
silently swallows errors if writing some header fields fails at the
end.


Re: [PATCH 9/9] staging: media: atomisp: add PMIC_OPREGION dependency

2020-05-29 Thread Nathan Chancellor
On Fri, May 29, 2020 at 10:00:31PM +0200, Arnd Bergmann wrote:
> Without that driver, there is a link failure in
> 
> ERROR: modpost: "intel_soc_pmic_exec_mipi_pmic_seq_element"
> [drivers/staging/media/atomisp/pci/atomisp_gmin_platform.ko] undefined!
> 
> Add an explicit Kconfig dependency.
> 
> Signed-off-by: Arnd Bergmann 

It'd be interesting to know if this is strictly required for the driver
to work properly. The call to intel_soc_pmic_exec_mipi_pmic_seq_element
has some error handling after it, maybe that should just be surrounded
by an #ifdef or IS_ENABLED for PMIC_OPREGION, like some other drivers
do.

Regardless of that:

Reviewed-by: Nathan Chancellor 

> ---
>  drivers/staging/media/atomisp/Kconfig | 1 +
>  1 file changed, 1 insertion(+)
> 
> diff --git a/drivers/staging/media/atomisp/Kconfig 
> b/drivers/staging/media/atomisp/Kconfig
> index c4f3049b0706..e86311c14329 100644
> --- a/drivers/staging/media/atomisp/Kconfig
> +++ b/drivers/staging/media/atomisp/Kconfig
> @@ -11,6 +11,7 @@ menuconfig INTEL_ATOMISP
>  config VIDEO_ATOMISP
>   tristate "Intel Atom Image Signal Processor Driver"
>   depends on VIDEO_V4L2 && INTEL_ATOMISP
> + depends on PMIC_OPREGION
>   select IOSF_MBI
>   select VIDEOBUF_VMALLOC
>   ---help---
> -- 
> 2.26.2
> 


Re: [PATCH 8/9] staging: media: atomisp: disable all custom formats

2020-05-29 Thread Nathan Chancellor
On Fri, May 29, 2020 at 10:00:30PM +0200, Arnd Bergmann wrote:
> clang points out the usage of an incorrect enum type in the
> list of supported image formats:
> 
> drivers/staging/media/atomisp/pci/atomisp_subdev.c:49:65: error: implicit 
> conversion from enumeration type 'enum ia_css_frame_format' to different 
> enumeration type 'enum atomisp_input_format' [-Werror,-Wenum-conversion]
> { V4L2_MBUS_FMT_CUSTOM_NV21, 12, 12, CSS_FRAME_FORMAT_NV21, 0, 
> CSS_FRAME_FORMAT_NV21 },
> drivers/staging/media/atomisp/pci/atomisp_subdev.c:49:39: error: implicit 
> conversion from enumeration type 'enum ia_css_frame_format' to different 
> enumeration type 'enum atomisp_input_format' [-Werror,-Wenum-conversion]
> { V4L2_MBUS_FMT_CUSTOM_NV21, 12, 12, CSS_FRAME_FORMAT_NV21, 0, 
> CSS_FRAME_FORMAT_NV21 },
> { V4L2_MBUS_FMT_CUSTOM_NV12, 12, 12, CSS_FRAME_FORMAT_NV12, 0, 
> CSS_FRAME_FORMAT_NV12 },
> { MEDIA_BUS_FMT_JPEG_1X8, 8, 8, CSS_FRAME_FORMAT_BINARY_8, 0, 
> ATOMISP_INPUT_FORMAT_BINARY_8 },
> 
> Checking the git history, I found a commit that disabled one such case
> because it did not work. It seems likely that the incorrect enum was
> part of the original problem and that the others do not work either,
> or have never been tested.
> 
> Disable all the ones that cause a warning.
> 
> Fixes: cb02ae3d71ea ("media: staging: atomisp: Disable custom format for now")
> Signed-off-by: Arnd Bergmann 

I have this patch in my local tree and debated sending it myself. I
think that this is the right fix for now, as the driver is being cleaned
up. Maybe add a FIXME like the rest of this driver?

Regardless of that last point:

Reviewed-by: Nathan Chancellor 

> ---
>  drivers/staging/media/atomisp/pci/atomisp_subdev.c | 2 ++
>  1 file changed, 2 insertions(+)
> 
> diff --git a/drivers/staging/media/atomisp/pci/atomisp_subdev.c 
> b/drivers/staging/media/atomisp/pci/atomisp_subdev.c
> index 46590129cbe3..8bce466cc128 100644
> --- a/drivers/staging/media/atomisp/pci/atomisp_subdev.c
> +++ b/drivers/staging/media/atomisp/pci/atomisp_subdev.c
> @@ -44,9 +44,11 @@ const struct atomisp_in_fmt_conv atomisp_in_fmt_conv[] = {
>   { MEDIA_BUS_FMT_SRGGB12_1X12, 12, 12, ATOMISP_INPUT_FORMAT_RAW_12, 
> CSS_BAYER_ORDER_RGGB, CSS_FORMAT_RAW_12 },
>   { MEDIA_BUS_FMT_UYVY8_1X16, 8, 8, ATOMISP_INPUT_FORMAT_YUV422_8, 0, 
> ATOMISP_INPUT_FORMAT_YUV422_8 },
>   { MEDIA_BUS_FMT_YUYV8_1X16, 8, 8, ATOMISP_INPUT_FORMAT_YUV422_8, 0, 
> ATOMISP_INPUT_FORMAT_YUV422_8 },
> +#if 0
>   { MEDIA_BUS_FMT_JPEG_1X8, 8, 8, CSS_FRAME_FORMAT_BINARY_8, 0, 
> ATOMISP_INPUT_FORMAT_BINARY_8 },
>   { V4L2_MBUS_FMT_CUSTOM_NV12, 12, 12, CSS_FRAME_FORMAT_NV12, 0, 
> CSS_FRAME_FORMAT_NV12 },
>   { V4L2_MBUS_FMT_CUSTOM_NV21, 12, 12, CSS_FRAME_FORMAT_NV21, 0, 
> CSS_FRAME_FORMAT_NV21 },
> +#endif
>   { V4L2_MBUS_FMT_CUSTOM_YUV420, 12, 12, 
> ATOMISP_INPUT_FORMAT_YUV420_8_LEGACY, 0, ATOMISP_INPUT_FORMAT_YUV420_8_LEGACY 
> },
>  #if 0
>   { V4L2_MBUS_FMT_CUSTOM_M10MO_RAW, 8, 8, CSS_FRAME_FORMAT_BINARY_8, 0, 
> ATOMISP_INPUT_FORMAT_BINARY_8 },
> -- 
> 2.26.2
> 


Re: [PATCH 7/9] staging: media: atomisp: fix enum type mixups

2020-05-29 Thread Nathan Chancellor
On Fri, May 29, 2020 at 10:00:29PM +0200, Arnd Bergmann wrote:
> Some function calls pass an incorrect enum type:
> 
> drivers/staging/media/atomisp/pci/hive_isp_css_common/host/input_system.c:858:16:
>  error: implicit conversion from enumeration type 'input_system_ID_t' to 
> different enumeration type 'gp_device_ID_t' [-Werror,-Wenum-conversion]
> gp_device_rst(INPUT_SYSTEM0_ID);
> ~ ^~~~
> drivers/staging/media/atomisp/pci/hive_isp_css_common/host/input_system.c:860:19:
>  error: implicit conversion from enumeration type 'input_system_ID_t' to 
> different enumeration type 'gp_device_ID_t' [-Werror,-Wenum-conversion]
> input_switch_rst(INPUT_SYSTEM0_ID);
>  ^~~~
> drivers/staging/media/atomisp/pci/hive_isp_css_common/host/input_system.c:876:27:
>  error: implicit conversion from enumeration type 'input_system_cfg_flag_t' 
> to different enumeration type 'input_system_connection_t' 
> [-Werror,-Wenum-conversion]
> config.multicast[i]  = 
> INPUT_SYSTEM_CFG_FLAG_RESET;
>  ~ ^~~
> drivers/staging/media/atomisp/pci/hive_isp_css_common/host/input_system.c:1326:32:
>  error: implicit conversion from enumeration type 'input_system_ID_t' to 
> different enumeration type 'gp_device_ID_t' [-Werror,-Wenum-conversion]
> input_selector_cfg_for_sensor(INPUT_SYSTEM0_ID);
> ~ ^~~~
> drivers/staging/media/atomisp/pci/hive_isp_css_common/host/input_system.c:1329:19:
>  error: implicit conversion from enumeration type 'input_system_ID_t' to 
> different enumeration type 'gp_device_ID_t' [-Werror,-Wenum-conversion]
> input_switch_cfg(INPUT_SYSTEM0_ID, _switch_cfg);
>  ^~~~
> 
> INPUT_SYSTEM0_ID is zero, so use the corresponding zero-value
> of the expected types instead.
> 
> Fixes: a49d25364dfb ("staging/atomisp: Add support for the Intel IPU v2")
> Signed-off-by: Arnd Bergmann 

Huh weird that I did not see this warning but you do randconfigs so
that's expected.

Reviewed-by: Nathan Chancellor 

> ---
>  .../pci/hive_isp_css_common/host/input_system.c| 10 +-
>  1 file changed, 5 insertions(+), 5 deletions(-)
> 
> diff --git 
> a/drivers/staging/media/atomisp/pci/hive_isp_css_common/host/input_system.c 
> b/drivers/staging/media/atomisp/pci/hive_isp_css_common/host/input_system.c
> index 2114cf4f3fda..aa0f0fca9346 100644
> --- 
> a/drivers/staging/media/atomisp/pci/hive_isp_css_common/host/input_system.c
> +++ 
> b/drivers/staging/media/atomisp/pci/hive_isp_css_common/host/input_system.c
> @@ -855,9 +855,9 @@ input_system_error_t 
> input_system_configuration_reset(void)
>  
>   input_system_network_rst(INPUT_SYSTEM0_ID);
>  
> - gp_device_rst(INPUT_SYSTEM0_ID);
> + gp_device_rst(GP_DEVICE0_ID);
>  
> - input_switch_rst(INPUT_SYSTEM0_ID);
> + input_switch_rst(GP_DEVICE0_ID);
>  
>   //target_rst();
>  
> @@ -873,7 +873,7 @@ input_system_error_t 
> input_system_configuration_reset(void)
>  
>   for (i = 0; i < N_CSI_PORTS; i++) {
>   config.csi_buffer_flags[i]   = INPUT_SYSTEM_CFG_FLAG_RESET;
> - config.multicast[i]  = INPUT_SYSTEM_CFG_FLAG_RESET;
> + config.multicast[i]  = INPUT_SYSTEM_DISCARD_ALL;
>   }
>  
>   config.source_type_flags = 
> INPUT_SYSTEM_CFG_FLAG_RESET;
> @@ -1323,10 +1323,10 @@ static input_system_error_t 
> configuration_to_registers(void)
>   } // end of switch (source_type)
>  
>   // Set input selector.
> - input_selector_cfg_for_sensor(INPUT_SYSTEM0_ID);
> + input_selector_cfg_for_sensor(GP_DEVICE0_ID);
>  
>   // Set input switch.
> - input_switch_cfg(INPUT_SYSTEM0_ID, _switch_cfg);
> + input_switch_cfg(GP_DEVICE0_ID, _switch_cfg);
>  
>   // Set input formatters.
>   // AM: IF are set dynamically.
> -- 
> 2.26.2
> 


Re: [PATCH 5/9] staging: media: atomisp: fix stack overflow in init_pipe_defaults()

2020-05-29 Thread Nathan Chancellor
On Fri, May 29, 2020 at 10:00:27PM +0200, Arnd Bergmann wrote:
> When building with clang, multiple copies of the structures to be
> initialized are passed around on the stack and copied locally, using an
> insane amount of stack space:
> 
> drivers/staging/media/atomisp/pci/sh_css.c:2371:1: error: stack frame size of 
> 26864 bytes in function 'create_pipe' [-Werror,-Wframe-larger-than=]
> 
> Use constantly-allocated variables plus an explicit memcpy()
> to avoid that.
> 
> Fixes: 6dc9a2568f84 ("media: atomisp: convert default struct values to use 
> compound-literals with designated initializers")
> Signed-off-by: Arnd Bergmann 

Reviewed-by: Nathan Chancellor 

> ---
>  drivers/staging/media/atomisp/pci/sh_css.c | 16 +++-
>  1 file changed, 11 insertions(+), 5 deletions(-)
> 
> diff --git a/drivers/staging/media/atomisp/pci/sh_css.c 
> b/drivers/staging/media/atomisp/pci/sh_css.c
> index e91c6029c651..1e8b9d637116 100644
> --- a/drivers/staging/media/atomisp/pci/sh_css.c
> +++ b/drivers/staging/media/atomisp/pci/sh_css.c
> @@ -2264,6 +2264,12 @@ static enum ia_css_err
>  init_pipe_defaults(enum ia_css_pipe_mode mode,
>  struct ia_css_pipe *pipe,
>  bool copy_pipe) {
> + static const struct ia_css_pipe default_pipe = IA_CSS_DEFAULT_PIPE;
> + static const struct ia_css_preview_settings preview = 
> IA_CSS_DEFAULT_PREVIEW_SETTINGS;
> + static const struct ia_css_capture_settings capture = 
> IA_CSS_DEFAULT_CAPTURE_SETTINGS;
> + static const struct ia_css_video_settings video = 
> IA_CSS_DEFAULT_VIDEO_SETTINGS;
> + static const struct ia_css_yuvpp_settings yuvpp = 
> IA_CSS_DEFAULT_YUVPP_SETTINGS;
> +
>   if (!pipe)
>   {
>   IA_CSS_ERROR("NULL pipe parameter");
> @@ -2271,14 +2277,14 @@ init_pipe_defaults(enum ia_css_pipe_mode mode,
>   }
>  
>   /* Initialize pipe to pre-defined defaults */
> - *pipe = IA_CSS_DEFAULT_PIPE;
> + memcpy(pipe, _pipe, sizeof(default_pipe));
>  
>   /* TODO: JB should not be needed, but temporary backward reference */
>   switch (mode)
>   {
>   case IA_CSS_PIPE_MODE_PREVIEW:
>   pipe->mode = IA_CSS_PIPE_ID_PREVIEW;
> - pipe->pipe_settings.preview = IA_CSS_DEFAULT_PREVIEW_SETTINGS;
> + memcpy(>pipe_settings.preview, , sizeof(preview));
>   break;
>   case IA_CSS_PIPE_MODE_CAPTURE:
>   if (copy_pipe) {
> @@ -2286,11 +2292,11 @@ init_pipe_defaults(enum ia_css_pipe_mode mode,
>   } else {
>   pipe->mode = IA_CSS_PIPE_ID_CAPTURE;
>   }
> - pipe->pipe_settings.capture = IA_CSS_DEFAULT_CAPTURE_SETTINGS;
> + memcpy(>pipe_settings.capture, , sizeof(capture));
>   break;
>   case IA_CSS_PIPE_MODE_VIDEO:
>   pipe->mode = IA_CSS_PIPE_ID_VIDEO;
> - pipe->pipe_settings.video = IA_CSS_DEFAULT_VIDEO_SETTINGS;
> + memcpy(>pipe_settings.video, , sizeof(video));
>   break;
>   case IA_CSS_PIPE_MODE_ACC:
>   pipe->mode = IA_CSS_PIPE_ID_ACC;
> @@ -2300,7 +2306,7 @@ init_pipe_defaults(enum ia_css_pipe_mode mode,
>   break;
>   case IA_CSS_PIPE_MODE_YUVPP:
>   pipe->mode = IA_CSS_PIPE_ID_YUVPP;
> - pipe->pipe_settings.yuvpp = IA_CSS_DEFAULT_YUVPP_SETTINGS;
> + memcpy(>pipe_settings.yuvpp, , sizeof(yuvpp));
>   break;
>   default:
>   return IA_CSS_ERR_INVALID_ARGUMENTS;
> -- 
> 2.26.2
> 


Re: [PATCH 2/9] staging: media: atomisp: declare 'struct device' before using it

2020-05-29 Thread Nathan Chancellor
On Fri, May 29, 2020 at 10:00:24PM +0200, Arnd Bergmann wrote:
> In some configurations, including this header leads to a warning:
> 
> drivers/staging/media/atomisp//pci/sh_css_firmware.h:41:38: error: 
> declaration of 'struct device' will not be visible outside of this function 
> [-Werror,-Wvisibility]
> 
> Make sure the struct tag is known before declaring a function
> that uses it as an argument.
> 
> Fixes: 9d4fa1a16b28 ("media: atomisp: cleanup directory hierarchy")
> Signed-off-by: Arnd Bergmann 

Reviewed-by: Nathan Chancellor 

> ---
>  drivers/staging/media/atomisp/pci/sh_css_firmware.h | 1 +
>  1 file changed, 1 insertion(+)
> 
> diff --git a/drivers/staging/media/atomisp/pci/sh_css_firmware.h 
> b/drivers/staging/media/atomisp/pci/sh_css_firmware.h
> index f6253392a6c9..317559c7689f 100644
> --- a/drivers/staging/media/atomisp/pci/sh_css_firmware.h
> +++ b/drivers/staging/media/atomisp/pci/sh_css_firmware.h
> @@ -37,6 +37,7 @@ extern unsigned int sh_css_num_binaries;
>  char
>  *sh_css_get_fw_version(void);
>  
> +struct device;
>  bool
>  sh_css_check_firmware_version(struct device *dev, const char *fw_data);
>  
> -- 
> 2.26.2
> 


Re: [PATCH 3/9] staging: media: atomisp: annotate an unused function

2020-05-29 Thread Nathan Chancellor
On Fri, May 29, 2020 at 10:00:25PM +0200, Arnd Bergmann wrote:
> atomisp_mrfld_power() has no more callers and produces
> a warning:
> 
> drivers/staging/media/atomisp/pci/atomisp_v4l2.c:764:12: error: unused 
> function 'atomisp_mrfld_power' [-Werror,-Wunused-function]
> 
> Mark the function as unused while the PM code is being
> debugged, expecting that it will be used again in the
> future and should not just be removed.
> 
> Fixes: 95d1f398c4dc ("media: atomisp: keep the ISP powered on when setting 
> it")
> Signed-off-by: Arnd Bergmann 

Mauro fixed this in his experimental branch:

https://git.linuxtv.org/mchehab/experimental.git/commit/?id=dbcee8118fc9283401df9dbe8ba03ab9d17433ff

> ---
>  drivers/staging/media/atomisp/pci/atomisp_v4l2.c | 3 ++-
>  1 file changed, 2 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/staging/media/atomisp/pci/atomisp_v4l2.c 
> b/drivers/staging/media/atomisp/pci/atomisp_v4l2.c
> index 694268d133c0..10abb35ba0e0 100644
> --- a/drivers/staging/media/atomisp/pci/atomisp_v4l2.c
> +++ b/drivers/staging/media/atomisp/pci/atomisp_v4l2.c
> @@ -761,7 +761,8 @@ static void punit_ddr_dvfs_enable(bool enable)
>   pr_info("DDR DVFS, door bell is not cleared within 3ms\n");
>  }
>  
> -static int atomisp_mrfld_power(struct atomisp_device *isp, bool enable)
> +static __attribute__((unused)) int
> +atomisp_mrfld_power(struct atomisp_device *isp, bool enable)
>  {
>   unsigned long timeout;
>   u32 val = enable ? MRFLD_ISPSSPM0_IUNIT_POWER_ON :
> -- 
> 2.26.2
> 


Re: [PATCH 1/9] staging: media: atomisp: fix incorrect NULL pointer check

2020-05-29 Thread Nathan Chancellor
On Fri, May 29, 2020 at 10:31:44PM +0200, Arnd Bergmann wrote:
> On Fri, May 29, 2020 at 10:23 PM Arnd Bergmann  wrote:
> >
> > On Fri, May 29, 2020 at 10:04 PM 'Nick Desaulniers' via Clang Built
> > Linux  wrote:
> > >
> > > See also Nathan's 7 patch series.
> > > https://lore.kernel.org/lkml/20200527071150.3381228-1-natechancel...@gmail.com/
> > >
> > > Might be some overlap between series?
> > >
> >
> > Probably. I really should have checked when I saw the number of warnings.
> >
> > At least this gives Mauro a chance to double-check the changes and see if
> > Nathan and I came to different conclusions on any of them.
> 
> I checked now and found that the overlap is smaller than I expected.
> In each case, Nathans' solution seems more complete than mine,
> so this patch ("staging: media: atomisp: fix incorrect NULL pointer check")
> and also "staging: media: atomisp: fix a type conversion warning" can be
> dropped, but I think the others are still needed.
> 
> Arnd

Thanks for double checking! I will read through the rest of the series
and review as I can.

Cheers,
Nathan


Re: [PATCH v2 2/3] seccomp: Introduce addfd ioctl to seccomp user notifier

2020-05-29 Thread Kees Cook
On Sat, May 30, 2020 at 01:10:55AM +, Sargun Dhillon wrote:
> // And then SCM reads:
>   for (i=0, cmfptr=(__force int __user *)CMSG_DATA(cm); ii++, cmfptr++)
>   {
>   int new_fd;
>   err = get_unused_fd_flags(MSG_CMSG_CLOEXEC & msg->msg_flags
> ? O_CLOEXEC : 0);
>   if (err < 0)
>   break;
>   new_fd = err;
>   err = put_user(new_fd, cmfptr);
>   if (err) {
>   put_unused_fd(new_fd);
>   break;
>   }
> 
>   err = file_receive(new_fd, fp[i]);
>   if (err) {
>   put_unused_fd(new_fd);
>   break;
>   }
>   }
> 
> And our code reads:
> 
> 
> static void seccomp_handle_addfd(struct seccomp_kaddfd *addfd)
> {
>   int ret, err;
> 
>   /*
>* Remove the notification, and reset the list pointers, indicating
>* that it has been handled.
>*/
>   list_del_init(>list);
> 
>   if (addfd->fd == -1) {
>   ret = get_unused_fd_flags(addfd->flags);
>   if (ret < 0)
>   goto err;
> 
>   err = file_receive(ret, addfd->file);
>   if (err) {
>   put_unused_fd(ret);
>   ret = err;
>   }
>   } else {
>   ret = file_receive_replace(addfd->fd, addfd->flags,
>  addfd->file);
>   }
> 
> err:
>   addfd->ret = ret;
>   complete(>completion);
> }
> 
> 
> And the pidfd getfd code reads:
> 
> static int pidfd_getfd(struct pid *pid, int fd)
> {
>   struct task_struct *task;
>   struct file *file;
>   int ret, err;
> 
>   task = get_pid_task(pid, PIDTYPE_PID);
>   if (!task)
>   return -ESRCH;
> 
>   file = __pidfd_fget(task, fd);
>   put_task_struct(task);
>   if (IS_ERR(file))
>   return PTR_ERR(file);
> 
>   ret = get_unused_fd_flags(O_CLOEXEC);
>   if (ret >= 0) {
>   err = file_receive(ret, file);
>   if (err) {
>   put_unused_fd(ret);
>   ret = err;
>   }
>   }
> 
>   fput(file);
>   return ret;
> }

I mean, yes, that's certainly better, but it just seems a shame that
everyone has to do the get_unused/put_unused dance just because of how
SCM_RIGHTS does this weird put_user() in the middle.

Can anyone clarify the expected failure mode from SCM_RIGHTS? Can we
move the put_user() after instead? I think cleanup would just be:
replace_fd(fd, NULL, 0)

So:

(updated to skip sock updates on failure; thank you Christian!)

int file_receive(int fd, unsigned long flags, struct file *file)
{
struct socket *sock;
int ret;

ret = security_file_receive(file);
if (ret)
return ret;

/* Install the file. */
if (fd == -1) {
ret = get_unused_fd_flags(flags);
if (ret >= 0)
fd_install(ret, get_file(file));
} else {
ret = replace_fd(fd, file, flags);
}

/* Bump the sock usage counts. */
if (ret >= 0) {
sock = sock_from_file(addfd->file, );
if (sock) {
sock_update_netprioidx(>sk->sk_cgrp_data);
sock_update_classid(>sk->sk_cgrp_data);
}
}

return ret;
}

scm_detach_fds()
...
for (i=0, cmfptr=(__force int __user *)CMSG_DATA(cm); imsg_flags
  ? O_CLOEXEC : 0, fp[i]);
if (err < 0)
break;
new_fd = err;

err = put_user(err, cmfptr);
if (err) {
/*
 * If we can't notify userspace that it got the
 * fd, we need to unwind and remove it again.
 */
replace_fd(new_fd, NULL, 0);
break;
}
}
...



-- 
Kees Cook


[PATCH] net: vmxnet3: fix possible buffer overflow caused by bad DMA value in vmxnet3_get_rss()

2020-05-29 Thread Jia-Ju Bai
The value adapter->rss_conf is stored in DMA memory, and it is assigned
to rssConf, so rssConf->indTableSize can be modified at anytime by
malicious hardware. Because rssConf->indTableSize is assigned to n,
buffer overflow may occur when the code "rssConf->indTable[n]" is
executed.

To fix this possible bug, n is checked after being used.

Signed-off-by: Jia-Ju Bai 
---
 drivers/net/vmxnet3/vmxnet3_ethtool.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/net/vmxnet3/vmxnet3_ethtool.c 
b/drivers/net/vmxnet3/vmxnet3_ethtool.c
index 6528940ce5f3..b53bb8bcd47f 100644
--- a/drivers/net/vmxnet3/vmxnet3_ethtool.c
+++ b/drivers/net/vmxnet3/vmxnet3_ethtool.c
@@ -700,6 +700,8 @@ vmxnet3_get_rss(struct net_device *netdev, u32 *p, u8 *key, 
u8 *hfunc)
*hfunc = ETH_RSS_HASH_TOP;
if (!p)
return 0;
+   if (n > UPT1_RSS_MAX_IND_TABLE_SIZE)
+   return 0;
while (n--)
p[n] = rssConf->indTable[n];
return 0;
-- 
2.17.1



[PATCH] media: venus: fix possible buffer overlow casued bad DMA value in venus_sfr_print()

2020-05-29 Thread Jia-Ju Bai
The value hdev->sfr.kva is stored in DMA memory, and it is assigned to
sfr, so sfr->buf_size can be modified at anytime by malicious hardware. 
In this case, a buffer overflow may happen when the code 
"sfr->data[sfr->buf_size - 1]" is executed.

To fix this possible bug, sfr->buf_size is assigned to a local variable,
and then this variable is checked before being used.

Signed-off-by: Jia-Ju Bai 
---
 drivers/media/platform/qcom/venus/hfi_venus.c | 9 +++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/drivers/media/platform/qcom/venus/hfi_venus.c 
b/drivers/media/platform/qcom/venus/hfi_venus.c
index 0d8855014ab3..4251a9e47a1b 100644
--- a/drivers/media/platform/qcom/venus/hfi_venus.c
+++ b/drivers/media/platform/qcom/venus/hfi_venus.c
@@ -960,18 +960,23 @@ static void venus_sfr_print(struct venus_hfi_device *hdev)
 {
struct device *dev = hdev->core->dev;
struct hfi_sfr *sfr = hdev->sfr.kva;
+   u32 buf_size;
void *p;
 
if (!sfr)
return;
 
-   p = memchr(sfr->data, '\0', sfr->buf_size);
+   buf_size = sfr->buf_size;
+   if (buf_size > 1)
+   return;
+
+   p = memchr(sfr->data, '\0', buf_size);
/*
 * SFR isn't guaranteed to be NULL terminated since SYS_ERROR indicates
 * that Venus is in the process of crashing.
 */
if (!p)
-   sfr->data[sfr->buf_size - 1] = '\0';
+   sfr->data[buf_size - 1] = '\0';
 
dev_err_ratelimited(dev, "SFR message from FW: %s\n", sfr->data);
 }
-- 
2.17.1



Re: [PATCH v2 2/2] regulator: Add support for sync_state() callbacks

2020-05-29 Thread Saravana Kannan
On Fri, May 29, 2020 at 6:00 AM Mark Brown  wrote:
>
> On Thu, May 28, 2020 at 12:06:10PM -0700, Saravana Kannan wrote:
> > When a regulator is left on by the bootloader or anything else before
> > the kernel starts (let's call this a "boot on" regulator), we need to
> > keep it on till all the consumers of the regulator have probed. This is
>
> We we don't in general have a requirement that any of the consumers of a
> regulator will ever probe.  As I thought I'd already made clear that
> case really needs to be handled.

Sorry, in the eagerness to address all your concerns, I had forgotten
to update the commit text in v2. I'll reword it to clarify this'll
happen only when there's no timeout set.

> > especially important for regulators that might be powering more than one
> > consumer. Otherwise, when the first consumer probes, enables and then
> > disables the "boot on" regulator, it'd turn off the power to rest of the
> > consumers of the "boot on" regulator.
>
> Which is a problem because...?

Those consumers that haven't probed can be powered on and active and
can crash the system if the power is pulled under their feet.

> > The sync_state() callback that's been added to drivers is meant for
> > situations like this. It gets called when all the consumers of a device
> > have probed successfully. To ease the transition to sync_state()
> > callbacks, it is never called before late_initcall_sync().
>
> This is not terribly useful for regulators where adding any of these
> delays is going to create surprises.  Frankly I can't really see why
> deferring things until late_initcall() would help anything.

Is this different from what's done already? Not having this delay is
easy -- but will have to be at a global level across
devices/resources.

> > sync_state() callbacks become even more useful when combined with
> > fw_devlink.  If fw_devlink is off, sync_state() is called at
> > late_initcall_sync() or the regulator device probing successfully --
> > whichever is later. This is because, with fw_devlink off, there would be
> > no consumers to the regulator device when it probes.
>
> This breaks the case where no driver ever instantiates for a device.
>
> Oh, actually now I get to the very end of the patch I see there is
> actually a timeout in here which wasn't mentioned in the changelog at
> all.  I very nearly didn't read the actual code as according to the
> changelog this issue hadn't been addressed.

Again, sorry I didn't update the commit text. I'll skip replying to
rest of your comments on the commit text, because all of them are due
to stale commit text.

> > This commit adds a regulator_sync_state() helper function that takes
> > care of all the "boot on" regulator clean up for any regulator driver.
> > All one needs to do is add the following line to the driver struct.
> >
> > .sync_state = regulator_sync_state,
>
> Exactly the same issues as before apply here, why are devices getting
> involved here?
>
> > +static void regulator_set_minimum_state(struct regulator_dev *rdev)
> > +{
>
> I find this name very confusing.  If anything it's doing the opposite of
> setting a minimum state, it's trying to prevent that happening.

I agree. I renamed it to try and make it better, but I can see how
it's more confusing once you called it out. Suggestions?

regulator_set_boot_limits? regulator_set_boot_constraints?

> > + /*
> > +  * Wait for parent supply to be ready before trying to keep this
> > +  * regulator on.
> > +  */
> > + if (rdev->supply_name && !rdev->supply)
> > + return;
>
> I can't make sense of this.  This stuff only limits disabling, not
> enabling, regulators, we're keeping things on here anyway and why would
> we care about the supply for disabling?

We want to wait till supply is set up before we try to put the
"enable" vote on a regulator. Otherwise, it won't be propagated
properly? I do know that regulator_resolve_supply() takes care of
propagating the use count, but we could delete that code if we just
till the supply is resolved before "enabling" the regulator. The usual
clients can't "get" the regulator anyway without the supply being
resolved. Long story short, doing it this way can allow us to delete
some functionally duplicative code.

> > +static int regulator_rel_minimum_state(struct device *dev, void *data)
> > +{
> > + struct regulator_dev *rdev = dev_to_rdev(dev);
> > +
> > + if (dev->parent != data)
> > + return 0;
>
> I've just realised that this is even more restrictive than the
> descriptions have suggested - it's not just preventing any changes until
> all potential consumers of a given regulator have instantiated, it's
> preventing changes until all potential consumers of all resources
> provided by a given device have instantiated.  Given that many systems
> have a single PMIC which may also be providing other things like GPIOs
> that would mean that any consumer that doesn't instantiate would prevent
> any device 

Re: [net-next 1/6] net: marvell: prestera: Add driver for Prestera family ASIC devices

2020-05-29 Thread Andrew Lunn
On Sat, May 30, 2020 at 03:46:22AM +0300, Vadym Kochan wrote:
> Hi David,
> 
> On Fri, May 29, 2020 at 05:18:39PM -0700, David Miller wrote:
> > 
> > Please remove all of the __packed attributes.
> > 
> > I looked at your data structures and all of them use fixed sized types
> > and are multiples of 4 so the __packed attribute is completely
> > unnecessary.
> > 
> > The alignment attribute is also unnecessary so please remove that too.
> 
> Some of the fields are u8, so I assume there might be holes added by
> the compiler ? Also these attributes guarantee some ABI compatibility
> with FW side, I will try to remove them and check but it sounds for me a bit
> risky.

Hi Vadym

You might want to play with pahole(1).

Andrew


[PATCH v5 1/2] cpufreq: change '.set_boost' to act on only one policy

2020-05-29 Thread Xiongfeng Wang
Macro 'for_each_active_policy()' is defined internally. To avoid some
cpufreq driver needing this macro to iterate over all the policies in
'.set_boost' callback, we redefine '.set_boost' to act on only one
policy and pass the policy as an argument.
'cpufreq_boost_trigger_state()' iterate over all the policies to set
boost for the system. This is preparation for adding SW BOOST support
for CPPC.

To protect Boost enable/disable by sysfs from CPU online/offline, we
add 'cpu_hotplug_lock' before calling '.set_boost' for each CPU. Also we
move the lock from 'set_boost()' to 'store_cpb()' for acpi_cpufreq.

Signed-off-by: Xiongfeng Wang 
Suggested-by: Viresh Kumar 
---
 drivers/cpufreq/acpi-cpufreq.c | 14 ++-
 drivers/cpufreq/cpufreq.c  | 57 +++---
 include/linux/cpufreq.h|  2 +-
 3 files changed, 40 insertions(+), 33 deletions(-)

diff --git a/drivers/cpufreq/acpi-cpufreq.c b/drivers/cpufreq/acpi-cpufreq.c
index 289e8ce..429e5a3 100644
--- a/drivers/cpufreq/acpi-cpufreq.c
+++ b/drivers/cpufreq/acpi-cpufreq.c
@@ -126,12 +126,12 @@ static void boost_set_msr_each(void *p_en)
boost_set_msr(enable);
 }
 
-static int set_boost(int val)
+static int set_boost(struct cpufreq_policy *policy, int val)
 {
-   get_online_cpus();
-   on_each_cpu(boost_set_msr_each, (void *)(long)val, 1);
-   put_online_cpus();
-   pr_debug("Core Boosting %sabled.\n", val ? "en" : "dis");
+   on_each_cpu_mask(policy->cpus, boost_set_msr_each,
+(void *)(long)val, 1);
+   pr_debug("CPU %*pbl: Core Boosting %sabled.\n",
+cpumask_pr_args(policy->cpus), val ? "en" : "dis");
 
return 0;
 }
@@ -162,7 +162,9 @@ static ssize_t store_cpb(struct cpufreq_policy *policy, 
const char *buf,
if (ret || val > 1)
return -EINVAL;
 
-   set_boost(val);
+   get_online_cpus();
+   set_boost(policy, val);
+   put_online_cpus();
 
return count;
 }
diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
index d03f250..0128de3 100644
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@@ -2532,34 +2532,29 @@ void cpufreq_update_limits(unsigned int cpu)
 /*
  *   BOOST  *
  */
-static int cpufreq_boost_set_sw(int state)
+static int cpufreq_boost_set_sw(struct cpufreq_policy *policy, int state)
 {
-   struct cpufreq_policy *policy;
-
-   for_each_active_policy(policy) {
-   int ret;
+   int ret;
 
-   if (!policy->freq_table)
-   return -ENXIO;
+   if (!policy->freq_table)
+   return -ENXIO;
 
-   ret = cpufreq_frequency_table_cpuinfo(policy,
- policy->freq_table);
-   if (ret) {
-   pr_err("%s: Policy frequency update failed\n",
-  __func__);
-   return ret;
-   }
-
-   ret = freq_qos_update_request(policy->max_freq_req, 
policy->max);
-   if (ret < 0)
-   return ret;
+   ret = cpufreq_frequency_table_cpuinfo(policy, policy->freq_table);
+   if (ret) {
+   pr_err("%s: Policy frequency update failed\n", __func__);
+   return ret;
}
 
+   ret = freq_qos_update_request(policy->max_freq_req, policy->max);
+   if (ret < 0)
+   return ret;
+
return 0;
 }
 
 int cpufreq_boost_trigger_state(int state)
 {
+   struct cpufreq_policy *policy;
unsigned long flags;
int ret = 0;
 
@@ -2570,15 +2565,25 @@ int cpufreq_boost_trigger_state(int state)
cpufreq_driver->boost_enabled = state;
write_unlock_irqrestore(_driver_lock, flags);
 
-   ret = cpufreq_driver->set_boost(state);
-   if (ret) {
-   write_lock_irqsave(_driver_lock, flags);
-   cpufreq_driver->boost_enabled = !state;
-   write_unlock_irqrestore(_driver_lock, flags);
-
-   pr_err("%s: Cannot %s BOOST\n",
-  __func__, state ? "enable" : "disable");
+   get_online_cpus();
+   for_each_active_policy(policy) {
+   ret = cpufreq_driver->set_boost(policy, state);
+   if (ret)
+   goto err_reset_state;
}
+   put_online_cpus();
+
+   return 0;
+
+err_reset_state:
+   put_online_cpus();
+
+   write_lock_irqsave(_driver_lock, flags);
+   cpufreq_driver->boost_enabled = !state;
+   write_unlock_irqrestore(_driver_lock, flags);
+
+   pr_err("%s: Cannot %s BOOST\n",
+  __func__, state ? "enable" : "disable");
 
return ret;
 }
diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h
index 

[PATCH v5 0/2] add SW BOOST support for CPPC

2020-05-29 Thread Xiongfeng Wang
ACPI spec 6.2 section 8.4.7.1 provide the following two CPC registers.

"Highest performance is the absolute maximum performance an individual
processor may reach, assuming ideal conditions. This performance level
may not be sustainable for long durations, and may only be achievable if
other platform components are in a specific state; for example, it may
require other processors be in an idle state.

Nominal Performance is the maximum sustained performance level of the
processor, assuming ideal operating conditions. In absence of an
external constraint (power, thermal, etc.) this is the performance level
the platform is expected to be able to maintain continuously. All
processors are expected to be able to sustain their nominal performance
state simultaneously."

We can use Highest Performance as the max performance in boost mode and
Nomial Performance as the max performance in non-boost mode. If the
Highest Performance is greater than the Nominal Performance, we assume
SW BOOST is supported.

Changelog:

v4 -> v5:
add 'cpu_hotplug_lock' before calling '.set_boost'
v3 -> v4:
run 'boost_set_msr_each' for each CPU in the policy rather than
each CPU in the system for 'acpi-cpufreq'
add 'Suggested-by'

Xiongfeng Wang (2):
  cpufreq: change '.set_boost' to act on only one policy
  CPPC: add support for SW BOOST

 drivers/cpufreq/acpi-cpufreq.c | 14 ++-
 drivers/cpufreq/cppc_cpufreq.c | 39 +++--
 drivers/cpufreq/cpufreq.c  | 57 +++---
 include/linux/cpufreq.h|  2 +-
 4 files changed, 77 insertions(+), 35 deletions(-)

-- 
1.7.12.4



[PATCH v5 2/2] CPPC: add support for SW BOOST

2020-05-29 Thread Xiongfeng Wang
To add SW BOOST support for CPPC, we need to get the max frequency of
boost mode and non-boost mode. ACPI spec 6.2 section 8.4.7.1 describe
the following two CPC registers.

"Highest performance is the absolute maximum performance an individual
processor may reach, assuming ideal conditions. This performance level
may not be sustainable for long durations, and may only be achievable if
other platform components are in a specific state; for example, it may
require other processors be in an idle state.

Nominal Performance is the maximum sustained performance level of the
processor, assuming ideal operating conditions. In absence of an
external constraint (power, thermal, etc.) this is the performance level
the platform is expected to be able to maintain continuously. All
processors are expected to be able to sustain their nominal performance
state simultaneously."

To add SW BOOST support for CPPC, we can use Highest Performance as the
max performance in boost mode and Nominal Performance as the max
performance in non-boost mode. If the Highest Performance is greater
than the Nominal Performance, we assume SW BOOST is supported.

The current CPPC driver does not support SW BOOST and use 'Highest
Performance' as the max performance the CPU can achieve. 'Nominal
Performance' is used to convert 'performance' to 'frequency'. That
means, if firmware enable boost and provide a value for Highest
Performance which is greater than Nominal Performance, boost feature is
enabled by default.

Because SW BOOST is disabled by default, so, after this patch, boost
feature is disabled by default even if boost is enabled by firmware.

Signed-off-by: Xiongfeng Wang 
Suggested-by: Viresh Kumar 
---
 drivers/cpufreq/cppc_cpufreq.c | 39 +--
 1 file changed, 37 insertions(+), 2 deletions(-)

diff --git a/drivers/cpufreq/cppc_cpufreq.c b/drivers/cpufreq/cppc_cpufreq.c
index bda0b24..257d726 100644
--- a/drivers/cpufreq/cppc_cpufreq.c
+++ b/drivers/cpufreq/cppc_cpufreq.c
@@ -37,6 +37,7 @@
  * requested etc.
  */
 static struct cppc_cpudata **all_cpu_data;
+static bool boost_supported;
 
 struct cppc_workaround_oem_info {
char oem_id[ACPI_OEM_ID_SIZE + 1];
@@ -310,7 +311,7 @@ static int cppc_cpufreq_cpu_init(struct cpufreq_policy 
*policy)
 * Section 8.4.7.1.1.5 of ACPI 6.1 spec)
 */
policy->min = cppc_cpufreq_perf_to_khz(cpu, 
cpu->perf_caps.lowest_nonlinear_perf);
-   policy->max = cppc_cpufreq_perf_to_khz(cpu, 
cpu->perf_caps.highest_perf);
+   policy->max = cppc_cpufreq_perf_to_khz(cpu, 
cpu->perf_caps.nominal_perf);
 
/*
 * Set cpuinfo.min_freq to Lowest to make the full range of performance
@@ -318,7 +319,7 @@ static int cppc_cpufreq_cpu_init(struct cpufreq_policy 
*policy)
 * nonlinear perf
 */
policy->cpuinfo.min_freq = cppc_cpufreq_perf_to_khz(cpu, 
cpu->perf_caps.lowest_perf);
-   policy->cpuinfo.max_freq = cppc_cpufreq_perf_to_khz(cpu, 
cpu->perf_caps.highest_perf);
+   policy->cpuinfo.max_freq = cppc_cpufreq_perf_to_khz(cpu, 
cpu->perf_caps.nominal_perf);
 
policy->transition_delay_us = 
cppc_cpufreq_get_transition_delay_us(cpu_num);
policy->shared_type = cpu->shared_type;
@@ -343,6 +344,13 @@ static int cppc_cpufreq_cpu_init(struct cpufreq_policy 
*policy)
 
cpu->cur_policy = policy;
 
+   /*
+* If 'highest_perf' is greater than 'nominal_perf', we assume CPU Boost
+* is supported.
+*/
+   if (cpu->perf_caps.highest_perf > cpu->perf_caps.nominal_perf)
+   boost_supported = true;
+
/* Set policy->cur to max now. The governors will adjust later. */
policy->cur = cppc_cpufreq_perf_to_khz(cpu,
cpu->perf_caps.highest_perf);
@@ -410,6 +418,32 @@ static unsigned int cppc_cpufreq_get_rate(unsigned int 
cpunum)
return cppc_get_rate_from_fbctrs(cpu, fb_ctrs_t0, fb_ctrs_t1);
 }
 
+static int cppc_cpufreq_set_boost(struct cpufreq_policy *policy, int state)
+{
+   struct cppc_cpudata *cpudata;
+   int ret;
+
+   if (!boost_supported) {
+   pr_err("BOOST not supported by CPU or firmware\n");
+   return -EINVAL;
+   }
+
+   cpudata = all_cpu_data[policy->cpu];
+   if (state)
+   policy->max = cppc_cpufreq_perf_to_khz(cpudata,
+   cpudata->perf_caps.highest_perf);
+   else
+   policy->max = cppc_cpufreq_perf_to_khz(cpudata,
+   cpudata->perf_caps.nominal_perf);
+   policy->cpuinfo.max_freq = policy->max;
+
+   ret = freq_qos_update_request(policy->max_freq_req, policy->max);
+   if (ret < 0)
+   return ret;
+
+   return 0;
+}
+
 static struct cpufreq_driver cppc_cpufreq_driver = {
.flags = CPUFREQ_CONST_LOOPS,
.verify = cppc_verify_policy,
@@ -417,6 +451,7 @@ static unsigned int 

Re: [PATCH v8 17/18] KVM: x86: Add kexec support for SEV Live Migration.

2020-05-29 Thread Steve Rutherford
On Tue, May 5, 2020 at 2:21 PM Ashish Kalra  wrote:
>
> From: Ashish Kalra 
>
> Reset the host's page encryption bitmap related to kernel
> specific page encryption status settings before we load a
> new kernel by kexec. We cannot reset the complete
> page encryption bitmap here as we need to retain the
> UEFI/OVMF firmware specific settings.
>
> The host's page encryption bitmap is maintained for the
> guest to keep the encrypted/decrypted state of the guest pages,
> therefore we need to explicitly mark all shared pages as
> encrypted again before rebooting into the new guest kernel.
>
> Signed-off-by: Ashish Kalra 
> ---
>  arch/x86/kernel/kvm.c | 28 
>  1 file changed, 28 insertions(+)
>
> diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
> index 4b29815de873..a8bc30d5b15b 100644
> --- a/arch/x86/kernel/kvm.c
> +++ b/arch/x86/kernel/kvm.c
> @@ -35,6 +35,7 @@
>  #include 
>  #include 
>  #include 
> +#include 
>
>  static int kvmapf = 1;
>
> @@ -358,6 +359,33 @@ static void kvm_pv_guest_cpu_reboot(void *unused)
>  */
> if (kvm_para_has_feature(KVM_FEATURE_PV_EOI))
> wrmsrl(MSR_KVM_PV_EOI_EN, 0);
> +   /*
> +* Reset the host's page encryption bitmap related to kernel
> +* specific page encryption status settings before we load a
> +* new kernel by kexec. NOTE: We cannot reset the complete
> +* page encryption bitmap here as we need to retain the
> +* UEFI/OVMF firmware specific settings.
> +*/
> +   if (sev_live_migration_enabled() & (smp_processor_id() == 0)) {
> +   int i;
> +   unsigned long nr_pages;
> +
> +   for (i = 0; i < e820_table->nr_entries; i++) {
> +   struct e820_entry *entry = _table->entries[i];
> +   unsigned long start_pfn;
> +   unsigned long end_pfn;
> +
> +   if (entry->type != E820_TYPE_RAM)
> +   continue;
What should the behavior be for other memory types that are not
expected to be mucked with by firmware? Should we avoid resetting the
enc status of pmem/pram pages?

My intuition here is that we should only preserve the enc status of
those bits that are set by the firmware.

> +
> +   start_pfn = entry->addr >> PAGE_SHIFT;
> +   end_pfn = (entry->addr + entry->size) >> PAGE_SHIFT;
> +   nr_pages = DIV_ROUND_UP(entry->size, PAGE_SIZE);
> +
> +   kvm_sev_hypercall3(KVM_HC_PAGE_ENC_STATUS,
> +  entry->addr, nr_pages, 1);
> +   }
> +   }
> kvm_pv_disable_apf();
> kvm_disable_steal_time();
>  }
> --
> 2.17.1
>


Re: [PATCH v8 18/18] KVM: SVM: Enable SEV live migration feature implicitly on Incoming VM(s).

2020-05-29 Thread Steve Rutherford
On Tue, May 5, 2020 at 2:22 PM Ashish Kalra  wrote:
>
> From: Ashish Kalra 
>
> For source VM, live migration feature is enabled explicitly
> when the guest is booting, for the incoming VM(s) it is implied.
> This is required for handling A->B->C->... VM migrations case.
>
> Signed-off-by: Ashish Kalra 
> ---
>  arch/x86/kvm/svm/sev.c | 7 +++
>  1 file changed, 7 insertions(+)
>
> diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
> index 6f69c3a47583..ba7c0ebfa1f3 100644
> --- a/arch/x86/kvm/svm/sev.c
> +++ b/arch/x86/kvm/svm/sev.c
> @@ -1592,6 +1592,13 @@ int svm_set_page_enc_bitmap(struct kvm *kvm,
> if (ret)
> goto unlock;
>
> +   /*
> +* For source VM, live migration feature is enabled
> +* explicitly when the guest is booting, for the
> +* incoming VM(s) it is implied.
> +*/
> +   sev_update_migration_flags(kvm, KVM_SEV_LIVE_MIGRATION_ENABLED);
> +
> bitmap_copy(sev->page_enc_bmap + BIT_WORD(gfn_start), bitmap,
> (gfn_end - gfn_start));
>
> --
> 2.17.1
>

Reviewed-by: Steve Rutherford 


Re: [PATCH v8 16/18] KVM: x86: Mark _bss_decrypted section variables as decrypted in page encryption bitmap.

2020-05-29 Thread Steve Rutherford
On Tue, May 5, 2020 at 2:20 PM Ashish Kalra  wrote:
>
> From: Ashish Kalra 
>
> Ensure that _bss_decrypted section variables such as hv_clock_boot and
> wall_clock are marked as decrypted in the page encryption bitmap if
> sev liv migration is supported.
>
> Signed-off-by: Ashish Kalra 
> ---
>  arch/x86/kernel/kvmclock.c | 12 
>  1 file changed, 12 insertions(+)
>
> diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c
> index 34b18f6eeb2c..65777bf1218d 100644
> --- a/arch/x86/kernel/kvmclock.c
> +++ b/arch/x86/kernel/kvmclock.c
> @@ -334,6 +334,18 @@ void __init kvmclock_init(void)
> pr_info("kvm-clock: Using msrs %x and %x",
> msr_kvm_system_time, msr_kvm_wall_clock);
>
> +   if (sev_live_migration_enabled()) {
> +   unsigned long nr_pages;
> +   /*
> +* sizeof(hv_clock_boot) is already PAGE_SIZE aligned
> +*/
> +   early_set_mem_enc_dec_hypercall((unsigned long)hv_clock_boot,
> +   1, 0);
> +   nr_pages = DIV_ROUND_UP(sizeof(wall_clock), PAGE_SIZE);
> +   early_set_mem_enc_dec_hypercall((unsigned long)_clock,
> +   nr_pages, 0);
> +   }
> +
> this_cpu_write(hv_clock_per_cpu, _clock_boot[0]);
> kvm_register_clock("primary cpu clock");
> pvclock_set_pvti_cpu0_va(hv_clock_boot);
> --
> 2.17.1
>

Reviewed-by: Steve Rutherford 


Re: [PATCH v8 14/18] EFI: Introduce the new AMD Memory Encryption GUID.

2020-05-29 Thread Steve Rutherford
On Tue, May 5, 2020 at 2:20 PM Ashish Kalra  wrote:
>
> From: Ashish Kalra 
>
> Introduce a new AMD Memory Encryption GUID which is currently
> used for defining a new UEFI enviroment variable which indicates
> UEFI/OVMF support for the SEV live migration feature. This variable
> is setup when UEFI/OVMF detects host/hypervisor support for SEV
> live migration and later this variable is read by the kernel using
> EFI runtime services to verify if OVMF supports the live migration
> feature.
>
> Signed-off-by: Ashish Kalra 
> ---
>  include/linux/efi.h | 1 +
>  1 file changed, 1 insertion(+)
>
> diff --git a/include/linux/efi.h b/include/linux/efi.h
> index 251f1f783cdf..2efb42ccf3a8 100644
> --- a/include/linux/efi.h
> +++ b/include/linux/efi.h
> @@ -358,6 +358,7 @@ void efi_native_runtime_setup(void);
>
>  /* OEM GUIDs */
>  #define DELLEMC_EFI_RCI2_TABLE_GUIDEFI_GUID(0x2d9f28a2, 0xa886, 
> 0x456a,  0x97, 0xa8, 0xf1, 0x1e, 0xf2, 0x4f, 0xf4, 0x55)
> +#define MEM_ENCRYPT_GUID   EFI_GUID(0x0cf29b71, 0x9e51, 
> 0x433a,  0xa3, 0xb7, 0x81, 0xf3, 0xab, 0x16, 0xb8, 0x75)
>
>  typedef struct {
> efi_guid_t guid;
> --
> 2.17.1
>
Have you gotten this GUID upstreamed into edk2?

Reviewed-by: Steve Rutherford 


Re: [PATCH v8 15/18] KVM: x86: Add guest support for detecting and enabling SEV Live Migration feature.

2020-05-29 Thread Steve Rutherford
On Tue, May 5, 2020 at 2:20 PM Ashish Kalra  wrote:
>
> From: Ashish Kalra 
>
> The guest support for detecting and enabling SEV Live migration
> feature uses the following logic :
>
>  - kvm_init_plaform() checks if its booted under the EFI
>
>- If not EFI,
>
>  i) check for the KVM_FEATURE_CPUID
>
>  ii) if CPUID reports that migration is support then issue wrmsrl
>  to enable the SEV migration support
>
>- If EFI,
>
>  i) Check the KVM_FEATURE_CPUID.
>
>  ii) If CPUID reports that migration is supported, then reads the UEFI 
> enviroment variable which
>  indicates OVMF support for live migration.
>
>  iii) If variable is set then wrmsr to enable the SEV migration support.
>
> The EFI live migration check is done using a late_initcall() callback.
>
> Signed-off-by: Ashish Kalra 
> ---
>  arch/x86/include/asm/mem_encrypt.h | 11 ++
>  arch/x86/kernel/kvm.c  | 62 ++
>  arch/x86/mm/mem_encrypt.c  | 11 ++
>  3 files changed, 84 insertions(+)
>
> diff --git a/arch/x86/include/asm/mem_encrypt.h 
> b/arch/x86/include/asm/mem_encrypt.h
> index 848ce43b9040..d10e92ae5ca1 100644
> --- a/arch/x86/include/asm/mem_encrypt.h
> +++ b/arch/x86/include/asm/mem_encrypt.h
> @@ -20,6 +20,7 @@
>
>  extern u64 sme_me_mask;
>  extern bool sev_enabled;
> +extern bool sev_live_mig_enabled;
>
>  void sme_encrypt_execute(unsigned long encrypted_kernel_vaddr,
>  unsigned long decrypted_kernel_vaddr,
> @@ -42,6 +43,8 @@ void __init sme_enable(struct boot_params *bp);
>
>  int __init early_set_memory_decrypted(unsigned long vaddr, unsigned long 
> size);
>  int __init early_set_memory_encrypted(unsigned long vaddr, unsigned long 
> size);
> +void __init early_set_mem_enc_dec_hypercall(unsigned long vaddr, int npages,
> +   bool enc);
>
>  /* Architecture __weak replacement functions */
>  void __init mem_encrypt_init(void);
> @@ -55,6 +58,7 @@ bool sev_active(void);
>  #else  /* !CONFIG_AMD_MEM_ENCRYPT */
>
>  #define sme_me_mask0ULL
> +#define sev_live_mig_enabled   false
>
>  static inline void __init sme_early_encrypt(resource_size_t paddr,
> unsigned long size) { }
> @@ -76,6 +80,8 @@ static inline int __init
>  early_set_memory_decrypted(unsigned long vaddr, unsigned long size) { return 
> 0; }
>  static inline int __init
>  early_set_memory_encrypted(unsigned long vaddr, unsigned long size) { return 
> 0; }
> +static inline void __init
> +early_set_mem_enc_dec_hypercall(unsigned long vaddr, int npages, bool enc) {}
>
>  #define __bss_decrypted
>
> @@ -102,6 +108,11 @@ static inline u64 sme_get_me_mask(void)
> return sme_me_mask;
>  }
>
> +static inline bool sev_live_migration_enabled(void)
> +{
> +   return sev_live_mig_enabled;
> +}
> +
>  #endif /* __ASSEMBLY__ */
>
>  #endif /* __X86_MEM_ENCRYPT_H__ */
> diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
> index 6efe0410fb72..4b29815de873 100644
> --- a/arch/x86/kernel/kvm.c
> +++ b/arch/x86/kernel/kvm.c
> @@ -24,6 +24,7 @@
>  #include 
>  #include 
>  #include 
> +#include 
>  #include 
>  #include 
>  #include 
> @@ -403,6 +404,53 @@ static inline void __set_percpu_decrypted(void *ptr, 
> unsigned long size)
> early_set_memory_decrypted((unsigned long) ptr, size);
>  }
>
> +#ifdef CONFIG_EFI
> +static bool setup_kvm_sev_migration(void)
> +{
> +   efi_char16_t efi_Sev_Live_Mig_support_name[] = 
> L"SevLiveMigrationEnabled";
> +   efi_guid_t efi_variable_guid = MEM_ENCRYPT_GUID;
> +   efi_status_t status;
> +   unsigned long size;
> +   bool enabled;
> +
> +   if (!sev_live_migration_enabled())
> +   return false;
> +
> +   size = sizeof(enabled);
> +
> +   if (!efi_enabled(EFI_RUNTIME_SERVICES)) {
> +   pr_info("setup_kvm_sev_migration: no efi\n");
> +   return false;
> +   }
> +
> +   /* Get variable contents into buffer */
> +   status = efi.get_variable(efi_Sev_Live_Mig_support_name,
> + _variable_guid, NULL, , );
> +
> +   if (status == EFI_NOT_FOUND) {
> +   pr_info("setup_kvm_sev_migration: variable not found\n");
> +   return false;
> +   }
> +
> +   if (status != EFI_SUCCESS) {
> +   pr_info("setup_kvm_sev_migration: get_variable fail\n");
> +   return false;
> +   }
> +
> +   if (enabled == 0) {
> +   pr_info("setup_kvm_sev_migration: live migration disabled in 
> OVMF\n");
> +   return false;
> +   }
> +
> +   pr_info("setup_kvm_sev_migration: live migration enabled in OVMF\n");
> +   wrmsrl(MSR_KVM_SEV_LIVE_MIG_EN, KVM_SEV_LIVE_MIGRATION_ENABLED);
> +
> +   return true;
> +}
> +
> +late_initcall(setup_kvm_sev_migration);
> +#endif
> +
>  /*
>   * Iterate through all possible CPUs and map the memory region 

Re: [PATCH v8 12/18] KVM: SVM: Add support for static allocation of unified Page Encryption Bitmap.

2020-05-29 Thread Steve Rutherford
On Tue, May 5, 2020 at 2:18 PM Ashish Kalra  wrote:
>
> From: Ashish Kalra 
>
> Add support for static allocation of the unified Page encryption bitmap by
> extending kvm_arch_commit_memory_region() callack to add svm specific x86_ops
> which can read the userspace provided memory region/memslots and calculate
> the amount of guest RAM managed by the KVM and grow the bitmap based
> on that information, i.e. the highest guest PA that is mapped by a memslot.
>
> Signed-off-by: Ashish Kalra 
> ---
>  arch/x86/include/asm/kvm_host.h |  1 +
>  arch/x86/kvm/svm/sev.c  | 35 +
>  arch/x86/kvm/svm/svm.c  |  1 +
>  arch/x86/kvm/svm/svm.h  |  1 +
>  arch/x86/kvm/x86.c  |  5 +
>  5 files changed, 43 insertions(+)
>
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index fc74144d5ab0..b573ea85b57e 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -1254,6 +1254,7 @@ struct kvm_x86_ops {
>
> bool (*apic_init_signal_blocked)(struct kvm_vcpu *vcpu);
> int (*enable_direct_tlbflush)(struct kvm_vcpu *vcpu);
> +   void (*commit_memory_region)(struct kvm *kvm, enum kvm_mr_change 
> change);
> int (*page_enc_status_hc)(struct kvm *kvm, unsigned long gpa,
>   unsigned long sz, unsigned long mode);
> int (*get_page_enc_bitmap)(struct kvm *kvm,
> diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
> index 30efc1068707..c0d7043a0627 100644
> --- a/arch/x86/kvm/svm/sev.c
> +++ b/arch/x86/kvm/svm/sev.c
> @@ -1377,6 +1377,41 @@ static int sev_resize_page_enc_bitmap(struct kvm *kvm, 
> unsigned long new_size)
> return 0;
>  }
>
> +void svm_commit_memory_region(struct kvm *kvm, enum kvm_mr_change change)
> +{
> +   struct kvm_memslots *slots;
> +   struct kvm_memory_slot *memslot;
> +   gfn_t start, end = 0;
> +
> +   spin_lock(>mmu_lock);
> +   if (change == KVM_MR_CREATE) {
> +   slots = kvm_memslots(kvm);
> +   kvm_for_each_memslot(memslot, slots) {
> +   start = memslot->base_gfn;
> +   end = memslot->base_gfn + memslot->npages;
> +   /*
> +* KVM memslots is a sorted list, starting with
> +* the highest mapped guest PA, so pick the topmost
> +* valid guest PA.
> +*/
> +   if (memslot->npages)
> +   break;
> +   }
> +   }
> +   spin_unlock(>mmu_lock);
> +
> +   if (end) {
> +   /*
> +* NORE: This callback is invoked in vm ioctl
> +* set_user_memory_region, hence we can use a
> +* mutex here.
> +*/
> +   mutex_lock(>lock);
> +   sev_resize_page_enc_bitmap(kvm, end);
> +   mutex_unlock(>lock);
> +   }
> +}
> +
>  int svm_page_enc_status_hc(struct kvm *kvm, unsigned long gpa,
>   unsigned long npages, unsigned long enc)
>  {
> diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
> index 501e82f5593c..442adbbb0641 100644
> --- a/arch/x86/kvm/svm/svm.c
> +++ b/arch/x86/kvm/svm/svm.c
> @@ -4015,6 +4015,7 @@ static struct kvm_x86_ops svm_x86_ops __initdata = {
>
> .check_nested_events = svm_check_nested_events,
>
> +   .commit_memory_region = svm_commit_memory_region,
> .page_enc_status_hc = svm_page_enc_status_hc,
> .get_page_enc_bitmap = svm_get_page_enc_bitmap,
> .set_page_enc_bitmap = svm_set_page_enc_bitmap,
> diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
> index 2ebdcce50312..fd99e0a5417a 100644
> --- a/arch/x86/kvm/svm/svm.h
> +++ b/arch/x86/kvm/svm/svm.h
> @@ -406,6 +406,7 @@ int svm_page_enc_status_hc(struct kvm *kvm, unsigned long 
> gpa,
>   unsigned long npages, unsigned long enc);
>  int svm_get_page_enc_bitmap(struct kvm *kvm, struct kvm_page_enc_bitmap 
> *bmap);
>  int svm_set_page_enc_bitmap(struct kvm *kvm, struct kvm_page_enc_bitmap 
> *bmap);
> +void svm_commit_memory_region(struct kvm *kvm, enum kvm_mr_change change);
>
>  /* avic.c */
>
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index c4166d7a0493..8938de868d42 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -10133,6 +10133,11 @@ void kvm_arch_commit_memory_region(struct kvm *kvm,
> kvm_mmu_change_mmu_pages(kvm,
> kvm_mmu_calculate_default_mmu_pages(kvm));
>
> +   if (change == KVM_MR_CREATE || change == KVM_MR_DELETE) {
> +   if (kvm_x86_ops.commit_memory_region)
> +   kvm_x86_ops.commit_memory_region(kvm, change);
Why not just call this every time (if it exists) and have the
kvm_x86_op determine if it should do anything?

It 

Re: [PATCH v8 13/18] KVM: x86: Introduce new KVM_FEATURE_SEV_LIVE_MIGRATION feature & Custom MSR.

2020-05-29 Thread Steve Rutherford
On Tue, May 5, 2020 at 2:19 PM Ashish Kalra  wrote:
>
> From: Ashish Kalra 
>
> Add new KVM_FEATURE_SEV_LIVE_MIGRATION feature for guest to check
> for host-side support for SEV live migration. Also add a new custom
> MSR_KVM_SEV_LIVE_MIG_EN for guest to enable the SEV live migration
> feature.
>
> Signed-off-by: Ashish Kalra 
> ---
>  Documentation/virt/kvm/cpuid.rst |  5 +
>  Documentation/virt/kvm/msr.rst   | 10 ++
>  arch/x86/include/uapi/asm/kvm_para.h |  5 +
>  arch/x86/kvm/svm/sev.c   | 14 ++
>  arch/x86/kvm/svm/svm.c   | 16 
>  arch/x86/kvm/svm/svm.h   |  2 ++
>  6 files changed, 52 insertions(+)
>
> diff --git a/Documentation/virt/kvm/cpuid.rst 
> b/Documentation/virt/kvm/cpuid.rst
> index 01b081f6e7ea..0514523e00cd 100644
> --- a/Documentation/virt/kvm/cpuid.rst
> +++ b/Documentation/virt/kvm/cpuid.rst
> @@ -86,6 +86,11 @@ KVM_FEATURE_PV_SCHED_YIELD13  guest checks 
> this feature bit
>before using paravirtualized
>sched yield.
>
> +KVM_FEATURE_SEV_LIVE_MIGRATION14  guest checks this feature bit 
> before
> +  using the page encryption state
> +  hypercall to notify the page 
> state
> +  change
> +
>  KVM_FEATURE_CLOCSOURCE_STABLE_BIT 24  host will warn if no guest-side
>per-cpu warps are expeced in
>kvmclock
> diff --git a/Documentation/virt/kvm/msr.rst b/Documentation/virt/kvm/msr.rst
> index 33892036672d..7cd7786bbb03 100644
> --- a/Documentation/virt/kvm/msr.rst
> +++ b/Documentation/virt/kvm/msr.rst
> @@ -319,3 +319,13 @@ data:
>
> KVM guests can request the host not to poll on HLT, for example if
> they are performing polling themselves.
> +
> +MSR_KVM_SEV_LIVE_MIG_EN:
> +0x4b564d06
> +
> +   Control SEV Live Migration features.
> +
> +data:
> +Bit 0 enables (1) or disables (0) host-side SEV Live Migration 
> feature.
> +Bit 1 enables (1) or disables (0) support for SEV Live Migration 
> extensions.
> +All other bits are reserved.
> diff --git a/arch/x86/include/uapi/asm/kvm_para.h 
> b/arch/x86/include/uapi/asm/kvm_para.h
> index 2a8e0b6b9805..d9d4953b42ad 100644
> --- a/arch/x86/include/uapi/asm/kvm_para.h
> +++ b/arch/x86/include/uapi/asm/kvm_para.h
> @@ -31,6 +31,7 @@
>  #define KVM_FEATURE_PV_SEND_IPI11
>  #define KVM_FEATURE_POLL_CONTROL   12
>  #define KVM_FEATURE_PV_SCHED_YIELD 13
> +#define KVM_FEATURE_SEV_LIVE_MIGRATION 14
>
>  #define KVM_HINTS_REALTIME  0
>
> @@ -50,6 +51,7 @@
>  #define MSR_KVM_STEAL_TIME  0x4b564d03
>  #define MSR_KVM_PV_EOI_EN  0x4b564d04
>  #define MSR_KVM_POLL_CONTROL   0x4b564d05
> +#define MSR_KVM_SEV_LIVE_MIG_EN0x4b564d06
>
>  struct kvm_steal_time {
> __u64 steal;
> @@ -122,4 +124,7 @@ struct kvm_vcpu_pv_apf_data {
>  #define KVM_PV_EOI_ENABLED KVM_PV_EOI_MASK
>  #define KVM_PV_EOI_DISABLED 0x0
>
> +#define KVM_SEV_LIVE_MIGRATION_ENABLED (1 << 0)
> +#define KVM_SEV_LIVE_MIGRATION_EXTENSIONS_SUPPORTED(1 << 1)
> +
>  #endif /* _UAPI_ASM_X86_KVM_PARA_H */
> diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
> index c0d7043a0627..6f69c3a47583 100644
> --- a/arch/x86/kvm/svm/sev.c
> +++ b/arch/x86/kvm/svm/sev.c
> @@ -1469,6 +1469,17 @@ int svm_page_enc_status_hc(struct kvm *kvm, unsigned 
> long gpa,
> return 0;
>  }
>
> +void sev_update_migration_flags(struct kvm *kvm, u64 data)
> +{
> +   struct kvm_sev_info *sev = _kvm_svm(kvm)->sev_info;
> +
> +   if (!sev_guest(kvm))
> +   return;
> +
> +   if (data & KVM_SEV_LIVE_MIGRATION_ENABLED)
> +   sev->live_migration_enabled = true;
> +}
> +
>  int svm_get_page_enc_bitmap(struct kvm *kvm,
>struct kvm_page_enc_bitmap *bmap)
>  {
> @@ -1481,6 +1492,9 @@ int svm_get_page_enc_bitmap(struct kvm *kvm,
> if (!sev_guest(kvm))
> return -ENOTTY;
>
> +   if (!sev->live_migration_enabled)
> +   return -EINVAL;
> +
> gfn_start = bmap->start_gfn;
> gfn_end = gfn_start + bmap->num_pages;
>
> diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
> index 442adbbb0641..a99f5457f244 100644
> --- a/arch/x86/kvm/svm/svm.c
> +++ b/arch/x86/kvm/svm/svm.c
> @@ -2633,6 +2633,9 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct 
> msr_data *msr)
> svm->msr_decfg = data;
> break;
> }
> +   case MSR_KVM_SEV_LIVE_MIG_EN:
> +   sev_update_migration_flags(vcpu->kvm, data);
> +   break;
> case MSR_IA32_APICBASE:
> if (kvm_vcpu_apicv_active(vcpu))

Re: [PATCH v8 11/18] KVM: x86: Introduce KVM_SET_PAGE_ENC_BITMAP ioctl

2020-05-29 Thread Steve Rutherford
On Tue, May 5, 2020 at 2:18 PM Ashish Kalra  wrote:
>
> From: Brijesh Singh 
>
> The ioctl can be used to set page encryption bitmap for an
> incoming guest.
>
> Cc: Thomas Gleixner 
> Cc: Ingo Molnar 
> Cc: "H. Peter Anvin" 
> Cc: Paolo Bonzini 
> Cc: "Radim Krčmář" 
> Cc: Joerg Roedel 
> Cc: Borislav Petkov 
> Cc: Tom Lendacky 
> Cc: x...@kernel.org
> Cc: k...@vger.kernel.org
> Cc: linux-kernel@vger.kernel.org
> Reviewed-by: Venu Busireddy 
> Signed-off-by: Brijesh Singh 
> Signed-off-by: Ashish Kalra 
> ---
>  Documentation/virt/kvm/api.rst  | 44 +
>  arch/x86/include/asm/kvm_host.h |  2 ++
>  arch/x86/kvm/svm/sev.c  | 50 +
>  arch/x86/kvm/svm/svm.c  |  1 +
>  arch/x86/kvm/svm/svm.h  |  1 +
>  arch/x86/kvm/x86.c  | 12 
>  include/uapi/linux/kvm.h|  1 +
>  7 files changed, 111 insertions(+)
>
> diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst
> index ecad84086892..fa70017ee693 100644
> --- a/Documentation/virt/kvm/api.rst
> +++ b/Documentation/virt/kvm/api.rst
> @@ -4663,6 +4663,28 @@ or shared. The bitmap can be used during the guest 
> migration. If the page
>  is private then the userspace need to use SEV migration commands to transmit
>  the page.
>
> +4.126 KVM_SET_PAGE_ENC_BITMAP (vm ioctl)
> +---
> +
> +:Capability: basic
> +:Architectures: x86
> +:Type: vm ioctl
> +:Parameters: struct kvm_page_enc_bitmap (in/out)
> +:Returns: 0 on success, -1 on error
> +
> +/* for KVM_SET_PAGE_ENC_BITMAP */
> +struct kvm_page_enc_bitmap {
> +   __u64 start_gfn;
> +   __u64 num_pages;
> +   union {
> +   void __user *enc_bitmap; /* one bit per page */
> +   __u64 padding2;
> +   };
> +};
> +
> +During the guest live migration the outgoing guest exports its page 
> encryption
> +bitmap, the KVM_SET_PAGE_ENC_BITMAP can be used to build the page encryption
> +bitmap for an incoming guest.
>
>  4.125 KVM_S390_PV_COMMAND
>  -
> @@ -4717,6 +4739,28 @@ KVM_PV_VM_VERIFY
>Verify the integrity of the unpacked image. Only if this succeeds,
>KVM is allowed to start protected VCPUs.
>
> +4.126 KVM_SET_PAGE_ENC_BITMAP (vm ioctl)
> +---
> +
> +:Capability: basic
> +:Architectures: x86
> +:Type: vm ioctl
> +:Parameters: struct kvm_page_enc_bitmap (in/out)
> +:Returns: 0 on success, -1 on error
> +
> +/* for KVM_SET_PAGE_ENC_BITMAP */
> +struct kvm_page_enc_bitmap {
> +   __u64 start_gfn;
> +   __u64 num_pages;
> +   union {
> +   void __user *enc_bitmap; /* one bit per page */
> +   __u64 padding2;
> +   };
> +};
> +
> +During the guest live migration the outgoing guest exports its page 
> encryption
> +bitmap, the KVM_SET_PAGE_ENC_BITMAP can be used to build the page encryption
> +bitmap for an incoming guest.
>
>  5. The kvm_run structure
>  
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index 9e428befb6a4..fc74144d5ab0 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -1258,6 +1258,8 @@ struct kvm_x86_ops {
>   unsigned long sz, unsigned long mode);
> int (*get_page_enc_bitmap)(struct kvm *kvm,
> struct kvm_page_enc_bitmap *bmap);
> +   int (*set_page_enc_bitmap)(struct kvm *kvm,
> +   struct kvm_page_enc_bitmap *bmap);
>  };
>
>  struct kvm_x86_init_ops {
> diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
> index 387045902470..30efc1068707 100644
> --- a/arch/x86/kvm/svm/sev.c
> +++ b/arch/x86/kvm/svm/sev.c
> @@ -1504,6 +1504,56 @@ int svm_get_page_enc_bitmap(struct kvm *kvm,
> return ret;
>  }
>
> +int svm_set_page_enc_bitmap(struct kvm *kvm,
> +  struct kvm_page_enc_bitmap *bmap)
> +{
> +   struct kvm_sev_info *sev = _kvm_svm(kvm)->sev_info;
> +   unsigned long gfn_start, gfn_end;
> +   unsigned long *bitmap;
> +   unsigned long sz;
> +   int ret;
> +
> +   if (!sev_guest(kvm))
> +   return -ENOTTY;
> +   /* special case of resetting the complete bitmap */
> +   if (!bmap->enc_bitmap) {
> +   mutex_lock(>lock);
> +   /* by default all pages are marked encrypted */
> +   if (sev->page_enc_bmap_size)
> +   bitmap_fill(sev->page_enc_bmap,
> +   sev->page_enc_bmap_size);
> +   mutex_unlock(>lock);
> +   return 0;
> +   }
>
> +
> +   gfn_start = bmap->start_gfn;
> +   gfn_end = gfn_start + bmap->num_pages;
> +
> +   sz = ALIGN(bmap->num_pages, BITS_PER_LONG) / 8;
> +   bitmap = kmalloc(sz, GFP_KERNEL);
> +   if (!bitmap)
> +   return -ENOMEM;
> +
> +   ret = 

Re: [PATCH v8 10/18] mm: x86: Invoke hypercall when page encryption status is changed

2020-05-29 Thread Steve Rutherford
On Tue, May 5, 2020 at 2:18 PM Ashish Kalra  wrote:
>
> From: Brijesh Singh 
>
> Invoke a hypercall when a memory region is changed from encrypted ->
> decrypted and vice versa. Hypervisor needs to know the page encryption
> status during the guest migration.
>
> Cc: Thomas Gleixner 
> Cc: Ingo Molnar 
> Cc: "H. Peter Anvin" 
> Cc: Paolo Bonzini 
> Cc: "Radim Krčmář" 
> Cc: Joerg Roedel 
> Cc: Borislav Petkov 
> Cc: Tom Lendacky 
> Cc: x...@kernel.org
> Cc: k...@vger.kernel.org
> Cc: linux-kernel@vger.kernel.org
> Reviewed-by: Venu Busireddy 
> Signed-off-by: Brijesh Singh 
> Signed-off-by: Ashish Kalra 
> ---
>  arch/x86/include/asm/paravirt.h   | 10 +
>  arch/x86/include/asm/paravirt_types.h |  2 +
>  arch/x86/kernel/paravirt.c|  1 +
>  arch/x86/mm/mem_encrypt.c | 57 ++-
>  arch/x86/mm/pat/set_memory.c  |  7 
>  5 files changed, 76 insertions(+), 1 deletion(-)
>
> diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
> index 694d8daf4983..8127b9c141bf 100644
> --- a/arch/x86/include/asm/paravirt.h
> +++ b/arch/x86/include/asm/paravirt.h
> @@ -78,6 +78,12 @@ static inline void paravirt_arch_exit_mmap(struct 
> mm_struct *mm)
> PVOP_VCALL1(mmu.exit_mmap, mm);
>  }
>
> +static inline void page_encryption_changed(unsigned long vaddr, int npages,
> +   bool enc)
> +{
> +   PVOP_VCALL3(mmu.page_encryption_changed, vaddr, npages, enc);
> +}
> +
>  #ifdef CONFIG_PARAVIRT_XXL
>  static inline void load_sp0(unsigned long sp0)
>  {
> @@ -946,6 +952,10 @@ static inline void paravirt_arch_dup_mmap(struct 
> mm_struct *oldmm,
>  static inline void paravirt_arch_exit_mmap(struct mm_struct *mm)
>  {
>  }
> +
> +static inline void page_encryption_changed(unsigned long vaddr, int npages, 
> bool enc)
> +{
> +}
>  #endif
>  #endif /* __ASSEMBLY__ */
>  #endif /* _ASM_X86_PARAVIRT_H */
> diff --git a/arch/x86/include/asm/paravirt_types.h 
> b/arch/x86/include/asm/paravirt_types.h
> index 732f62e04ddb..03bfd515c59c 100644
> --- a/arch/x86/include/asm/paravirt_types.h
> +++ b/arch/x86/include/asm/paravirt_types.h
> @@ -215,6 +215,8 @@ struct pv_mmu_ops {
>
> /* Hook for intercepting the destruction of an mm_struct. */
> void (*exit_mmap)(struct mm_struct *mm);
> +   void (*page_encryption_changed)(unsigned long vaddr, int npages,
> +   bool enc);
>
>  #ifdef CONFIG_PARAVIRT_XXL
> struct paravirt_callee_save read_cr2;
> diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
> index c131ba4e70ef..840c02b23aeb 100644
> --- a/arch/x86/kernel/paravirt.c
> +++ b/arch/x86/kernel/paravirt.c
> @@ -367,6 +367,7 @@ struct paravirt_patch_template pv_ops = {
> (void (*)(struct mmu_gather *, void 
> *))tlb_remove_page,
>
> .mmu.exit_mmap  = paravirt_nop,
> +   .mmu.page_encryption_changed= paravirt_nop,
>
>  #ifdef CONFIG_PARAVIRT_XXL
> .mmu.read_cr2   = __PV_IS_CALLEE_SAVE(native_read_cr2),
> diff --git a/arch/x86/mm/mem_encrypt.c b/arch/x86/mm/mem_encrypt.c
> index f4bd4b431ba1..c9800fa811f6 100644
> --- a/arch/x86/mm/mem_encrypt.c
> +++ b/arch/x86/mm/mem_encrypt.c
> @@ -19,6 +19,7 @@
>  #include 
>  #include 
>  #include 
> +#include 
>
>  #include 
>  #include 
> @@ -29,6 +30,7 @@
>  #include 
>  #include 
>  #include 
> +#include 
>
>  #include "mm_internal.h"
>
> @@ -196,6 +198,47 @@ void __init sme_early_init(void)
> swiotlb_force = SWIOTLB_FORCE;
>  }
>
> +static void set_memory_enc_dec_hypercall(unsigned long vaddr, int npages,
> +   bool enc)
> +{
> +   unsigned long sz = npages << PAGE_SHIFT;
> +   unsigned long vaddr_end, vaddr_next;
> +
> +   vaddr_end = vaddr + sz;
> +
> +   for (; vaddr < vaddr_end; vaddr = vaddr_next) {
> +   int psize, pmask, level;
> +   unsigned long pfn;
> +   pte_t *kpte;
> +
> +   kpte = lookup_address(vaddr, );
> +   if (!kpte || pte_none(*kpte))
> +   return;
> +
> +   switch (level) {
> +   case PG_LEVEL_4K:
> +   pfn = pte_pfn(*kpte);
> +   break;
> +   case PG_LEVEL_2M:
> +   pfn = pmd_pfn(*(pmd_t *)kpte);
> +   break;
> +   case PG_LEVEL_1G:
> +   pfn = pud_pfn(*(pud_t *)kpte);
> +   break;
> +   default:
> +   return;
> +   }
> +
> +   psize = page_level_size(level);
> +   pmask = page_level_mask(level);
> +
> +   kvm_sev_hypercall3(KVM_HC_PAGE_ENC_STATUS,
> +  pfn << PAGE_SHIFT, psize >> PAGE_SHIFT, 
> enc);
> +
> +   vaddr_next = (vaddr & pmask) + psize;
> +   }
> +}
> +

Re: [PATCH v8 08/18] KVM: X86: Introduce KVM_HC_PAGE_ENC_STATUS hypercall

2020-05-29 Thread Steve Rutherford
On Tue, May 5, 2020 at 2:17 PM Ashish Kalra  wrote:
>
> From: Brijesh Singh 
>
> This hypercall is used by the SEV guest to notify a change in the page
> encryption status to the hypervisor. The hypercall should be invoked
> only when the encryption attribute is changed from encrypted -> decrypted
> and vice versa. By default all guest pages are considered encrypted.
>
> Cc: Thomas Gleixner 
> Cc: Ingo Molnar 
> Cc: "H. Peter Anvin" 
> Cc: Paolo Bonzini 
> Cc: "Radim Krčmář" 
> Cc: Joerg Roedel 
> Cc: Borislav Petkov 
> Cc: Tom Lendacky 
> Cc: x...@kernel.org
> Cc: k...@vger.kernel.org
> Cc: linux-kernel@vger.kernel.org
> Reviewed-by: Venu Busireddy 
> Signed-off-by: Brijesh Singh 
> Signed-off-by: Ashish Kalra 
> ---
>  Documentation/virt/kvm/hypercalls.rst | 15 +
>  arch/x86/include/asm/kvm_host.h   |  2 +
>  arch/x86/kvm/svm/sev.c| 90 +++
>  arch/x86/kvm/svm/svm.c|  2 +
>  arch/x86/kvm/svm/svm.h|  4 ++
>  arch/x86/kvm/vmx/vmx.c|  1 +
>  arch/x86/kvm/x86.c|  6 ++
>  include/uapi/linux/kvm_para.h |  1 +
>  8 files changed, 121 insertions(+)
>
> diff --git a/Documentation/virt/kvm/hypercalls.rst 
> b/Documentation/virt/kvm/hypercalls.rst
> index dbaf207e560d..ff5287e68e81 100644
> --- a/Documentation/virt/kvm/hypercalls.rst
> +++ b/Documentation/virt/kvm/hypercalls.rst
> @@ -169,3 +169,18 @@ a0: destination APIC ID
>
>  :Usage example: When sending a call-function IPI-many to vCPUs, yield if
> any of the IPI target vCPUs was preempted.
> +
> +
> +8. KVM_HC_PAGE_ENC_STATUS
> +-
> +:Architecture: x86
> +:Status: active
> +:Purpose: Notify the encryption status changes in guest page table (SEV 
> guest)
> +
> +a0: the guest physical address of the start page
> +a1: the number of pages
> +a2: encryption attribute
> +
> +   Where:
> +   * 1: Encryption attribute is set
> +   * 0: Encryption attribute is cleared
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index 42a2d0d3984a..4a8ee22f4f5b 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -1254,6 +1254,8 @@ struct kvm_x86_ops {
>
> bool (*apic_init_signal_blocked)(struct kvm_vcpu *vcpu);
> int (*enable_direct_tlbflush)(struct kvm_vcpu *vcpu);
> +   int (*page_enc_status_hc)(struct kvm *kvm, unsigned long gpa,
> + unsigned long sz, unsigned long mode);
>  };
>
>  struct kvm_x86_init_ops {
> diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
> index 698704defbcd..f088467708f0 100644
> --- a/arch/x86/kvm/svm/sev.c
> +++ b/arch/x86/kvm/svm/sev.c
> @@ -1347,6 +1347,93 @@ static int sev_receive_finish(struct kvm *kvm, struct 
> kvm_sev_cmd *argp)
> return ret;
>  }
>
> +static int sev_resize_page_enc_bitmap(struct kvm *kvm, unsigned long 
> new_size)
> +{
> +   struct kvm_sev_info *sev = _kvm_svm(kvm)->sev_info;
> +   unsigned long *map;
> +   unsigned long sz;
> +
> +   if (sev->page_enc_bmap_size >= new_size)
> +   return 0;
> +
> +   sz = ALIGN(new_size, BITS_PER_LONG) / 8;
> +
> +   map = vmalloc(sz);
> +   if (!map) {
> +   pr_err_once("Failed to allocate encrypted bitmap size %lx\n",
> +   sz);
> +   return -ENOMEM;
> +   }
> +
> +   /* mark the page encrypted (by default) */
> +   memset(map, 0xff, sz);
> +
> +   bitmap_copy(map, sev->page_enc_bmap, sev->page_enc_bmap_size);
> +   kvfree(sev->page_enc_bmap);
> +
> +   sev->page_enc_bmap = map;
> +   sev->page_enc_bmap_size = new_size;
> +
> +   return 0;
> +}
> +
> +int svm_page_enc_status_hc(struct kvm *kvm, unsigned long gpa,
> + unsigned long npages, unsigned long enc)
> +{
> +   struct kvm_sev_info *sev = _kvm_svm(kvm)->sev_info;
> +   kvm_pfn_t pfn_start, pfn_end;
> +   gfn_t gfn_start, gfn_end;
> +
> +   if (!sev_guest(kvm))
> +   return -EINVAL;
> +
> +   if (!npages)
> +   return 0;
> +
> +   gfn_start = gpa_to_gfn(gpa);
> +   gfn_end = gfn_start + npages;
> +
> +   /* out of bound access error check */
> +   if (gfn_end <= gfn_start)
> +   return -EINVAL;
> +
> +   /* lets make sure that gpa exist in our memslot */
> +   pfn_start = gfn_to_pfn(kvm, gfn_start);
> +   pfn_end = gfn_to_pfn(kvm, gfn_end);
> +
> +   if (is_error_noslot_pfn(pfn_start) && !is_noslot_pfn(pfn_start)) {
> +   /*
> +* Allow guest MMIO range(s) to be added
> +* to the page encryption bitmap.
> +*/
> +   return -EINVAL;
> +   }
> +
> +   if (is_error_noslot_pfn(pfn_end) && !is_noslot_pfn(pfn_end)) {
> +   /*
> +* Allow guest MMIO range(s) to be added
> +* 

Re: [PATCH 17/30] KVM: nSVM: synchronize VMCB controls updated by the processor on every vmexit

2020-05-29 Thread Krish Sadhukhan



On 5/29/20 8:39 AM, Paolo Bonzini wrote:

The control state changes on every L2->L0 vmexit, and we will have to
serialize it in the nested state.  So keep it up to date in svm->nested.ctl
and just copy them back to the nested VMCB in nested_svm_vmexit.

Signed-off-by: Paolo Bonzini 
---
  arch/x86/kvm/svm/nested.c | 57 ++-
  arch/x86/kvm/svm/svm.c|  5 +++-
  arch/x86/kvm/svm/svm.h|  1 +
  3 files changed, 38 insertions(+), 25 deletions(-)

diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c
index 1e5f460b5540..921466eba556 100644
--- a/arch/x86/kvm/svm/nested.c
+++ b/arch/x86/kvm/svm/nested.c
@@ -234,6 +234,34 @@ static void load_nested_vmcb_control(struct vcpu_svm *svm,
svm->nested.ctl.iopm_base_pa  &= ~0x0fffULL;
  }
  
+/*

+ * Synchronize fields that are written by the processor, so that
+ * they can be copied back into the nested_vmcb.
+ */
+void sync_nested_vmcb_control(struct vcpu_svm *svm)
+{
+   u32 mask;
+   svm->nested.ctl.event_inj  = svm->vmcb->control.event_inj;
+   svm->nested.ctl.event_inj_err  = svm->vmcb->control.event_inj_err;
+
+   /* Only a few fields of int_ctl are written by the processor.  */
+   mask = V_IRQ_MASK | V_TPR_MASK;
+   if (!(svm->nested.ctl.int_ctl & V_INTR_MASKING_MASK) &&
+   is_intercept(svm, SVM_EXIT_VINTR)) {
+   /*
+* In order to request an interrupt window, L0 is usurping
+* svm->vmcb->control.int_ctl and possibly setting V_IRQ
+* even if it was clear in L1's VMCB.  Restoring it would be
+* wrong.  However, in this case V_IRQ will remain true until
+* interrupt_window_interception calls svm_clear_vintr and
+* restores int_ctl.  We can just leave it aside.
+*/
+   mask &= ~V_IRQ_MASK;
+   }
+   svm->nested.ctl.int_ctl&= ~mask;
+   svm->nested.ctl.int_ctl|= svm->vmcb->control.int_ctl & mask;
+}
+
  static void nested_prepare_vmcb_save(struct vcpu_svm *svm, struct vmcb 
*nested_vmcb)
  {
/* Load the nested guest state */
@@ -471,6 +499,7 @@ int nested_svm_vmexit(struct vcpu_svm *svm)
/* Exit Guest-Mode */
leave_guest_mode(>vcpu);
svm->nested.vmcb = 0;
+   WARN_ON_ONCE(svm->nested.nested_run_pending);
  
  	/* in case we halted in L2 */

svm->vcpu.arch.mp_state = KVM_MP_STATE_RUNNABLE;
@@ -497,8 +526,6 @@ int nested_svm_vmexit(struct vcpu_svm *svm)
nested_vmcb->save.dr6= svm->vcpu.arch.dr6;
nested_vmcb->save.cpl= vmcb->save.cpl;
  
-	nested_vmcb->control.int_ctl   = vmcb->control.int_ctl;

-   nested_vmcb->control.int_vector= vmcb->control.int_vector;



While it's not related to this patch, I am wondering if we need the 
following line in enter_svm_guest_mode():


    svm->vmcb->control.int_vector = nested_vmcb->control.int_vector;


If int_vector is used only to trigger a #VMEXIT after we have decided to 
inject a virtual interrupt, we probably don't the vector value from the 
nested vmcb.



nested_vmcb->control.int_state = vmcb->control.int_state;
nested_vmcb->control.exit_code = vmcb->control.exit_code;
nested_vmcb->control.exit_code_hi  = vmcb->control.exit_code_hi;
@@ -510,34 +537,16 @@ int nested_svm_vmexit(struct vcpu_svm *svm)
if (svm->nrips_enabled)
nested_vmcb->control.next_rip  = vmcb->control.next_rip;
  
-	/*

-* If we emulate a VMRUN/#VMEXIT in the same host #vmexit cycle we have
-* to make sure that we do not lose injected events. So check event_inj
-* here and copy it to exit_int_info if it is valid.
-* Exit_int_info and event_inj can't be both valid because the case
-* below only happens on a VMRUN instruction intercept which has
-* no valid exit_int_info set.
-*/
-   if (vmcb->control.event_inj & SVM_EVTINJ_VALID) {
-   struct vmcb_control_area *nc = _vmcb->control;
-
-   nc->exit_int_info = vmcb->control.event_inj;
-   nc->exit_int_info_err = vmcb->control.event_inj_err;
-   }
-
-   nested_vmcb->control.tlb_ctl   = 0;
-   nested_vmcb->control.event_inj = 0;
-   nested_vmcb->control.event_inj_err = 0;
+   nested_vmcb->control.int_ctl   = svm->nested.ctl.int_ctl;
+   nested_vmcb->control.tlb_ctl   = svm->nested.ctl.tlb_ctl;
+   nested_vmcb->control.event_inj = svm->nested.ctl.event_inj;
+   nested_vmcb->control.event_inj_err = svm->nested.ctl.event_inj_err;
  
  	nested_vmcb->control.pause_filter_count =

svm->vmcb->control.pause_filter_count;
nested_vmcb->control.pause_filter_thresh =
svm->vmcb->control.pause_filter_thresh;
  
-	/* We always set V_INTR_MASKING and remember the old value in hflags */

-   if 

Re: [PATCH v8 09/18] KVM: x86: Introduce KVM_GET_PAGE_ENC_BITMAP ioctl

2020-05-29 Thread Steve Rutherford
On Tue, May 5, 2020 at 2:17 PM Ashish Kalra  wrote:
>
> From: Brijesh Singh 
>
> The ioctl can be used to retrieve page encryption bitmap for a given
> gfn range.
>
> Return the correct bitmap as per the number of pages being requested
> by the user. Ensure that we only copy bmap->num_pages bytes in the
> userspace buffer, if bmap->num_pages is not byte aligned we read
> the trailing bits from the userspace and copy those bits as is.
>
> Cc: Thomas Gleixner 
> Cc: Ingo Molnar 
> Cc: "H. Peter Anvin" 
> Cc: Paolo Bonzini 
> Cc: "Radim Krčmář" 
> Cc: Joerg Roedel 
> Cc: Borislav Petkov 
> Cc: Tom Lendacky 
> Cc: x...@kernel.org
> Cc: k...@vger.kernel.org
> Cc: linux-kernel@vger.kernel.org
> Reviewed-by: Venu Busireddy 
> Signed-off-by: Brijesh Singh 
> Signed-off-by: Ashish Kalra 
> ---
>  Documentation/virt/kvm/api.rst  | 27 +
>  arch/x86/include/asm/kvm_host.h |  2 +
>  arch/x86/kvm/svm/sev.c  | 70 +
>  arch/x86/kvm/svm/svm.c  |  1 +
>  arch/x86/kvm/svm/svm.h  |  1 +
>  arch/x86/kvm/x86.c  | 12 ++
>  include/uapi/linux/kvm.h| 12 ++
>  7 files changed, 125 insertions(+)
>
> diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst
> index efbbe570aa9b..ecad84086892 100644
> --- a/Documentation/virt/kvm/api.rst
> +++ b/Documentation/virt/kvm/api.rst
> @@ -4636,6 +4636,33 @@ This ioctl resets VCPU registers and control 
> structures according to
>  the clear cpu reset definition in the POP. However, the cpu is not put
>  into ESA mode. This reset is a superset of the initial reset.
>
> +4.125 KVM_GET_PAGE_ENC_BITMAP (vm ioctl)
> +---
> +
> +:Capability: basic
> +:Architectures: x86
> +:Type: vm ioctl
> +:Parameters: struct kvm_page_enc_bitmap (in/out)
> +:Returns: 0 on success, -1 on error
> +
> +/* for KVM_GET_PAGE_ENC_BITMAP */
> +struct kvm_page_enc_bitmap {
> +   __u64 start_gfn;
> +   __u64 num_pages;
> +   union {
> +   void __user *enc_bitmap; /* one bit per page */
> +   __u64 padding2;
> +   };
> +};
> +
> +The encrypted VMs have the concept of private and shared pages. The private
> +pages are encrypted with the guest-specific key, while the shared pages may
> +be encrypted with the hypervisor key. The KVM_GET_PAGE_ENC_BITMAP can
> +be used to get the bitmap indicating whether the guest page is private
> +or shared. The bitmap can be used during the guest migration. If the page
> +is private then the userspace need to use SEV migration commands to transmit
> +the page.
> +
>
>  4.125 KVM_S390_PV_COMMAND
>  -
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index 4a8ee22f4f5b..9e428befb6a4 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -1256,6 +1256,8 @@ struct kvm_x86_ops {
> int (*enable_direct_tlbflush)(struct kvm_vcpu *vcpu);
> int (*page_enc_status_hc)(struct kvm *kvm, unsigned long gpa,
>   unsigned long sz, unsigned long mode);
> +   int (*get_page_enc_bitmap)(struct kvm *kvm,
> +   struct kvm_page_enc_bitmap *bmap);
>  };
>
>  struct kvm_x86_init_ops {
> diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
> index f088467708f0..387045902470 100644
> --- a/arch/x86/kvm/svm/sev.c
> +++ b/arch/x86/kvm/svm/sev.c
> @@ -1434,6 +1434,76 @@ int svm_page_enc_status_hc(struct kvm *kvm, unsigned 
> long gpa,
> return 0;
>  }
>
> +int svm_get_page_enc_bitmap(struct kvm *kvm,
> +  struct kvm_page_enc_bitmap *bmap)
> +{
> +   struct kvm_sev_info *sev = _kvm_svm(kvm)->sev_info;
> +   unsigned long gfn_start, gfn_end;
> +   unsigned long sz, i, sz_bytes;
> +   unsigned long *bitmap;
> +   int ret, n;
> +
> +   if (!sev_guest(kvm))
> +   return -ENOTTY;
> +
> +   gfn_start = bmap->start_gfn;
> +   gfn_end = gfn_start + bmap->num_pages;
> +
> +   sz = ALIGN(bmap->num_pages, BITS_PER_LONG) / BITS_PER_BYTE;
> +   bitmap = kmalloc(sz, GFP_KERNEL);
> +   if (!bitmap)
> +   return -ENOMEM;
> +
> +   /* by default all pages are marked encrypted */
> +   memset(bitmap, 0xff, sz);
> +
> +   mutex_lock(>lock);
> +   if (sev->page_enc_bmap) {
> +   i = gfn_start;
> +   for_each_clear_bit_from(i, sev->page_enc_bmap,
> + min(sev->page_enc_bmap_size, gfn_end))
gfn_end is not a size? I believe you want either gfn_end - gfn_start
or bmap->num_pages.

> +   clear_bit(i - gfn_start, bitmap);
> +   }
> +   mutex_unlock(>lock);
> +
> +   ret = -EFAULT;
> +
> +   n = bmap->num_pages % BITS_PER_BYTE;
> +   sz_bytes = ALIGN(bmap->num_pages, BITS_PER_BYTE) / BITS_PER_BYTE;
> +
> +   /*
> +* Return the correct bitmap as per 

Re: [PATCH v2] rtc: rv3028: Add missed check for devm_regmap_init_i2c()

2020-05-29 Thread Alexandre Belloni
On 28/05/2020 18:39:50+0800, Chuhong Yuan wrote:
> rv3028_probe() misses a check for devm_regmap_init_i2c().
> Add the missed check to fix it.
> 
> Fixes: e6e7376cfd7b ("rtc: rv3028: add new driver")
> Signed-off-by: Chuhong Yuan 
> ---
> Changes in v2:
>   - Add fixes tag.
> 
>  drivers/rtc/rtc-rv3028.c | 2 ++
>  1 file changed, 2 insertions(+)
> 

Actually, this is the one I applied.

-- 
Alexandre Belloni, Bootlin
Embedded Linux and Kernel engineering
https://bootlin.com


Re: [PATCH] Revert "f2fs: fix quota_sync failure due to f2fs_lock_op"

2020-05-29 Thread Chao Yu
On 2020/5/30 9:49, Jaegeuk Kim wrote:
> On 05/30, Chao Yu wrote:
>> On 2020/5/30 6:34, Jaegeuk Kim wrote:
>>> On 05/29, Chao Yu wrote:
 Under heavy fsstress, we may triggle panic while issuing discard,
 because __check_sit_bitmap() detects that discard command may earse
 valid data blocks, the root cause is as below race stack described,
 since we removed lock when flushing quota data, quota data writeback
 may race with write_checkpoint(), so that it causes inconsistency in
 between cached discard entry and segment bitmap.

 - f2fs_write_checkpoint
  - block_operations
   - set_sbi_flag(sbi, SBI_QUOTA_SKIP_FLUSH)
  - f2fs_flush_sit_entries
   - add_discard_addrs
- __set_bit_le(i, (void *)de->discard_map);
- f2fs_write_data_pages
 - f2fs_write_single_data_page
   : inode is quota one, 
 cp_rwsem won't be locked
  - f2fs_do_write_data_page
   - f2fs_allocate_data_block
- f2fs_wait_discard_bio
  : discard entry has not 
 been added yet.
- update_sit_entry
  - f2fs_clear_prefree_segments
   - f2fs_issue_discard
   : add discard entry

 This patch fixes this issue by reverting 435cbab95e39 ("f2fs: fix 
 quota_sync
 failure due to f2fs_lock_op").

 Fixes: 435cbab95e39 ("f2fs: fix quota_sync failure due to f2fs_lock_op")
>>>
>>> The previous patch fixes quota_sync gets EAGAIN all the time.
>>> How about this? It seems this works for fsstress test.
>>>
> 
> Then this?
> 
> ---
>  fs/f2fs/segment.c | 11 +++
>  1 file changed, 11 insertions(+)
> 
> diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
> index ebbadde6cbced..ed11dcf2d69ed 100644
> --- a/fs/f2fs/segment.c
> +++ b/fs/f2fs/segment.c
> @@ -3107,6 +3107,14 @@ void f2fs_allocate_data_block(struct f2fs_sb_info 
> *sbi, struct page *page,
>   type = CURSEG_COLD_DATA;
>   }
>  
> + /*
> +  * We need to wait for node_write to avoid block allocation during
> +  * checkpoint. This can only happen to quota writes which can cause
> +  * the below discard race condition.
> +  */
> + if (IS_DATASEG(type))
> + down_write(>node_write);
> +
>   down_read(_I(sbi)->curseg_lock);
>  
>   mutex_lock(>curseg_mutex);
> @@ -3174,6 +3182,9 @@ void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, 
> struct page *page,

Minor thing: unlock order.

if (IS_DATASEG(type))
up_write(>node_write);

Could you merge the diff into this patch?

>  
>   if (put_pin_sem)
>   up_read(>pin_sem);
> +
> + if (IS_DATASEG(type))
> + up_write(>node_write);
>  }
>  
>  static void update_device_state(struct f2fs_io_info *fio)
> 


Re: [PATCH] rtc: rv3028: add the missed check for devm_regmap_init_i2c

2020-05-29 Thread Alexandre Belloni
On Wed, 27 May 2020 23:07:04 +0800, Chuhong Yuan wrote:
> rv3028_probe() misses a check for devm_regmap_init_i2c().
> Add the missed check to fix it.

Applied, thanks!

[1/1] rtc: rv3028: add the missed check for devm_regmap_init_i2c

Best regards,
-- 
Alexandre Belloni 


[PATCH net-next v3] hinic: add set_channels ethtool_ops support

2020-05-29 Thread Luo bin
add support to change TX/RX queue number with ethtool -L ethx combined

Signed-off-by: Luo bin 
---
 .../net/ethernet/huawei/hinic/hinic_ethtool.c | 40 +++
 .../net/ethernet/huawei/hinic/hinic_main.c|  2 +-
 drivers/net/ethernet/huawei/hinic/hinic_tx.c  |  5 +++
 3 files changed, 38 insertions(+), 9 deletions(-)

diff --git a/drivers/net/ethernet/huawei/hinic/hinic_ethtool.c 
b/drivers/net/ethernet/huawei/hinic/hinic_ethtool.c
index ace18d258049..efb02e03e7da 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_ethtool.c
+++ b/drivers/net/ethernet/huawei/hinic/hinic_ethtool.c
@@ -619,14 +619,37 @@ static void hinic_get_channels(struct net_device *netdev,
struct hinic_dev *nic_dev = netdev_priv(netdev);
struct hinic_hwdev *hwdev = nic_dev->hwdev;
 
-   channels->max_rx = hwdev->nic_cap.max_qps;
-   channels->max_tx = hwdev->nic_cap.max_qps;
-   channels->max_other = 0;
-   channels->max_combined = 0;
-   channels->rx_count = hinic_hwdev_num_qps(hwdev);
-   channels->tx_count = hinic_hwdev_num_qps(hwdev);
-   channels->other_count = 0;
-   channels->combined_count = 0;
+   channels->max_combined = nic_dev->max_qps;
+   channels->combined_count = hinic_hwdev_num_qps(hwdev);
+}
+
+static int hinic_set_channels(struct net_device *netdev,
+ struct ethtool_channels *channels)
+{
+   struct hinic_dev *nic_dev = netdev_priv(netdev);
+   unsigned int count = channels->combined_count;
+   int err;
+
+   netif_info(nic_dev, drv, netdev, "Set max combined queue number from %d 
to %d\n",
+  hinic_hwdev_num_qps(nic_dev->hwdev), count);
+
+   if (netif_running(netdev)) {
+   netif_info(nic_dev, drv, netdev, "Restarting netdev\n");
+   hinic_close(netdev);
+
+   nic_dev->hwdev->nic_cap.num_qps = count;
+
+   err = hinic_open(netdev);
+   if (err) {
+   netif_err(nic_dev, drv, netdev,
+ "Failed to open netdev\n");
+   return -EFAULT;
+   }
+   } else {
+   nic_dev->hwdev->nic_cap.num_qps = count;
+   }
+
+   return 0;
 }
 
 static int hinic_get_rss_hash_opts(struct hinic_dev *nic_dev,
@@ -1219,6 +1242,7 @@ static const struct ethtool_ops hinic_ethtool_ops = {
.get_ringparam = hinic_get_ringparam,
.set_ringparam = hinic_set_ringparam,
.get_channels = hinic_get_channels,
+   .set_channels = hinic_set_channels,
.get_rxnfc = hinic_get_rxnfc,
.set_rxnfc = hinic_set_rxnfc,
.get_rxfh_key_size = hinic_get_rxfh_key_size,
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_main.c 
b/drivers/net/ethernet/huawei/hinic/hinic_main.c
index c8ab129a7ae8..e9e6f4c9309a 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_main.c
+++ b/drivers/net/ethernet/huawei/hinic/hinic_main.c
@@ -326,7 +326,6 @@ static void hinic_enable_rss(struct hinic_dev *nic_dev)
int i, node, err = 0;
u16 num_cpus = 0;
 
-   nic_dev->max_qps = hinic_hwdev_max_num_qps(hwdev);
if (nic_dev->max_qps <= 1) {
nic_dev->flags &= ~HINIC_RSS_ENABLE;
nic_dev->rss_limit = nic_dev->max_qps;
@@ -1031,6 +1030,7 @@ static int nic_dev_init(struct pci_dev *pdev)
nic_dev->rq_depth = HINIC_RQ_DEPTH;
nic_dev->sriov_info.hwdev = hwdev;
nic_dev->sriov_info.pdev = pdev;
+   nic_dev->max_qps = num_qps;
 
sema_init(_dev->mgmt_lock, 1);
 
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_tx.c 
b/drivers/net/ethernet/huawei/hinic/hinic_tx.c
index 4c66a0bc1b28..6da761d7a6ef 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_tx.c
+++ b/drivers/net/ethernet/huawei/hinic/hinic_tx.c
@@ -470,6 +470,11 @@ netdev_tx_t hinic_xmit_frame(struct sk_buff *skb, struct 
net_device *netdev)
struct hinic_txq *txq;
struct hinic_qp *qp;
 
+   if (unlikely(!netif_carrier_ok(netdev))) {
+   dev_kfree_skb_any(skb);
+   return NETDEV_TX_OK;
+   }
+
txq = _dev->txqs[q_id];
qp = container_of(txq->sq, struct hinic_qp, sq);
 
-- 
2.17.1



Re: [PATCH] Revert "f2fs: fix quota_sync failure due to f2fs_lock_op"

2020-05-29 Thread Jaegeuk Kim
On 05/30, Chao Yu wrote:
> On 2020/5/30 6:34, Jaegeuk Kim wrote:
> > On 05/29, Chao Yu wrote:
> >> Under heavy fsstress, we may triggle panic while issuing discard,
> >> because __check_sit_bitmap() detects that discard command may earse
> >> valid data blocks, the root cause is as below race stack described,
> >> since we removed lock when flushing quota data, quota data writeback
> >> may race with write_checkpoint(), so that it causes inconsistency in
> >> between cached discard entry and segment bitmap.
> >>
> >> - f2fs_write_checkpoint
> >>  - block_operations
> >>   - set_sbi_flag(sbi, SBI_QUOTA_SKIP_FLUSH)
> >>  - f2fs_flush_sit_entries
> >>   - add_discard_addrs
> >>- __set_bit_le(i, (void *)de->discard_map);
> >>- f2fs_write_data_pages
> >> - f2fs_write_single_data_page
> >>   : inode is quota one, 
> >> cp_rwsem won't be locked
> >>  - f2fs_do_write_data_page
> >>   - f2fs_allocate_data_block
> >>- f2fs_wait_discard_bio
> >>  : discard entry has not 
> >> been added yet.
> >>- update_sit_entry
> >>  - f2fs_clear_prefree_segments
> >>   - f2fs_issue_discard
> >>   : add discard entry
> >>
> >> This patch fixes this issue by reverting 435cbab95e39 ("f2fs: fix 
> >> quota_sync
> >> failure due to f2fs_lock_op").
> >>
> >> Fixes: 435cbab95e39 ("f2fs: fix quota_sync failure due to f2fs_lock_op")
> > 
> > The previous patch fixes quota_sync gets EAGAIN all the time.
> > How about this? It seems this works for fsstress test.
> > 

Then this?

---
 fs/f2fs/segment.c | 11 +++
 1 file changed, 11 insertions(+)

diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index ebbadde6cbced..ed11dcf2d69ed 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -3107,6 +3107,14 @@ void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, 
struct page *page,
type = CURSEG_COLD_DATA;
}
 
+   /*
+* We need to wait for node_write to avoid block allocation during
+* checkpoint. This can only happen to quota writes which can cause
+* the below discard race condition.
+*/
+   if (IS_DATASEG(type))
+   down_write(>node_write);
+
down_read(_I(sbi)->curseg_lock);
 
mutex_lock(>curseg_mutex);
@@ -3174,6 +3182,9 @@ void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, 
struct page *page,
 
if (put_pin_sem)
up_read(>pin_sem);
+
+   if (IS_DATASEG(type))
+   up_write(>node_write);
 }
 
 static void update_device_state(struct f2fs_io_info *fio)
-- 
2.27.0.rc0.183.gde8f92d652-goog



[PATCH v3] virtio_vsock: Fix race condition in virtio_transport_recv_pkt

2020-05-29 Thread Jia He
When client on the host tries to connect(SOCK_STREAM, O_NONBLOCK) to the
server on the guest, there will be a panic on a ThunderX2 (armv8a server):

[  463.718844] Unable to handle kernel NULL pointer dereference at virtual 
address 
[  463.718848] Mem abort info:
[  463.718849]   ESR = 0x9644
[  463.718852]   EC = 0x25: DABT (current EL), IL = 32 bits
[  463.718853]   SET = 0, FnV = 0
[  463.718854]   EA = 0, S1PTW = 0
[  463.718855] Data abort info:
[  463.718856]   ISV = 0, ISS = 0x0044
[  463.718857]   CM = 0, WnR = 1
[  463.718859] user pgtable: 4k pages, 48-bit VAs, pgdp=008f6f6e9000
[  463.718861] [] pgd=
[  463.718866] Internal error: Oops: 9644 [#1] SMP
[...]
[  463.718977] CPU: 213 PID: 5040 Comm: vhost-5032 Tainted: G   O  
5.7.0-rc7+ #139
[  463.718980] Hardware name: GIGABYTE R281-T91-00/MT91-FS1-00, BIOS F06 
09/25/2018
[  463.718982] pstate: 6049 (nZCv daif +PAN -UAO)
[  463.718995] pc : virtio_transport_recv_pkt+0x4c8/0xd40 
[vmw_vsock_virtio_transport_common]
[  463.718999] lr : virtio_transport_recv_pkt+0x1fc/0xd40 
[vmw_vsock_virtio_transport_common]
[  463.719000] sp : 80002dbe3c40
[...]
[  463.719025] Call trace:
[  463.719030]  virtio_transport_recv_pkt+0x4c8/0xd40 
[vmw_vsock_virtio_transport_common]
[  463.719034]  vhost_vsock_handle_tx_kick+0x360/0x408 [vhost_vsock]
[  463.719041]  vhost_worker+0x100/0x1a0 [vhost]
[  463.719048]  kthread+0x128/0x130
[  463.719052]  ret_from_fork+0x10/0x18

The race condition is as follows:
Task1Task2
==
__sock_release   virtio_transport_recv_pkt
  __vsock_release  vsock_find_bound_socket (found sk)
lock_sock_nested
vsock_remove_sock
sock_orphan
  sk_set_socket(sk, NULL)
sk->sk_shutdown = SHUTDOWN_MASK
...
release_sock
lock_sock
   virtio_transport_recv_connecting
 sk->sk_socket->state (panic!)

The root cause is that vsock_find_bound_socket can't hold the lock_sock,
so there is a small race window between vsock_find_bound_socket() and
lock_sock(). If __vsock_release() is running in another task,
sk->sk_socket will be set to NULL inadvertently.

This fixes it by checking sk->sk_shutdown(suggested by Stefano) after
lock_sock since sk->sk_shutdown is set to SHUTDOWN_MASK under the
protection of lock_sock_nested.

Signed-off-by: Jia He 
Cc: sta...@vger.kernel.org
Reviewed-by: Stefano Garzarella 
---
v3: - describe the fix of race condition more clearly
- refine the commit log

 net/vmw_vsock/virtio_transport_common.c | 8 
 1 file changed, 8 insertions(+)

diff --git a/net/vmw_vsock/virtio_transport_common.c 
b/net/vmw_vsock/virtio_transport_common.c
index 69efc891885f..0edda1edf988 100644
--- a/net/vmw_vsock/virtio_transport_common.c
+++ b/net/vmw_vsock/virtio_transport_common.c
@@ -1132,6 +1132,14 @@ void virtio_transport_recv_pkt(struct virtio_transport 
*t,
 
lock_sock(sk);
 
+   /* Check if sk has been released before lock_sock */
+   if (sk->sk_shutdown == SHUTDOWN_MASK) {
+   (void)virtio_transport_reset_no_sock(t, pkt);
+   release_sock(sk);
+   sock_put(sk);
+   goto free_pkt;
+   }
+
/* Update CID in case it has changed after a transport reset event */
vsk->local_addr.svm_cid = dst.svm_cid;
 
-- 
2.17.1



Re: [PATCH] HID: usbhid: do not sleep when opening device

2020-05-29 Thread Dmitry Torokhov
On Fri, May 29, 2020 at 06:22:49PM -0700, Guenter Roeck wrote:
> On Fri, May 29, 2020 at 6:09 PM Dmitry Torokhov
>  wrote:
> >
> > On Fri, May 29, 2020 at 05:48:26PM -0700, Guenter Roeck wrote:
> > > On Fri, May 29, 2020 at 4:50 PM Nicolas Boichat  
> > > wrote:
> > > >
> > > > On Sat, May 30, 2020 at 3:59 AM Dmitry Torokhov
> > > >  wrote:
> > > > >
> > > > > usbhid tries to give the device 50 milliseconds to drain its queues
> > > > > when opening the device, but does it naively by simply sleeping in 
> > > > > open
> > > > > handler, which slows down device probing (and thus may affect overall
> > > > > boot time).
> > > > >
> > > > > However we do not need to sleep as we can instead mark a point of time
> > > > > in the future when we should start processing the events.
> > > > >
> > > > > Reported-by: Nicolas Boichat 
> > > > > Signed-off-by: Dmitry Torokhov 
> > > > > ---
> > > > >  drivers/hid/usbhid/hid-core.c | 27 +++
> > > > >  drivers/hid/usbhid/usbhid.h   |  1 +
> > > > >  2 files changed, 16 insertions(+), 12 deletions(-)
> > > > >
> > > > > diff --git a/drivers/hid/usbhid/hid-core.c 
> > > > > b/drivers/hid/usbhid/hid-core.c
> > > > > index c7bc9db5b192..e69992e945b2 100644
> > > > > --- a/drivers/hid/usbhid/hid-core.c
> > > > > +++ b/drivers/hid/usbhid/hid-core.c
> > > > > @@ -95,6 +95,19 @@ static int hid_start_in(struct hid_device *hid)
> > > > > set_bit(HID_NO_BANDWIDTH, 
> > > > > >iofl);
> > > > > } else {
> > > > > clear_bit(HID_NO_BANDWIDTH, >iofl);
> > > > > +
> > > > > +   if (test_and_clear_bit(HID_RESUME_RUNNING,
> > > > > +  >iofl)) {
> > > > > +   /*
> > > > > +* In case events are generated while 
> > > > > nobody was
> > > > > +* listening, some are released when 
> > > > > the device
> > > > > +* is re-opened. Wait 50 msec for the 
> > > > > queue to
> > > > > +* empty before allowing events to go 
> > > > > through
> > > > > +* hid.
> > > > > +*/
> > > > > +   usbhid->input_start_time = jiffies +
> > > > > +  
> > > > > msecs_to_jiffies(50);
> > > > > +   }
> > > > > }
> > > > > }
> > > > > spin_unlock_irqrestore(>lock, flags);
> > > > > @@ -280,7 +293,8 @@ static void hid_irq_in(struct urb *urb)
> > > > > if (!test_bit(HID_OPENED, >iofl))
> > > > > break;
> > > > > usbhid_mark_busy(usbhid);
> > > > > -   if (!test_bit(HID_RESUME_RUNNING, >iofl)) {
> > > > > +   if (!test_bit(HID_RESUME_RUNNING, >iofl) &&
> > > > > +   time_after(jiffies, usbhid->input_start_time)) {
> > > >
> > > > Are we worried about jiffies overflowing (32-bit@1000Hz is "only" 49.7 
> > > > days...)
> > > >
> > >
> > > time_after() is overflow-safe. That is why it is used and jiffies is
> > > not compared directly.
> >
> > Well, it is overflow safe, but still can not measure more than 50 days,
> > so if you have a device open for 50+ days there will be a 50msec gap
> > where it may lose events.
> >
> 
> Or you could explicitly use 64-bit jiffies.

Indeed.

Jiri, Benjamin, do you have preference between jiffies64 and ktime_t? I
guess jiffies64 is a tiny bit less expensive.

Thanks.

-- 
Dmitry


Re: [PATCH 1/2] rtc: goldfish: Use correct return value for goldfish_rtc_probe()

2020-05-29 Thread Alexandre Belloni
On Mon, 25 May 2020 09:39:47 +0800, Tiezhu Yang wrote:
> When call function devm_platform_ioremap_resource(), we should use IS_ERR()
> to check the return value and return PTR_ERR() if failed.

Applied, thanks!

[1/2] rtc: goldfish: Use correct return value for goldfish_rtc_probe()
  commit: e712bb5c45313b3ab35ece2fb0a44cdc49548bd9
[2/2] rtc: mpc5121: Use correct return value for mpc5121_rtc_probe()
  commit: 4c2a13a2d686a7412d862802156eebd0e15df7ad

Best regards,
-- 
Alexandre Belloni 


Re: [PATCH net-next v2] hinic: add set_channels ethtool_ops support

2020-05-29 Thread luobin (L)

On 2020/5/30 1:44, Jakub Kicinski wrote:


On Thu, 28 May 2020 18:36:33 + Luo bin wrote:

add support to change TX/RX queue number with ethtool -L

Signed-off-by: Luo bin 

Luo bin, your patches continue to come with Date: header being in the
past. Also suspiciously no time zone offset. Can you address this?


+static int hinic_set_channels(struct net_device *netdev,
+ struct ethtool_channels *channels)
+{
+   struct hinic_dev *nic_dev = netdev_priv(netdev);
+   unsigned int count = channels->combined_count;
+   int err;
+
+   if (!count) {
+   netif_err(nic_dev, drv, netdev,
+ "Unsupported combined_count: 0\n");
+   return -EINVAL;
+   }

This check has been added to the core since the last version of you
patch:

/* ensure there is at least one RX and one TX channel */
if (!channels.combined_count &&
(!channels.rx_count || !channels.tx_count))
return -EINVAL;


+   netif_info(nic_dev, drv, netdev, "Set max combined queue number from %d to 
%d\n",
+  hinic_hwdev_num_qps(nic_dev->hwdev), count);
+
+   if (netif_running(netdev)) {
+   netif_info(nic_dev, drv, netdev, "Restarting netdev\n");
+   hinic_close(netdev);
+
+   nic_dev->hwdev->nic_cap.num_qps = count;
+
+   err = hinic_open(netdev);
+   if (err) {
+   netif_err(nic_dev, drv, netdev,
+ "Failed to open netdev\n");
+   return -EFAULT;
+   }
+   } else {
+   nic_dev->hwdev->nic_cap.num_qps = count;
+   }
+
+   return 0;
  }

Will fix. Thanks.
.


Re: [PATCH] Revert "f2fs: fix quota_sync failure due to f2fs_lock_op"

2020-05-29 Thread Chao Yu
On 2020/5/30 6:34, Jaegeuk Kim wrote:
> On 05/29, Chao Yu wrote:
>> Under heavy fsstress, we may triggle panic while issuing discard,
>> because __check_sit_bitmap() detects that discard command may earse
>> valid data blocks, the root cause is as below race stack described,
>> since we removed lock when flushing quota data, quota data writeback
>> may race with write_checkpoint(), so that it causes inconsistency in
>> between cached discard entry and segment bitmap.
>>
>> - f2fs_write_checkpoint
>>  - block_operations
>>   - set_sbi_flag(sbi, SBI_QUOTA_SKIP_FLUSH)
>>  - f2fs_flush_sit_entries
>>   - add_discard_addrs
>>- __set_bit_le(i, (void *)de->discard_map);
>>  - f2fs_write_data_pages
>>   - f2fs_write_single_data_page
>> : inode is quota one, 
>> cp_rwsem won't be locked
>>- f2fs_do_write_data_page
>> - f2fs_allocate_data_block
>>  - f2fs_wait_discard_bio
>>: discard entry has not 
>> been added yet.
>>  - update_sit_entry
>>  - f2fs_clear_prefree_segments
>>   - f2fs_issue_discard
>>   : add discard entry
>>
>> This patch fixes this issue by reverting 435cbab95e39 ("f2fs: fix quota_sync
>> failure due to f2fs_lock_op").
>>
>> Fixes: 435cbab95e39 ("f2fs: fix quota_sync failure due to f2fs_lock_op")
> 
> The previous patch fixes quota_sync gets EAGAIN all the time.
> How about this? It seems this works for fsstress test.
> 
> ---
>  fs/f2fs/segment.c | 11 +++
>  1 file changed, 11 insertions(+)
> 
> diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
> index ebbadde6cbced..f67cffc38975e 100644
> --- a/fs/f2fs/segment.c
> +++ b/fs/f2fs/segment.c
> @@ -3095,6 +3095,14 @@ void f2fs_allocate_data_block(struct f2fs_sb_info 
> *sbi, struct page *page,
>   struct curseg_info *curseg = CURSEG_I(sbi, type);
>   bool put_pin_sem = false;
>  
> + /*
> +  * We need to wait for node_write to avoid block allocation during
> +  * checkpoint. This can only happen to quota writes which can cause
> +  * the below discard race condition.
> +  */
> + if (IS_DATASEG(type))

type is CURSEG_COLD_DATA_PINNED, IS_DATASEG(CURSEG_COLD_DATA_PINNED) should be 
false,
then node_write lock will not be held, later type will be updated to 
CURSEG_COLD_DATA,
then we will try to release node_write.

Thanks,

> + down_write(>node_write);
> +
>   if (type == CURSEG_COLD_DATA) {
>   /* GC during CURSEG_COLD_DATA_PINNED allocation */
>   if (down_read_trylock(>pin_sem)) {
> @@ -3174,6 +3182,9 @@ void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, 
> struct page *page,
>  
>   if (put_pin_sem)
>   up_read(>pin_sem);
> +
> + if (IS_DATASEG(type))
> + up_write(>node_write);
>  }
>  
>  static void update_device_state(struct f2fs_io_info *fio)
> 


Re: 28307d938f ("percpu: make pcpu_alloc() aware of current gfp .."): BUG: kernel reboot-without-warning in boot stage

2020-05-29 Thread Philip Li
On Fri, May 29, 2020 at 01:32:28PM +0100, Filipe Manana wrote:
> 
> 
> On 29/05/20 08:16, kernel test robot wrote:
> > Greetings,
> > 
> > 0day kernel testing robot got the below dmesg and the first bad commit is
> > 
> > https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git master
> > 
> > commit 28307d938fb2e4056ed4c982c06d1503d7719813
> > Author: Filipe Manana 
> > AuthorDate: Thu May 7 18:36:10 2020 -0700
> > Commit: Linus Torvalds 
> > CommitDate: Thu May 7 19:27:21 2020 -0700
> > 
> > percpu: make pcpu_alloc() aware of current gfp context
> > 
> > Since 5.7-rc1, on btrfs we have a percpu counter initialization for
> > which we always pass a GFP_KERNEL gfp_t argument (this happens since
> > commit 2992df73268f78 ("btrfs: Implement DREW lock")).
> > 
> > That is safe in some contextes but not on others where allowing fs
> > reclaim could lead to a deadlock because we are either holding some
> > btrfs lock needed for a transaction commit or holding a btrfs
> > transaction handle open.  Because of that we surround the call to the
> > function that initializes the percpu counter with a NOFS context using
> > memalloc_nofs_save() (this is done at btrfs_init_fs_root()).
> > 
> > However it turns out that this is not enough to prevent a possible
> > deadlock because percpu_alloc() determines if it is in an atomic context
> > by looking exclusively at the gfp flags passed to it (GFP_KERNEL in this
> > case) and it is not aware that a NOFS context is set.
> > 
> > Because percpu_alloc() thinks it is in a non atomic context it locks the
> > pcpu_alloc_mutex.  This can result in a btrfs deadlock when
> > pcpu_balance_workfn() is running, has acquired that mutex and is waiting
> > for reclaim, while the btrfs task that called percpu_counter_init() (and
> > therefore percpu_alloc()) is holding either the btrfs commit_root
> > semaphore or a transaction handle (done fs/btrfs/backref.c:
> > iterate_extent_inodes()), which prevents reclaim from finishing as an
> > attempt to commit the current btrfs transaction will deadlock.
> > 
> > Lockdep reports this issue with the following trace:
> > 
> >   ==
> >   WARNING: possible circular locking dependency detected
> >   5.6.0-rc7-btrfs-next-77 #1 Not tainted
> >   --
> >   kswapd0/91 is trying to acquire lock:
> >   8938a3b3fdc8 (_node->mutex){+.+.}, at: 
> > __btrfs_release_delayed_node.part.0+0x3f/0x320 [btrfs]
> > 
> >   but task is already holding lock:
> >   b4f0dbc0 (fs_reclaim){+.+.}, at: __fs_reclaim_acquire+0x5/0x30
> > 
> >   which lock already depends on the new lock.
> > 
> >   the existing dependency chain (in reverse order) is:
> > 
> >   -> #4 (fs_reclaim){+.+.}:
> >  fs_reclaim_acquire.part.0+0x25/0x30
> >  __kmalloc+0x5f/0x3a0
> >  pcpu_create_chunk+0x19/0x230
> >  pcpu_balance_workfn+0x56a/0x680
> >  process_one_work+0x235/0x5f0
> >  worker_thread+0x50/0x3b0
> >  kthread+0x120/0x140
> >  ret_from_fork+0x3a/0x50
> > 
> >   -> #3 (pcpu_alloc_mutex){+.+.}:
> >  __mutex_lock+0xa9/0xaf0
> >  pcpu_alloc+0x480/0x7c0
> >  __percpu_counter_init+0x50/0xd0
> >  btrfs_drew_lock_init+0x22/0x70 [btrfs]
> >  btrfs_get_fs_root+0x29c/0x5c0 [btrfs]
> >  resolve_indirect_refs+0x120/0xa30 [btrfs]
> >  find_parent_nodes+0x50b/0xf30 [btrfs]
> >  btrfs_find_all_leafs+0x60/0xb0 [btrfs]
> >  iterate_extent_inodes+0x139/0x2f0 [btrfs]
> >  iterate_inodes_from_logical+0xa1/0xe0 [btrfs]
> >  btrfs_ioctl_logical_to_ino+0xb4/0x190 [btrfs]
> >  btrfs_ioctl+0x165a/0x3130 [btrfs]
> >  ksys_ioctl+0x87/0xc0
> >  __x64_sys_ioctl+0x16/0x20
> >  do_syscall_64+0x5c/0x260
> >  entry_SYSCALL_64_after_hwframe+0x49/0xbe
> > 
> >   -> #2 (_info->commit_root_sem){}:
> >  down_write+0x38/0x70
> >  btrfs_cache_block_group+0x2ec/0x500 [btrfs]
> >  find_free_extent+0xc6a/0x1600 [btrfs]
> >  btrfs_reserve_extent+0x9b/0x180 [btrfs]
> >  btrfs_alloc_tree_block+0xc1/0x350 [btrfs]
> >  alloc_tree_block_no_bg_flush+0x4a/0x60 [btrfs]
> >  __btrfs_cow_block+0x122/0x5a0 [btrfs]
> >  btrfs_cow_block+0x106/0x240 [btrfs]
> >  commit_cowonly_roots+0x55/0x310 [btrfs]
> >  btrfs_commit_transaction+0x509/0xb20 [btrfs]
> >  sync_filesystem+0x74/0x90
> >  generic_shutdown_super+0x22/0x100
> >  kill_anon_super+0x14/0x30
> >  

Re: [PATCH] HID: usbhid: do not sleep when opening device

2020-05-29 Thread Guenter Roeck
On Fri, May 29, 2020 at 6:09 PM Dmitry Torokhov
 wrote:
>
> On Fri, May 29, 2020 at 05:48:26PM -0700, Guenter Roeck wrote:
> > On Fri, May 29, 2020 at 4:50 PM Nicolas Boichat  
> > wrote:
> > >
> > > On Sat, May 30, 2020 at 3:59 AM Dmitry Torokhov
> > >  wrote:
> > > >
> > > > usbhid tries to give the device 50 milliseconds to drain its queues
> > > > when opening the device, but does it naively by simply sleeping in open
> > > > handler, which slows down device probing (and thus may affect overall
> > > > boot time).
> > > >
> > > > However we do not need to sleep as we can instead mark a point of time
> > > > in the future when we should start processing the events.
> > > >
> > > > Reported-by: Nicolas Boichat 
> > > > Signed-off-by: Dmitry Torokhov 
> > > > ---
> > > >  drivers/hid/usbhid/hid-core.c | 27 +++
> > > >  drivers/hid/usbhid/usbhid.h   |  1 +
> > > >  2 files changed, 16 insertions(+), 12 deletions(-)
> > > >
> > > > diff --git a/drivers/hid/usbhid/hid-core.c 
> > > > b/drivers/hid/usbhid/hid-core.c
> > > > index c7bc9db5b192..e69992e945b2 100644
> > > > --- a/drivers/hid/usbhid/hid-core.c
> > > > +++ b/drivers/hid/usbhid/hid-core.c
> > > > @@ -95,6 +95,19 @@ static int hid_start_in(struct hid_device *hid)
> > > > set_bit(HID_NO_BANDWIDTH, 
> > > > >iofl);
> > > > } else {
> > > > clear_bit(HID_NO_BANDWIDTH, >iofl);
> > > > +
> > > > +   if (test_and_clear_bit(HID_RESUME_RUNNING,
> > > > +  >iofl)) {
> > > > +   /*
> > > > +* In case events are generated while 
> > > > nobody was
> > > > +* listening, some are released when 
> > > > the device
> > > > +* is re-opened. Wait 50 msec for the 
> > > > queue to
> > > > +* empty before allowing events to go 
> > > > through
> > > > +* hid.
> > > > +*/
> > > > +   usbhid->input_start_time = jiffies +
> > > > +  
> > > > msecs_to_jiffies(50);
> > > > +   }
> > > > }
> > > > }
> > > > spin_unlock_irqrestore(>lock, flags);
> > > > @@ -280,7 +293,8 @@ static void hid_irq_in(struct urb *urb)
> > > > if (!test_bit(HID_OPENED, >iofl))
> > > > break;
> > > > usbhid_mark_busy(usbhid);
> > > > -   if (!test_bit(HID_RESUME_RUNNING, >iofl)) {
> > > > +   if (!test_bit(HID_RESUME_RUNNING, >iofl) &&
> > > > +   time_after(jiffies, usbhid->input_start_time)) {
> > >
> > > Are we worried about jiffies overflowing (32-bit@1000Hz is "only" 49.7 
> > > days...)
> > >
> >
> > time_after() is overflow-safe. That is why it is used and jiffies is
> > not compared directly.
>
> Well, it is overflow safe, but still can not measure more than 50 days,
> so if you have a device open for 50+ days there will be a 50msec gap
> where it may lose events.
>

Or you could explicitly use 64-bit jiffies.

Guenter

> I guess we can switch to ktime(). A bit more expensive on 32 bits, but
> in reality I do not think anyone would care.
>
> Thanks.
>
> --
> Dmitry


Re: [PATCH 1/2] rtc: snvs: Make SNVS clock always prepared

2020-05-29 Thread Alexandre Belloni
On Fri, 22 May 2020 10:19:55 +0800, Anson Huang wrote:
> In IRQ handler, ONLY clock enable/disable is called due to
> clock prepare can NOT be called in interrupt context, but
> clock enable/disable will return failure if prepare count
> is 0, to fix this issue, just make SNVS clock always prepared
> there, the SNVS clock has no prepare function implemented,
> so it won't impact anything.

Applied, thanks!

[1/2] rtc: snvs: Make SNVS clock always prepared
  commit: 20af67700bc39bccd838414128f63a72965de6e7
[2/2] rtc: snvs: Add necessary clock operations for RTC APIs
  commit: 4b957bde561f3a56865395be06f1be2c196b0b5e

Best regards,
-- 
Alexandre Belloni 


Darowizna

2020-05-29 Thread SHANE MISSLER



Nazywam się SHANE MISSLER, wygrałem loterię o wartości 451 milionów dolarów i 
postanowiłem przekazać tobie kwotę 5500 000,00 €. Przekazuję wam tę darowiznę 
za miłość, którą mam dla ludzkości i za pomoc osobom dotkniętym pandemią w 
waszym kraju.Skontaktuj się ze mną, aby odebrać tę darowiznęDziękuję i Bóg 
zapłać.pozdrowieniaSHANE MISSLER


RE: [PATCH v2] virtio_vsock: Fix race condition in virtio_transport_recv_pkt

2020-05-29 Thread Justin He
Hi Stefano

> -Original Message-
> From: Stefano Garzarella 
> Sent: Saturday, May 30, 2020 12:34 AM
> To: Justin He 
> Cc: Stefan Hajnoczi ; David S. Miller
> ; Jakub Kicinski ;
> k...@vger.kernel.org; virtualizat...@lists.linux-foundation.org;
> net...@vger.kernel.org; linux-kernel@vger.kernel.org; Kaly Xin
> ; sta...@vger.kernel.org
> Subject: Re: [PATCH v2] virtio_vsock: Fix race condition in
> virtio_transport_recv_pkt
>
> On Fri, May 29, 2020 at 11:21:02PM +0800, Jia He wrote:
> > When client tries to connect(SOCK_STREAM) the server in the guest with
> > NONBLOCK mode, there will be a panic on a ThunderX2 (armv8a server):
> > [  463.718844][ T5040] Unable to handle kernel NULL pointer dereference
> at virtual address 
> > [  463.718848][ T5040] Mem abort info:
> > [  463.718849][ T5040]   ESR = 0x9644
> > [  463.718852][ T5040]   EC = 0x25: DABT (current EL), IL = 32 bits
> > [  463.718853][ T5040]   SET = 0, FnV = 0
> > [  463.718854][ T5040]   EA = 0, S1PTW = 0
> > [  463.718855][ T5040] Data abort info:
> > [  463.718856][ T5040]   ISV = 0, ISS = 0x0044
> > [  463.718857][ T5040]   CM = 0, WnR = 1
> > [  463.718859][ T5040] user pgtable: 4k pages, 48-bit VAs,
> pgdp=008f6f6e9000
> > [  463.718861][ T5040] [] pgd=
> > [  463.718866][ T5040] Internal error: Oops: 9644 [#1] SMP
> > [...]
> > [  463.718977][ T5040] CPU: 213 PID: 5040 Comm: vhost-5032 Tainted: G
> O  5.7.0-rc7+ #139
> > [  463.718980][ T5040] Hardware name: GIGABYTE R281-T91-00/MT91-FS1-00,
> BIOS F06 09/25/2018
> > [  463.718982][ T5040] pstate: 6049 (nZCv daif +PAN -UAO)
> > [  463.718995][ T5040] pc : virtio_transport_recv_pkt+0x4c8/0xd40
> [vmw_vsock_virtio_transport_common]
> > [  463.718999][ T5040] lr : virtio_transport_recv_pkt+0x1fc/0xd40
> [vmw_vsock_virtio_transport_common]
> > [  463.719000][ T5040] sp : 80002dbe3c40
> > [...]
> > [  463.719025][ T5040] Call trace:
> > [  463.719030][ T5040]  virtio_transport_recv_pkt+0x4c8/0xd40
> [vmw_vsock_virtio_transport_common]
> > [  463.719034][ T5040]  vhost_vsock_handle_tx_kick+0x360/0x408
> [vhost_vsock]
> > [  463.719041][ T5040]  vhost_worker+0x100/0x1a0 [vhost]
> > [  463.719048][ T5040]  kthread+0x128/0x130
> > [  463.719052][ T5040]  ret_from_fork+0x10/0x18
>  ^ ^
> Maybe we can remove these two columns from the commit message.
>
> >
> > The race condition as follows:
> > Task1Task2
> > ==
> > __sock_release   virtio_transport_recv_pkt
> >   __vsock_release  vsock_find_bound_socket (found)
> > lock_sock_nested
> > vsock_remove_sock
> > sock_orphan
> >   sk_set_socket(sk, NULL)
>
> Here we can add:
>   sk->sk_shutdown = SHUTDOWN_MASK;

Indeed. This makes it more clearly

--
Cheers,
Justin (Jia He)


>
> > ...
> > release_sock
> > lock_sock
> >virtio_transport_recv_connecting
> >  sk->sk_socket->state (panic)
> >
> > The root cause is that vsock_find_bound_socket can't hold the lock_sock,
> > so there is a small race window between vsock_find_bound_socket() and
> > lock_sock(). If there is __vsock_release() in another task, sk->sk_socket
> > will be set to NULL inadvertently.
> >
> > This fixes it by checking sk->sk_shutdown.
> >
> > Signed-off-by: Jia He 
> > Cc: sta...@vger.kernel.org
> > Cc: Stefano Garzarella 
> > ---
> > v2: use lightweight checking suggested by Stefano Garzarella
> >
> >  net/vmw_vsock/virtio_transport_common.c | 8 
> >  1 file changed, 8 insertions(+)
> >
> > diff --git a/net/vmw_vsock/virtio_transport_common.c
> b/net/vmw_vsock/virtio_transport_common.c
> > index 69efc891885f..0edda1edf988 100644
> > --- a/net/vmw_vsock/virtio_transport_common.c
> > +++ b/net/vmw_vsock/virtio_transport_common.c
> > @@ -1132,6 +1132,14 @@ void virtio_transport_recv_pkt(struct
> virtio_transport *t,
> >
> >  lock_sock(sk);
> >
> > +/* Check if sk has been released before lock_sock */
> > +if (sk->sk_shutdown == SHUTDOWN_MASK) {
> > +(void)virtio_transport_reset_no_sock(t, pkt);
> > +release_sock(sk);
> > +sock_put(sk);
> > +goto free_pkt;
> > +}
> > +
> >  /* Update CID in case it has changed after a transport reset event */
> >  vsk->local_addr.svm_cid = dst.svm_cid;
> >
> > --
> > 2.17.1
> >
>
> Anyway, the patch LGTM, let see what David and other say.
>
> Reviewed-by: Stefano Garzarella 
>
> Thanks,
> Stefano

IMPORTANT NOTICE: The contents of this email and any attachments are 
confidential and may also be privileged. If you are not the intended recipient, 
please notify the sender immediately and do not disclose the contents to any 
other person, use it for any purpose, or store or copy the information in any 
medium. Thank you.


Re: [PATCH v2 2/3] seccomp: Introduce addfd ioctl to seccomp user notifier

2020-05-29 Thread Sargun Dhillon
On Fri, May 29, 2020 at 12:31:37AM -0700, Kees Cook wrote:
> On Thu, May 28, 2020 at 04:08:57AM -0700, Sargun Dhillon wrote:
> > This adds a seccomp notifier ioctl which allows for the listener to "add"
> > file descriptors to a process which originated a seccomp user
> > notification. This allows calls like mount, and mknod to be "implemented",
> > as the return value, and the arguments are data in memory. On the other
> > hand, calls like connect can be "implemented" using pidfd_getfd.
> > 
> > Unfortunately, there are calls which return file descriptors, like
> > open, which are vulnerable to TOC-TOU attacks, and require that the
> > more privileged supervisor can inspect the argument, and perform the
> > syscall on behalf of the process generating the notifiation. This
> > allows the file descriptor generated from that open call to be
> > returned to the calling process.
> > 
> > In addition, there is funcitonality to allow for replacement of
> > specific file descriptors, following dup2-like semantics.
> > 
> > Signed-off-by: Sargun Dhillon 
> > Suggested-by: Matt Denton 
> 
> This looks mostly really clean. When I've got more brain tomorrow I want to
> double-check the locking, but I think the use of notify_lock and being
> in the ioctl fully protects everything from any use-after-free-like
> issues.
> 
> Notes below...
> 
> > +/* valid flags for seccomp_notif_addfd */
> > +#define SECCOMP_ADDFD_FLAG_SETFD   (1UL << 0) /* Specify remote fd */
> 
> Nit: please use BIT()
> 
> > @@ -735,6 +770,41 @@ static u64 seccomp_next_notify_id(struct 
> > seccomp_filter *filter)
> > return filter->notif->next_id++;
> >  }
> >  
> > +static void seccomp_handle_addfd(struct seccomp_kaddfd *addfd)
> > +{
> > +   struct socket *sock;
> > +   int ret, err;
> > +
> > +   /*
> > +* Remove the notification, and reset the list pointers, indicating
> > +* that it has been handled.
> > +*/
> > +   list_del_init(>list);
> > +
> > +   ret = security_file_receive(addfd->file);
> > +   if (ret)
> > +   goto out;
> > +
> > +   if (addfd->fd == -1) {
> > +   ret = get_unused_fd_flags(addfd->flags);
> > +   if (ret >= 0)
> > +   fd_install(ret, get_file(addfd->file));
> > +   } else {
> > +   ret = replace_fd(addfd->fd, addfd->file, addfd->flags);
> > +   }
> > +
> > +   /* These are the semantics from copying FDs via SCM_RIGHTS */
> > +   sock = sock_from_file(addfd->file, );
> > +   if (sock) {
> > +   sock_update_netprioidx(>sk->sk_cgrp_data);
> > +   sock_update_classid(>sk->sk_cgrp_data);
> > +   }
> 
> This made my eye twitch. ;) I see this is borrowed from
> scm_detach_fds()... this really feels like the kind of thing that will
> quickly go out of sync. I think this "receive an fd" logic needs to be
> lifted out of scm_detach_fds() so it and seccomp can share it. I'm not
> sure how to parameterize it quite right, though. Perhaps:
> 
> int file_receive(int fd, unsigned long flags, struct file *file)
> {
>   struct socket *sock;
>   int ret;
> 
>   ret = security_file_receive(file);
>   if (ret)
>   return ret;
> 
>   /* Install the file. */
>   if (fd == -1) {
>   ret = get_unused_fd_flags(flags);
>   if (ret >= 0)
>   fd_install(ret, get_file(file));
>   } else {
>   ret = replace_fd(fd, file, flags);
>   }
> 
>   /* Bump the usage count. */
>   sock = sock_from_file(addfd->file, );
>   if (sock) {
>   sock_update_netprioidx(>sk->sk_cgrp_data);
>   sock_update_classid(>sk->sk_cgrp_data);
>   }
> 
>   return ret;
> }
> 
> 
> static void seccomp_handle_addfd(struct seccomp_kaddfd *addfd)
> {
>   /*
>* Remove the notification, and reset the list pointers, indicating
>* that it has been handled.
>*/
>   list_del_init(>list);
>   addfd->ret = file_receive(addfd->fd, addfd->flags, addfd->file);
>   complete(>completion);
> }
> 
> scm_detach_fds()
>   ...
>   for (i=0, cmfptr=(__force int __user *)CMSG_DATA(cm); i  i++, cmfptr++)
>   {
> 
>   err = file_receive(-1, MSG_CMSG_CLOEXEC & msg->msg_flags
>   ? O_CLOEXEC : 0, fp[i]);
>   if (err < 0)
>   break;
>   err = put_user(err, cmfptr);
>   if (err)
>   /* wat */
>   }
>   ...
> 
> I'm not sure on the put_user() failure, though. We could check early
> for faults with a put_user(0, cmfptr) before the file_receive() call, or
> we could just ignore it? I'm not sure what SCM does here. I guess
> worst-case:
> 
> int file_receive(int fd, unsigned long flags, struct file *file,
>int __user *fdptr)
> {
>   ...
>   ret = get_unused_fd_flags(flags);
>   if (ret >= 0) {
>   if (cmfptr) {
>

[PATCH net-next 3/6] net: hns3: remove two unused macros in hclgevf_cmd.c

2020-05-29 Thread Huazhong Tan
Macro hclgevf_ring_to_dma_dir and hclgevf_is_csq defined in
hclgevf_cmd.c, but not used, so remove them.

Signed-off-by: Huazhong Tan 
---
 drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.c 
b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.c
index f38d236..fec65239 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.c
@@ -11,9 +11,6 @@
 #include "hclgevf_main.h"
 #include "hnae3.h"
 
-#define hclgevf_is_csq(ring) ((ring)->flag & HCLGEVF_TYPE_CSQ)
-#define hclgevf_ring_to_dma_dir(ring) (hclgevf_is_csq(ring) ? \
-   DMA_TO_DEVICE : DMA_FROM_DEVICE)
 #define cmq_ring_to_dev(ring)   (&(ring)->dev->pdev->dev)
 
 static int hclgevf_ring_space(struct hclgevf_cmq_ring *ring)
-- 
2.7.4



[PATCH net-next 1/6] net: hns3: fix a print format issue in hclge_mac_mdio_config()

2020-05-29 Thread Huazhong Tan
Use %d to print int variable 'ret' in hclge_mac_mdio_config().

Signed-off-by: Huazhong Tan 
---
 drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.c 
b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.c
index 696c5ae..e898207 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.c
@@ -155,7 +155,7 @@ int hclge_mac_mdio_config(struct hclge_dev *hdev)
ret = mdiobus_register(mdio_bus);
if (ret) {
dev_err(mdio_bus->parent,
-   "Failed to register MDIO bus ret = %#x\n", ret);
+   "failed to register MDIO bus, ret = %d\n", ret);
return ret;
}
 
-- 
2.7.4



[PATCH net-next 5/6] net: hns3: fix two coding style issues in hclgevf_main.c

2020-05-29 Thread Huazhong Tan
Remove a redundant blank line in hclgevf_cmd_set_promisc_mode(),
and fix a reverse xmas tree coding style issue in
hclgevf_set_rss_tc_mode().

Reported-by: Jian Shen 
Signed-off-by: Huazhong Tan 
---
 drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c 
b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
index a8c0e79..1b9578d 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
@@ -669,8 +669,8 @@ static int hclgevf_set_rss_tc_mode(struct hclgevf_dev 
*hdev,  u16 rss_size)
u16 tc_size[HCLGEVF_MAX_TC_NUM];
struct hclgevf_desc desc;
u16 roundup_size;
-   int status;
unsigned int i;
+   int status;
 
req = (struct hclgevf_rss_tc_mode_cmd *)desc.data;
 
@@ -1143,7 +1143,6 @@ static int hclgevf_cmd_set_promisc_mode(struct 
hclgevf_dev *hdev,
send_msg.en_mc = en_mc_pmc ? 1 : 0;
 
ret = hclgevf_send_mbx_msg(hdev, _msg, false, NULL, 0);
-
if (ret)
dev_err(>pdev->dev,
"Set promisc mode fail, status is %d.\n", ret);
-- 
2.7.4



[PATCH net-next 2/6] net: hns3: remove an unused macro hclge_is_csq

2020-05-29 Thread Huazhong Tan
Macro hclge_is_csq defined in hcgle_cmd.c has not been used,
so remove it.

Signed-off-by: Huazhong Tan 
---
 drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c 
b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c
index 64a1d0bd..1d6c328 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c
@@ -11,8 +11,6 @@
 #include "hnae3.h"
 #include "hclge_main.h"
 
-#define hclge_is_csq(ring) ((ring)->flag & HCLGE_TYPE_CSQ)
-
 #define cmq_ring_to_dev(ring)   (&(ring)->dev->pdev->dev)
 
 static int hclge_ring_space(struct hclge_cmq_ring *ring)
-- 
2.7.4



[PATCH net-next 6/6] net: hns3: remove some unused codes in hns3_nic_set_features()

2020-05-29 Thread Huazhong Tan
NETIF_F_HW_VLAN_CTAG_FILTER is not set in netdev->hw_feature for
the HNS3 driver, so the handler of NETIF_F_HW_VLAN_CTAG_FILTER
in hns3_nic_set_features() won't be called, remove it.

Reported-by: Jian Shen 
Signed-off-by: Huazhong Tan 
---
 drivers/net/ethernet/hisilicon/hns3/hns3_enet.c | 6 --
 1 file changed, 6 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c 
b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
index 9fe40c7..b14f2ab 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
@@ -1544,12 +1544,6 @@ static int hns3_nic_set_features(struct net_device 
*netdev,
return ret;
}
 
-   if ((changed & NETIF_F_HW_VLAN_CTAG_FILTER) &&
-   h->ae_algo->ops->enable_vlan_filter) {
-   enable = !!(features & NETIF_F_HW_VLAN_CTAG_FILTER);
-   h->ae_algo->ops->enable_vlan_filter(h, enable);
-   }
-
if ((changed & NETIF_F_HW_VLAN_CTAG_RX) &&
h->ae_algo->ops->enable_hw_strip_rxvtag) {
enable = !!(features & NETIF_F_HW_VLAN_CTAG_RX);
-- 
2.7.4



[PATCH net-next 4/6] net: hns3: fix an incorrect comment for num_tqps in struct hclgevf_dev

2020-05-29 Thread Huazhong Tan
struct hclgevf_dev stands for VF device, its field num_tqps
indicates the number of VF's task queue pairs, so the comment
is incorrect, replace 'PF' with 'VF'.

Reported-by: Jian Shen 
Signed-off-by: Huazhong Tan 
---
 drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h 
b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h
index 738de12..c1fac89 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h
@@ -278,7 +278,7 @@ struct hclgevf_dev {
struct semaphore reset_sem; /* protect reset process */
 
u32 fw_version;
-   u16 num_tqps;   /* num task queue pairs of this PF */
+   u16 num_tqps;   /* num task queue pairs of this VF */
 
u16 alloc_rss_size; /* allocated RSS task queue */
u16 rss_size_max;   /* HW defined max RSS task queue */
-- 
2.7.4



[PATCH net-next 0/6] net: hns3: adds some cleanups for -next

2020-05-29 Thread Huazhong Tan
There are some cleanups for the HNS3 ethernet driver, fix an
incorrect print format, an incorrect comment and some coding
style issues, also remove some unused codes and macros.

Huazhong Tan (6):
  net: hns3: fix a print format issue in hclge_mac_mdio_config()
  net: hns3: remove an unused macro hclge_is_csq
  net: hns3: remove two unused macros in hclgevf_cmd.c
  net: hns3: fix an incorrect comment for num_tqps in struct hclgevf_dev
  net: hns3: fix two coding style issues in hclgevf_main.c
  net: hns3: remove some unused codes in hns3_nic_set_features()

 drivers/net/ethernet/hisilicon/hns3/hns3_enet.c   | 6 --
 drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c| 2 --
 drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.c   | 2 +-
 drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.c  | 3 ---
 drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c | 3 +--
 drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h | 2 +-
 6 files changed, 3 insertions(+), 15 deletions(-)

-- 
2.7.4



Re: [PATCH] HID: usbhid: do not sleep when opening device

2020-05-29 Thread Dmitry Torokhov
On Fri, May 29, 2020 at 05:48:26PM -0700, Guenter Roeck wrote:
> On Fri, May 29, 2020 at 4:50 PM Nicolas Boichat  wrote:
> >
> > On Sat, May 30, 2020 at 3:59 AM Dmitry Torokhov
> >  wrote:
> > >
> > > usbhid tries to give the device 50 milliseconds to drain its queues
> > > when opening the device, but does it naively by simply sleeping in open
> > > handler, which slows down device probing (and thus may affect overall
> > > boot time).
> > >
> > > However we do not need to sleep as we can instead mark a point of time
> > > in the future when we should start processing the events.
> > >
> > > Reported-by: Nicolas Boichat 
> > > Signed-off-by: Dmitry Torokhov 
> > > ---
> > >  drivers/hid/usbhid/hid-core.c | 27 +++
> > >  drivers/hid/usbhid/usbhid.h   |  1 +
> > >  2 files changed, 16 insertions(+), 12 deletions(-)
> > >
> > > diff --git a/drivers/hid/usbhid/hid-core.c b/drivers/hid/usbhid/hid-core.c
> > > index c7bc9db5b192..e69992e945b2 100644
> > > --- a/drivers/hid/usbhid/hid-core.c
> > > +++ b/drivers/hid/usbhid/hid-core.c
> > > @@ -95,6 +95,19 @@ static int hid_start_in(struct hid_device *hid)
> > > set_bit(HID_NO_BANDWIDTH, >iofl);
> > > } else {
> > > clear_bit(HID_NO_BANDWIDTH, >iofl);
> > > +
> > > +   if (test_and_clear_bit(HID_RESUME_RUNNING,
> > > +  >iofl)) {
> > > +   /*
> > > +* In case events are generated while 
> > > nobody was
> > > +* listening, some are released when the 
> > > device
> > > +* is re-opened. Wait 50 msec for the 
> > > queue to
> > > +* empty before allowing events to go 
> > > through
> > > +* hid.
> > > +*/
> > > +   usbhid->input_start_time = jiffies +
> > > +  
> > > msecs_to_jiffies(50);
> > > +   }
> > > }
> > > }
> > > spin_unlock_irqrestore(>lock, flags);
> > > @@ -280,7 +293,8 @@ static void hid_irq_in(struct urb *urb)
> > > if (!test_bit(HID_OPENED, >iofl))
> > > break;
> > > usbhid_mark_busy(usbhid);
> > > -   if (!test_bit(HID_RESUME_RUNNING, >iofl)) {
> > > +   if (!test_bit(HID_RESUME_RUNNING, >iofl) &&
> > > +   time_after(jiffies, usbhid->input_start_time)) {
> >
> > Are we worried about jiffies overflowing (32-bit@1000Hz is "only" 49.7 
> > days...)
> >
> 
> time_after() is overflow-safe. That is why it is used and jiffies is
> not compared directly.

Well, it is overflow safe, but still can not measure more than 50 days,
so if you have a device open for 50+ days there will be a 50msec gap
where it may lose events.

I guess we can switch to ktime(). A bit more expensive on 32 bits, but
in reality I do not think anyone would care.

Thanks.

-- 
Dmitry


RE: [PATCH] usb: gadget: f_acm: don't disable disabled EP

2020-05-29 Thread Peter Chen
 
> > > @@ -425,9 +425,11 @@ static int acm_set_alt(struct usb_function *f, 
> > > unsigned
> intf, unsigned alt)
> > >   /* we know alt == 0, so this is an activation or a reset */
> > >
> > >   if (intf == acm->ctrl_id) {
> > > - dev_vdbg(>gadget->dev,
> > > - "reset acm control interface %d\n", intf);
> > > - usb_ep_disable(acm->notify);
> > > + if (acm->notify->enabled) {
> > > + dev_vdbg(>gadget->dev,
> > > + "reset acm control interface %d\n", 
> > > intf);
> > > + usb_ep_disable(acm->notify);
> > > + }
> >
> > But it does not fix any issues, the usb_ep_disable checks 'enabled' flag.
> 
> It generates spurious trace events if you enable them.
> 

You mean the trace events from core.c? If it is, we could try to improve it
and indicate it is already enabled or disabled.

Peter


Re: [PATCH v2 0/2] fix missing handling of __user in nommu's uaccess()

2020-05-29 Thread Greg Ungerer

Hi Luc,

On 30/5/20 5:02 am, Luc Van Oostenryck wrote:

I received a bug report for an unrelated patch when used with m68k-nommu.
It appears that the origin of the problem is that __get_user() and
__put_user() doesn't handle correctly __user. These 2 patches fix this.

Note: this is only minimaly tested but is quite straightforward and
   since this only change __user annotation it will not change the
   generated code.


Changes since v1:
* fix typo: s/plan/plain/
* appease checkpatch with better style: s/__force*/__force */
* avoid excessive line length caused by the added cast.

Luc Van Oostenryck (2):
   m68k,nommu: add missing __user in uaccess' __ptr() macro
   m68k,nommu: fix implicit cast from __user in __{get,put}_user_asm()

  arch/m68k/include/asm/uaccess_no.h | 6 +++---
  1 file changed, 3 insertions(+), 3 deletions(-)


Looks good, thanks for the fixup.
Applied to the m68knommu git tree, for-next branch.

Regards
Greg

 


[PATCH v11 2/2] mtd: rawnand: Add NAND controller support on Intel LGM SoC

2020-05-29 Thread Ramuthevar,Vadivel MuruganX
From: Ramuthevar Vadivel Murugan 

This patch adds the new IP of Nand Flash Controller(NFC) support
on Intel's Lightning Mountain(LGM) SoC.

DMA is used for burst data transfer operation, also DMA HW supports
aligned 32bit memory address and aligned data access by default.
DMA burst of 8 supported. Data register used to support the read/write
operation from/to device.

NAND controller driver implements ->exec_op() to replace legacy hooks,
these specific call-back method to execute NAND operations.

Signed-off-by: Ramuthevar Vadivel Murugan 

---
 drivers/mtd/nand/raw/Kconfig |   8 +
 drivers/mtd/nand/raw/Makefile|   1 +
 drivers/mtd/nand/raw/intel-nand-controller.c | 747 +++
 3 files changed, 756 insertions(+)
 create mode 100644 drivers/mtd/nand/raw/intel-nand-controller.c

diff --git a/drivers/mtd/nand/raw/Kconfig b/drivers/mtd/nand/raw/Kconfig
index a80a46bb5b8b..75ab2afb78cf 100644
--- a/drivers/mtd/nand/raw/Kconfig
+++ b/drivers/mtd/nand/raw/Kconfig
@@ -457,6 +457,14 @@ config MTD_NAND_CADENCE
  Enable the driver for NAND flash on platforms using a Cadence NAND
  controller.
 
+config MTD_NAND_INTEL_LGM
+   tristate "Support for NAND controller on Intel LGM SoC"
+   depends on OF || COMPILE_TEST
+   depends on HAS_IOMEM
+   help
+ Enables support for NAND Flash chips on Intel's LGM SoC.
+ NAND flash controller interfaced through the External Bus Unit.
+
 comment "Misc"
 
 config MTD_SM_COMMON
diff --git a/drivers/mtd/nand/raw/Makefile b/drivers/mtd/nand/raw/Makefile
index 2d136b158fb7..bfc8fe4d2cb0 100644
--- a/drivers/mtd/nand/raw/Makefile
+++ b/drivers/mtd/nand/raw/Makefile
@@ -58,6 +58,7 @@ obj-$(CONFIG_MTD_NAND_TEGRA)  += tegra_nand.o
 obj-$(CONFIG_MTD_NAND_STM32_FMC2)  += stm32_fmc2_nand.o
 obj-$(CONFIG_MTD_NAND_MESON)   += meson_nand.o
 obj-$(CONFIG_MTD_NAND_CADENCE) += cadence-nand-controller.o
+obj-$(CONFIG_MTD_NAND_INTEL_LGM)   += intel-nand-controller.o
 
 nand-objs := nand_base.o nand_legacy.o nand_bbt.o nand_timings.o nand_ids.o
 nand-objs += nand_onfi.o
diff --git a/drivers/mtd/nand/raw/intel-nand-controller.c 
b/drivers/mtd/nand/raw/intel-nand-controller.c
new file mode 100644
index ..564d28978943
--- /dev/null
+++ b/drivers/mtd/nand/raw/intel-nand-controller.c
@@ -0,0 +1,747 @@
+// SPDX-License-Identifier: GPL-2.0+
+/* Copyright (c) 2020 Intel Corporation. */
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#define EBU_CLC0x000
+#define EBU_CLC_RST0xu
+
+#define EBU_ADDR_SEL(n)(0x20 + (n) * 4)
+/* 5 bits 26:22 included for comparison in the ADDR_SELx */
+#define EBU_ADDR_MASK(x)   ((x) << 4)
+#define EBU_ADDR_SEL_REGEN 0x1
+
+#define EBU_BUSCON(n)  (0x60 + (n) * 4)
+#define EBU_BUSCON_CMULT_V40x1
+#define EBU_BUSCON_RECOVC(n)   ((n) << 2)
+#define EBU_BUSCON_HOLDC(n)((n) << 4)
+#define EBU_BUSCON_WAITRDC(n)  ((n) << 6)
+#define EBU_BUSCON_WAITWRC(n)  ((n) << 8)
+#define EBU_BUSCON_BCGEN_CS0x0
+#define EBU_BUSCON_SETUP_ENBIT(22)
+#define EBU_BUSCON_ALEC0xC000
+
+#define EBU_CON0x0B0
+#define EBU_CON_NANDM_EN   BIT(0)
+#define EBU_CON_NANDM_DIS  0x0
+#define EBU_CON_CSMUX_E_EN BIT(1)
+#define EBU_CON_ALE_P_LOW  BIT(2)
+#define EBU_CON_CLE_P_LOW  BIT(3)
+#define EBU_CON_CS_P_LOW   BIT(4)
+#define EBU_CON_SE_P_LOW   BIT(5)
+#define EBU_CON_WP_P_LOW   BIT(6)
+#define EBU_CON_PRE_P_LOW  BIT(7)
+#define EBU_CON_IN_CS_S(n) ((n) << 8)
+#define EBU_CON_OUT_CS_S(n)((n) << 10)
+#define EBU_CON_LAT_EN_CS_P((0x3D) << 18)
+
+#define EBU_WAIT   0x0B4
+#define EBU_WAIT_RDBY  BIT(0)
+#define EBU_WAIT_WR_C  BIT(3)
+
+#define HSNAND_CTL10x110
+#define HSNAND_CTL1_ADDR_SHIFT 24
+
+#define HSNAND_CTL20x114
+#define HSNAND_CTL2_ADDR_SHIFT 8
+#define HSNAND_CTL2_CYC_N_V5   (0x2 << 16)
+
+#define HSNAND_INT_MSK_CTL 0x124
+#define HSNAND_INT_MSK_CTL_WR_CBIT(4)
+
+#define HSNAND_INT_STA 0x128
+#define HSNAND_INT_STA_WR_CBIT(4)
+
+#define HSNAND_CTL 0x130
+#define HSNAND_CTL_ENABLE_ECC  BIT(0)
+#define HSNAND_CTL_GO  BIT(2)
+#define HSNAND_CTL_CE_SEL_CS(n)BIT(3 + (n))
+#define HSNAND_CTL_RW_READ 0x0
+#define HSNAND_CTL_RW_WRITEBIT(10)
+#define HSNAND_CTL_ECC_OFF_V8THBIT(11)
+#define HSNAND_CTL_CKFF_EN 0x0
+#define HSNAND_CTL_MSG_EN  BIT(17)
+
+#define HSNAND_PARA0   0x13c
+#define HSNAND_PARA0_PAGE_V81920x3
+#define HSNAND_PARA0_PIB_V256  (0x3 << 4)
+#define HSNAND_PARA0_BYP_EN_NP 0x0
+#define HSNAND_PARA0_BYP_DEC_NP0x0
+#define HSNAND_PARA0_TYPE_ONFI BIT(18)

[PATCH v11 1/2] dt-bindings: mtd: Add Nand Flash Controller support for Intel LGM SoC

2020-05-29 Thread Ramuthevar,Vadivel MuruganX
From: Ramuthevar Vadivel Murugan 

Add YAML file for dt-bindings to support NAND Flash Controller
on Intel's Lightning Mountain SoC.

Signed-off-by: Ramuthevar Vadivel Murugan 

---
 .../devicetree/bindings/mtd/intel,lgm-nand.yaml| 99 ++
 1 file changed, 99 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/mtd/intel,lgm-nand.yaml

diff --git a/Documentation/devicetree/bindings/mtd/intel,lgm-nand.yaml 
b/Documentation/devicetree/bindings/mtd/intel,lgm-nand.yaml
new file mode 100644
index ..313daec4d783
--- /dev/null
+++ b/Documentation/devicetree/bindings/mtd/intel,lgm-nand.yaml
@@ -0,0 +1,99 @@
+# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/mtd/intel,lgm-nand.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Intel LGM SoC NAND Controller Device Tree Bindings
+
+allOf:
+  - $ref: "nand-controller.yaml"
+
+maintainers:
+  - Ramuthevar Vadivel Murugan 
+
+properties:
+  compatible:
+const: intel,lgm-nand
+
+  reg:
+maxItems: 6
+
+  reg-names:
+items:
+   - const: ebunand
+   - const: hsnand
+   - const: nand_cs0
+   - const: nand_cs1
+   - const: addr_sel0
+   - const: addr_sel1
+
+  clocks:
+maxItems: 1
+
+  dmas:
+maxItems: 2
+
+  dma-names:
+items:
+  - const: tx
+  - const: rx
+
+  "#address-cells":
+const: 1
+
+  "#size-cells":
+const: 0
+
+patternProperties:
+  "^nand@[a-f0-9]+$":
+type: object
+properties:
+  reg:
+minimum: 0
+maximum: 7
+
+  nand-ecc-mode: true
+
+  nand-ecc-algo:
+const: hw
+
+additionalProperties: false
+
+required:
+  - compatible
+  - reg
+  - reg-names
+  - clocks
+  - dmas
+  - dma-names
+  - "#address-cells"
+  - "#size-cells"
+
+additionalProperties: false
+
+examples:
+  - |
+nand-controller@e0f0 {
+  compatible = "intel,lgm-nand";
+  reg = <0xe0f0 0x100>,
+<0xe100 0x300>,
+<0xe140 0x8000>,
+<0xe1c0 0x1000>,
+<0x1740 0x4>,
+<0x17c0 0x4>;
+  reg-names = "ebunand", "hsnand", "nand_cs0", "nand_cs1",
+"addr_sel0", "addr_sel1";
+  clocks = < 125>;
+  dmas = < 8>, < 9>;
+  dma-names = "tx", "rx";
+  #address-cells = <1>;
+  #size-cells = <0>;
+
+  nand@0 {
+reg = <0>;
+nand-ecc-mode = "hw";
+  };
+};
+
+...
-- 
2.11.0



Re: [PATCH 1/2] objtool: Rename rela to reloc

2020-05-29 Thread Matt Helsley
On Fri, May 29, 2020 at 06:22:00PM -0500, Josh Poimboeuf wrote:
> On Fri, May 29, 2020 at 02:01:13PM -0700, Matt Helsley wrote:
> > Before supporting additional relocation types rename the relevant
> > types and functions from "rela" to "reloc". This work be done with
> > the following regex:
> > 
> > sed -i -e 's/struct rela/struct reloc/g' \
> > -e 's/\([_\*]\)rela\(s\{0,1\}\)/\1reloc\2/g' \
> > -e 's/tmprela\(s\{0,1\}\)/tmpreloc\1/g' \
> > -e 's/relasec/relocsec/g' \
> > -e 's/rela_list/reloc_list/g' \
> > -e 's/rela_hash/reloc_hash/g' \
> > -e 's/add_rela/add_reloc/g' \
> > -e 's/rela->/reloc->/g' \
> > -e '/rela[,\.]/{ s/\([^\.>]\)rela\([\.,]\)/\1reloc\2/g ; }' \
> > -e 's/rela =/reloc =/g' \
> > -e 's/relas =/relocs =/g' \
> > -e 's/relas\[/relocs[/g' \
> > -e 's/relaname =/relocname =/g' \
> > -e 's/= rela\;/= reloc\;/g' \
> > -e 's/= relas\;/= relocs\;/g' \
> > -e 's/= relaname\;/= relocname\;/g' \
> > -e 's/, rela)/, reloc)/g' \
> > -e 's/, relaname/, relocname/g' \
> > -e 's/sec->rela/sec->reloc/g' \
> > -e 's/(\(!\{0,1\}\)rela/(\1reloc/g' \
> > arch.h \
> > arch/x86/decode.c  \
> > check.c \
> > check.h \
> > elf.c \
> > elf.h \
> > orc_gen.c \
> > special.c \
> 
> Holy regex!  Thanks for doing that :-)

So I was rebasing my future patches and I found a few spots where
objtool warning strings and code comments weren't fixed-up to
consistent. Here's the new, complete regex -- it includes the original
changes and the missed bits (e.g. note the new substitution for @rela
comments):

sed -i -e 's/struct rela/struct reloc/g' \
-e 's/\([_\*]\)rela\(s\{0,1\}\)/\1reloc\2/g' \
-e 's/tmprela\(s\{0,1\}\)/tmpreloc\1/g' \
-e 's/relasec/relocsec/g' \
-e 's/rela_list/reloc_list/g' \
-e 's/rela_hash/reloc_hash/g' \
-e 's/add_rela/add_reloc/g' \
-e 's/rela->/reloc->/g' \
-e '/rela[,\.]/{ s/\([^\.>]\)rela\([\.,]\)/\1reloc\2/g ; }' \
-e 's/rela =/reloc =/g' \
-e 's/relas =/relocs =/g' \
-e 's/relas\[/relocs[/g' \
-e 's/relaname =/relocname =/g' \
-e 's/= rela\;/= reloc\;/g' \
-e 's/= relas\;/= relocs\;/g' \
-e 's/= relaname\;/= relocname\;/g' \
-e 's/, rela)/, reloc)/g' \
-e 's/\([ @]\)rela\([ "]\)/\1reloc\2/g' \
-e 's/ rela$/ reloc/g' \
-e 's/, relaname/, relocname/g' \
-e 's/sec->rela/sec->reloc/g' \
-e 's/(\(!\{0,1\}\)rela/(\1reloc/g' \
arch.h \
arch/x86/decode.c  \
check.c \
check.h \
elf.c \
elf.h \
orc_gen.c \
special.c

If you want to fixup this commit you could just re-run the regex after
applying it but before applying the next commit.

Cheers,
-Matt Helsley



[PATCH v11 0/2] mtd: rawnand: Add NAND controller support on Intel LGM SoC

2020-05-29 Thread Ramuthevar,Vadivel MuruganX
This patch adds the new IP of Nand Flash Controller(NFC) support
on Intel's Lightning Mountain(LGM) SoC.

DMA is used for burst data transfer operation, also DMA HW supports
aligned 32bit memory address and aligned data access by default.
DMA burst of 8 supported. Data register used to support the read/write
operation from/to device.

NAND controller also supports in-built HW ECC engine.

NAND controller driver implements ->exec_op() to replace legacy hooks,
these specific call-back method to execute NAND operations.

Thanks Boris, Andy, Arnd and Rob for the review comments and suggestions.
---
v11:
  - No Change
v10:
  - No Change
v9:
  - No change
v8:
  - fix the kbuild bot warnings
  - correct the typo's
v7:
  - indentation issue is fixed
  - add error check for retrieve the resource from dt
v6:
  - update EBU_ADDR_SELx register base value build it from DT
  - Add tabs in in Kconfig
v5:
  - replace by 'HSNAND_CLE_OFFS | HSNAND_CS_OFFS' to NAND_WRITE_CMD and 
NAND_WRITE_ADDR
  - remove the unused macros
  - update EBU_ADDR_MASK(x) macro
  - update the EBU_ADDR_SELx register values to be written
v4:
  - add ebu_nand_cs structure for multiple-CS support
  - mask/offset encoding for 0x51 value
  - update macro HSNAND_CTL_ENABLE_ECC
  - drop the op argument and un-used macros.
  - updated the datatype and macros
  - add function disable nand module
  - remove ebu_host->dma_rx = NULL;
  - rename MMIO address range variables to ebu and hsnand
  - implement ->setup_data_interface()
  - update label err_cleanup_nand and err_cleanup_dma
  - add return value check in the nand_remove function
  - add/remove tabs and spaces as per coding standard
  - encoded CS ids by reg property
v3:
  - Add depends on MACRO in Kconfig
  - file name update in Makefile
  - file name update to intel-nand-controller
  - modification of MACRO divided like EBU, HSNAND and NAND
  - add NAND_ALE_OFFS, NAND_CLE_OFFS and NAND_CS_OFFS
  - rename lgm_ to ebu_ and _va suffix is removed in the whole file
  - rename structure and varaibles as per review comments.
  - remove lgm_read_byte(), lgm_dev_ready() and cmd_ctrl() un-used function
  - update in exec_op() as per review comments
  - rename function lgm_dma_exit() by lgm_dma_cleanup()
  - hardcoded magic value  for base and offset replaced by MACRO defined
  - mtd_device_unregister() + nand_cleanup() instead of nand_release()
v2:
  - implement the ->exec_op() to replaces the legacy hook-up.
  - update the commit message
  - add MIPS maintainers and xway_nand driver author in CC

v1:
 - initial version
 
dt-bindings: mtd: Add Nand Flash Controller support for Intel LGM SoC
---
v11:
  - Fixed the compatible issue with example
10:
  - fix bot errors
v9:
  - Rob's review comments address
  - dual licensed
  - compatible change
  - add reg-names
  - drop clock-names and clock-cells
  - correct typo's
v8:
  No change
v7:
  - Rob's review comments addressed
  - dt-schema build issue fixed with upgraded dt-schema
v6:
  - Rob's review comments addressed in YAML file
  - add addr_sel0 and addr_sel1 reg-names in YAML example
v5:
  - add the example in YAML file
v4:
  - No change
v3:
  - No change
v2:
  YAML compatible string update to intel, lgm-nand-controller
v1:
  - initial version


Ramuthevar Vadivel Murugan (2):
  dt-bindings: mtd: Add Nand Flash Controller support for Intel LGM SoC
  mtd: rawnand: Add NAND controller support on Intel LGM SoC

 .../devicetree/bindings/mtd/intel,lgm-nand.yaml|  99 +++
 drivers/mtd/nand/raw/Kconfig   |   8 +
 drivers/mtd/nand/raw/Makefile  |   1 +
 drivers/mtd/nand/raw/intel-nand-controller.c   | 747 +
 4 files changed, 855 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/mtd/intel,lgm-nand.yaml
 create mode 100644 drivers/mtd/nand/raw/intel-nand-controller.c

-- 
2.11.0



Re: [PATCH] HID: usbhid: do not sleep when opening device

2020-05-29 Thread Guenter Roeck
On Fri, May 29, 2020 at 4:50 PM Nicolas Boichat  wrote:
>
> On Sat, May 30, 2020 at 3:59 AM Dmitry Torokhov
>  wrote:
> >
> > usbhid tries to give the device 50 milliseconds to drain its queues
> > when opening the device, but does it naively by simply sleeping in open
> > handler, which slows down device probing (and thus may affect overall
> > boot time).
> >
> > However we do not need to sleep as we can instead mark a point of time
> > in the future when we should start processing the events.
> >
> > Reported-by: Nicolas Boichat 
> > Signed-off-by: Dmitry Torokhov 
> > ---
> >  drivers/hid/usbhid/hid-core.c | 27 +++
> >  drivers/hid/usbhid/usbhid.h   |  1 +
> >  2 files changed, 16 insertions(+), 12 deletions(-)
> >
> > diff --git a/drivers/hid/usbhid/hid-core.c b/drivers/hid/usbhid/hid-core.c
> > index c7bc9db5b192..e69992e945b2 100644
> > --- a/drivers/hid/usbhid/hid-core.c
> > +++ b/drivers/hid/usbhid/hid-core.c
> > @@ -95,6 +95,19 @@ static int hid_start_in(struct hid_device *hid)
> > set_bit(HID_NO_BANDWIDTH, >iofl);
> > } else {
> > clear_bit(HID_NO_BANDWIDTH, >iofl);
> > +
> > +   if (test_and_clear_bit(HID_RESUME_RUNNING,
> > +  >iofl)) {
> > +   /*
> > +* In case events are generated while 
> > nobody was
> > +* listening, some are released when the 
> > device
> > +* is re-opened. Wait 50 msec for the queue 
> > to
> > +* empty before allowing events to go 
> > through
> > +* hid.
> > +*/
> > +   usbhid->input_start_time = jiffies +
> > +  
> > msecs_to_jiffies(50);
> > +   }
> > }
> > }
> > spin_unlock_irqrestore(>lock, flags);
> > @@ -280,7 +293,8 @@ static void hid_irq_in(struct urb *urb)
> > if (!test_bit(HID_OPENED, >iofl))
> > break;
> > usbhid_mark_busy(usbhid);
> > -   if (!test_bit(HID_RESUME_RUNNING, >iofl)) {
> > +   if (!test_bit(HID_RESUME_RUNNING, >iofl) &&
> > +   time_after(jiffies, usbhid->input_start_time)) {
>
> Are we worried about jiffies overflowing (32-bit@1000Hz is "only" 49.7 
> days...)
>

time_after() is overflow-safe. That is why it is used and jiffies is
not compared directly.

Guenter

> > hid_input_report(urb->context, HID_INPUT_REPORT,
> >  urb->transfer_buffer,
> >  urb->actual_length, 1);
> > @@ -714,17 +728,6 @@ static int usbhid_open(struct hid_device *hid)
> > }
> >
> > usb_autopm_put_interface(usbhid->intf);
> > -
> > -   /*
> > -* In case events are generated while nobody was listening,
> > -* some are released when the device is re-opened.
> > -* Wait 50 msec for the queue to empty before allowing events
> > -* to go through hid.
> > -*/
> > -   if (res == 0)
> > -   msleep(50);
> > -
> > -   clear_bit(HID_RESUME_RUNNING, >iofl);
> > return res;
> >  }
> >
> > diff --git a/drivers/hid/usbhid/usbhid.h b/drivers/hid/usbhid/usbhid.h
> > index 8620408bd7af..805949671b96 100644
> > --- a/drivers/hid/usbhid/usbhid.h
> > +++ b/drivers/hid/usbhid/usbhid.h
> > @@ -82,6 +82,7 @@ struct usbhid_device {
> >
> > spinlock_t lock;/* 
> > fifo spinlock */
> > unsigned long iofl; /* 
> > I/O flags (CTRL_RUNNING, OUT_RUNNING) */
> > +   unsigned long input_start_time; /* 
> > When to start handling input, in jiffies */
> > struct timer_list io_retry; /* 
> > Retry timer */
> > unsigned long stop_retry;   /* 
> > Time to give up, in jiffies */
> > unsigned int retry_delay;   /* 
> > Delay length in ms */
> > --
> > 2.27.0.rc0.183.gde8f92d652-goog
> >
> >
> > --
> > Dmitry


Re: [net-next 1/6] net: marvell: prestera: Add driver for Prestera family ASIC devices

2020-05-29 Thread Vadym Kochan
Hi David,

On Fri, May 29, 2020 at 05:18:39PM -0700, David Miller wrote:
> 
> Please remove all of the __packed attributes.
> 
> I looked at your data structures and all of them use fixed sized types
> and are multiples of 4 so the __packed attribute is completely
> unnecessary.
> 
> The alignment attribute is also unnecessary so please remove that too.

Some of the fields are u8, so I assume there might be holes added by
the compiler ? Also these attributes guarantee some ABI compatibility
with FW side, I will try to remove them and check but it sounds for me a bit
risky.


[PATCH] lib: make a test module with get_count_order/long

2020-05-29 Thread Wei Yang
A test module to make sure get_count_order/long returns the correct result.

Signed-off-by: Wei Yang 
---
 lib/Kconfig.debug  | 13 ++
 lib/Makefile   |  2 +
 lib/test_getorder.c| 64 ++
 tools/testing/selftests/lib/config |  1 +
 4 files changed, 80 insertions(+)
 create mode 100644 lib/test_getorder.c

diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index c0ef216bb803..01e671151f42 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -1999,6 +1999,19 @@ config TEST_BITOPS
 
  If unsure, say N.
 
+config TEST_GETORDER
+   tristate "Test module for compilation of get_count_order operations"
+   depends on m
+   help
+ This builds the "test_getorder" module that is much like the
+ TEST_LKM module except that it does a basic exercise of the
+ get_count_order and get_count_order_long to make sure there are no
+ compiler warnings from C=1 sparse checker or -Wextra compilations.
+ It has no dependencies and doesn't run or load unless explicitly
+ requested by name. For example: modprobe test_getorder.
+
+ If unsure, say N.
+
 config TEST_VMALLOC
tristate "Test module for stress/performance analysis of vmalloc 
allocator"
default n
diff --git a/lib/Makefile b/lib/Makefile
index 0d942f7c7478..806d4df8f7c7 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -81,6 +81,8 @@ obj-$(CONFIG_TEST_STATIC_KEYS) += test_static_keys.o
 obj-$(CONFIG_TEST_STATIC_KEYS) += test_static_key_base.o
 obj-$(CONFIG_TEST_BITOPS) += test_bitops.o
 CFLAGS_test_bitops.o += -Werror
+obj-$(CONFIG_TEST_GETORDER) += test_getorder.o
+CFLAGS_test_getorder.o += -Werror
 obj-$(CONFIG_TEST_PRINTF) += test_printf.o
 obj-$(CONFIG_TEST_BITMAP) += test_bitmap.o
 obj-$(CONFIG_TEST_STRSCPY) += test_strscpy.o
diff --git a/lib/test_getorder.c b/lib/test_getorder.c
new file mode 100644
index ..6492abc793af
--- /dev/null
+++ b/lib/test_getorder.c
@@ -0,0 +1,64 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Author: Wei Yang 
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include 
+#include 
+#include 
+
+/* a tiny module only meant to test get_count_order/long */
+unsigned int order_comb[][2] = {
+   {0x0003,  2},
+   {0x0004,  2},
+   {0x1fff, 13},
+   {0x2000, 13},
+   {0x5000, 31},
+   {0x8000, 31},
+   {0x80003000, 32},
+};
+
+unsigned long order_comb_long[][2] = {
+   {0x0003, 34},
+   {0x0004, 34},
+   {0x1fff, 45},
+   {0x2000, 45},
+   {0x5000, 63},
+   {0x8000, 63},
+   {0x80003000, 64},
+};
+
+static int __init test_getorder_startup(void)
+{
+   int i;
+
+   pr_warn("Loaded test module\n");
+   for (i = 0; i < ARRAY_SIZE(order_comb); i++) {
+   if (order_comb[i][1] != get_count_order(order_comb[i][0]))
+   pr_warn("get_count_order wrong for %x\n",
+   order_comb[i][0]);
+   }
+
+   for (i = 0; i < ARRAY_SIZE(order_comb_long); i++) {
+   if (order_comb_long[i][1] !=
+   get_count_order_long(order_comb_long[i][0]))
+   pr_warn("get_count_order_long wrong for %lx\n",
+   order_comb_long[i][0]);
+   }
+
+   return 0;
+}
+
+static void __exit test_getorder_unstartup(void)
+{
+   pr_warn("Unloaded test module\n");
+}
+
+module_init(test_getorder_startup);
+module_exit(test_getorder_unstartup);
+
+MODULE_AUTHOR("Wei Yang ");
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("get_count_order/long testing module");
diff --git a/tools/testing/selftests/lib/config 
b/tools/testing/selftests/lib/config
index b80ee3f6e265..2ad467d34648 100644
--- a/tools/testing/selftests/lib/config
+++ b/tools/testing/selftests/lib/config
@@ -3,3 +3,4 @@ CONFIG_TEST_BITMAP=m
 CONFIG_PRIME_NUMBERS=m
 CONFIG_TEST_STRSCPY=m
 CONFIG_TEST_BITOPS=m
+CONFIG_TEST_GETORDER=m
-- 
2.23.0



Re: [PATCH v7 0/3] perf arm-spe: Add support for synthetic events

2020-05-29 Thread Leo Yan
On Fri, May 29, 2020 at 01:18:30PM -0300, Arnaldo Carvalho de Melo wrote:
> Em Fri, May 29, 2020 at 11:28:01PM +0800, Leo Yan escreveu:
> > Hi James,
> > 
> > On Fri, May 29, 2020 at 03:58:23PM +0100, James Clark wrote:
> > > Hi Will and Leo,
> > > 
> > > I've tested this on an Arm N1 machine and it looks good to me.
> > 
> > This is great!  Will respin the new patch set with adding your test tag
> > and send to ML.  Thanks a lot for the effort.
> > 
> > Hi Will, Arnaldo, sorry for late replying you due to other works and
> > thanks for suggestions in other emails.
> 
> Np, please do it on top of my tmp.perf/core branch, it'll become
> perf/core as soon as testing that is ongoing finishes.

Understood, will do.

Thanks,
Leo


Re: [PATCH v10 1/2] dt-bindings: mtd: Add Nand Flash Controller support for Intel LGM SoC

2020-05-29 Thread Ramuthevar, Vadivel MuruganX

Hi Rob,

On 30/5/2020 3:31 am, Rob Herring wrote:

On Thu, May 28, 2020 at 11:39:28PM +0800, Ramuthevar,Vadivel MuruganX wrote:

From: Ramuthevar Vadivel Murugan

Add YAML file for dt-bindings to support NAND Flash Controller
on Intel's Lightning Mountain SoC.

Signed-off-by: Ramuthevar Vadivel 
Murugan
---
  .../devicetree/bindings/mtd/intel,lgm-nand.yaml| 93 ++
  1 file changed, 93 insertions(+)
  create mode 100644 Documentation/devicetree/bindings/mtd/intel,lgm-nand.yaml

diff --git a/Documentation/devicetree/bindings/mtd/intel,lgm-nand.yaml 
b/Documentation/devicetree/bindings/mtd/intel,lgm-nand.yaml
new file mode 100644
index ..afecc9920e04
--- /dev/null
+++ b/Documentation/devicetree/bindings/mtd/intel,lgm-nand.yaml
@@ -0,0 +1,93 @@
+# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
+%YAML 1.2
+---
+$id:http://devicetree.org/schemas/mtd/intel,lgm-nand.yaml#
+$schema:http://devicetree.org/meta-schemas/core.yaml#
+
+title: Intel LGM SoC NAND Controller Device Tree Bindings
+
+allOf:
+  - $ref: "nand-controller.yaml"
+
+maintainers:
+  - Ramuthevar Vadivel Murugan
+
+properties:
+  compatible:
+const: intel,lgm-nand-controller

Doesn't match the example.

Thank you for the review comments...

if we add the compatible = intel,lgm-nand-controller it throws an error 
like below..


/builds/robherring/linux-dt-review/Documentation/devicetree/bindings/mtd/intel,lgm-nand.example.dt.yaml: 
nand-controller@e0f0: '#address-cells', '#size-cells' do not match 
any of the regexes: '^nand@[a-f0-9]+$', 'pinctrl-[0-9]+'
/builds/robherring/linux-dt-review/Documentation/devicetree/bindings/mtd/intel,lgm-nand.example.dt.yaml: 
nand-controller@e0f0: nand@0: '#address-cells', '#size-cells', 
'nand-on-flash-bbt' do not match any of the regexes: 'pinctrl-[0-9]+'


referred from this file 
:Documentation/devicetree/bindings/mtd/nand-controller.yaml


fixed the compatible and example doesn't match issue.

Regards
Vadivel





Re: [net-next 1/6] net: marvell: prestera: Add driver for Prestera family ASIC devices

2020-05-29 Thread David Miller


Please remove all of the __packed attributes.

I looked at your data structures and all of them use fixed sized types
and are multiples of 4 so the __packed attribute is completely
unnecessary.

The alignment attribute is also unnecessary so please remove that too.


Re: [PATCH][net-next] nexthop: fix incorrect allocation failure on nhg->spare

2020-05-29 Thread David Miller
From: Colin King 
Date: Thu, 28 May 2020 15:51:14 +0100

> @@ -1185,7 +1185,7 @@ static struct nexthop *nexthop_create_group(struct net 
> *net,
>  
>   /* spare group used for removals */
>   nhg->spare = nexthop_grp_alloc(num_nh);

I don't even see this line in the current net-next tree nor any references
to ->spare.

What am I missing?


Re: KASAN: use-after-free Read in ib_uverbs_remove_one

2020-05-29 Thread Jason Gunthorpe
On Thu, May 28, 2020 at 07:33:16AM -0700, syzbot wrote:
> Hello,
> 
> syzbot found the following crash on:
> 
> HEAD commit:c11d28ab Add linux-next specific files for 20200522
> git tree:   linux-next
> console output: https://syzkaller.appspot.com/x/log.txt?x=12fc3e7210
> kernel config:  https://syzkaller.appspot.com/x/.config?x=3f6dbdea4159fb66
> dashboard link: https://syzkaller.appspot.com/bug?extid=478fd0d54412b8759e0d
> compiler:   gcc (GCC) 9.0.0 20181231 (experimental)
> 
> Unfortunately, I don't have any reproducer for this crash yet.
> 
> IMPORTANT: if you fix the bug, please add the following tag to the commit:
> Reported-by: syzbot+478fd0d54412b8759...@syzkaller.appspotmail.com
> 
> ==
> BUG: KASAN: use-after-free in ib_uverbs_remove_one+0x411/0x4e0 
> drivers/infiniband/core/uverbs_main.c:1210
> Read of size 4 at addr 888096324578 by task syz-executor.2/15847
> 
> CPU: 1 PID: 15847 Comm: syz-executor.2 Not tainted 
> 5.7.0-rc6-next-20200522-syzkaller #0
> Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS 
> Google 01/01/2011
> Call Trace:
>  __dump_stack lib/dump_stack.c:77 [inline]
>  dump_stack+0x18f/0x20d lib/dump_stack.c:118
>  print_address_description.constprop.0.cold+0xd3/0x413 mm/kasan/report.c:383
>  __kasan_report mm/kasan/report.c:513 [inline]
>  kasan_report.cold+0x1f/0x37 mm/kasan/report.c:530
>  ib_uverbs_remove_one+0x411/0x4e0 drivers/infiniband/core/uverbs_main.c:1210
>  remove_client_context+0xbe/0x110 drivers/infiniband/core/device.c:732
>  disable_device+0x13b/0x230 drivers/infiniband/core/device.c:1278
>  __ib_unregister_device+0x91/0x180 drivers/infiniband/core/device.c:1445
>  ib_unregister_device_and_put+0x57/0x80 drivers/infiniband/core/device.c:1508
>  nldev_dellink+0x20a/0x310 drivers/infiniband/core/nldev.c:1571
>  rdma_nl_rcv_msg drivers/infiniband/core/netlink.c:195 [inline]
>  rdma_nl_rcv_skb drivers/infiniband/core/netlink.c:239 [inline]
>  rdma_nl_rcv+0x586/0x900 drivers/infiniband/core/netlink.c:259
>  netlink_unicast_kernel net/netlink/af_netlink.c:1303 [inline]
>  netlink_unicast+0x537/0x740 net/netlink/af_netlink.c:1329
>  netlink_sendmsg+0x882/0xe10 net/netlink/af_netlink.c:1918
>  sock_sendmsg_nosec net/socket.c:652 [inline]
>  sock_sendmsg+0xcf/0x120 net/socket.c:672
>  sys_sendmsg+0x6e6/0x810 net/socket.c:2352
>  ___sys_sendmsg+0x100/0x170 net/socket.c:2406
>  __sys_sendmsg+0xe5/0x1b0 net/socket.c:2439
>  do_syscall_64+0xf6/0x7d0 arch/x86/entry/common.c:295
>  entry_SYSCALL_64_after_hwframe+0x49/0xb3
> RIP: 0033:0x45ca29
> Code: 0d b7 fb ff c3 66 2e 0f 1f 84 00 00 00 00 00 66 90 48 89 f8 48 89 f7 48 
> 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 0f 
> 83 db b6 fb ff c3 66 2e 0f 1f 84 00 00 00 00
> RSP: 002b:7fd6764d1c78 EFLAGS: 0246 ORIG_RAX: 002e
> RAX: ffda RBX: 004ffa00 RCX: 0045ca29
> RDX:  RSI: 2180 RDI: 0003
> RBP: 0078bf00 R08:  R09: 
> R10:  R11: 0246 R12: 
> R13: 09af R14: 004d61b0 R15: 7fd6764d26d4
> 
> Allocated by task 9061:
>  save_stack+0x1b/0x40 mm/kasan/common.c:48
>  set_track mm/kasan/common.c:56 [inline]
>  __kasan_kmalloc mm/kasan/common.c:494 [inline]
>  __kasan_kmalloc.constprop.0+0xbf/0xd0 mm/kasan/common.c:467
>  kmem_cache_alloc_trace+0x153/0x7d0 mm/slab.c:3551
>  kmalloc include/linux/slab.h:555 [inline]
>  kzalloc include/linux/slab.h:669 [inline]
>  ib_uverbs_add_one+0x80/0x7c0 drivers/infiniband/core/uverbs_main.c:1107
>  add_client_context+0x400/0x5e0 drivers/infiniband/core/device.c:677
>  enable_device_and_get+0x1cd/0x3b0 drivers/infiniband/core/device.c:1326
>  ib_register_device drivers/infiniband/core/device.c:1392 [inline]
>  ib_register_device+0xa12/0xda0 drivers/infiniband/core/device.c:1353
>  siw_device_register drivers/infiniband/sw/siw/siw_main.c:70 [inline]
>  siw_newlink drivers/infiniband/sw/siw/siw_main.c:565 [inline]
>  siw_newlink+0xd37/0x1240 drivers/infiniband/sw/siw/siw_main.c:542
>  nldev_newlink+0x29e/0x420 drivers/infiniband/core/nldev.c:1541
>  rdma_nl_rcv_msg drivers/infiniband/core/netlink.c:195 [inline]
>  rdma_nl_rcv_skb drivers/infiniband/core/netlink.c:239 [inline]
>  rdma_nl_rcv+0x586/0x900 drivers/infiniband/core/netlink.c:259
>  netlink_unicast_kernel net/netlink/af_netlink.c:1303 [inline]
>  netlink_unicast+0x537/0x740 net/netlink/af_netlink.c:1329
>  netlink_sendmsg+0x882/0xe10 net/netlink/af_netlink.c:1918
>  sock_sendmsg_nosec net/socket.c:652 [inline]
>  sock_sendmsg+0xcf/0x120 net/socket.c:672
>  sys_sendmsg+0x6e6/0x810 net/socket.c:2352
>  ___sys_sendmsg+0x100/0x170 net/socket.c:2406
>  __sys_sendmsg+0xe5/0x1b0 net/socket.c:2439
>  do_syscall_64+0xf6/0x7d0 arch/x86/entry/common.c:295
>  entry_SYSCALL_64_after_hwframe+0x49/0xb3
> 
> Freed by task 

Re: [PATCH net-next v2] dt-bindings: net: rename the bindings document for MediaTek STAR EMAC

2020-05-29 Thread David Miller
From: Bartosz Golaszewski 
Date: Thu, 28 May 2020 15:59:02 +0200

> From: Bartosz Golaszewski 
> 
> The driver itself was renamed before getting merged into mainline, but
> the binding document kept the old name. This makes both names consistent.
> 
> Signed-off-by: Bartosz Golaszewski 
> ---
> v1 -> v2:
> - update the id field as well

Applied, thank you.


Re: [PATCH] NFC: st21nfca: add missed kfree_skb() in an error path

2020-05-29 Thread David Miller
From: Chuhong Yuan 
Date: Thu, 28 May 2020 18:20:37 +0800

> st21nfca_tm_send_atr_res() misses to call kfree_skb() in an error path.
> Add the missed function call to fix it.
> 
> Fixes: 1892bf844ea0 ("NFC: st21nfca: Adding P2P support to st21nfca in 
> Initiator & Target mode")
> Signed-off-by: Chuhong Yuan 

Applied and queued up for -stable, thank you.


Re: [PATCHES] uaccess misc

2020-05-29 Thread Linus Torvalds
On Fri, May 29, 2020 at 4:54 PM Linus Torvalds
 wrote:
>
> My only complaint was that kvm thing that I think should have gone even 
> further.

Oh... And the cc list looks a bit odd.

You cc'd fsdevel, but none of the patches were really to any
filesystem code (ok, the pselect and binfmt thing is in fs/, but
that's kind of incidental and more of a "we split core system calls up
by area too" than any "it's filesystem code").

The kvm patch didn't have anybody from kvm-land cc'd, for example. I
added some to my "this shouldn't use double underscores" reply, but..

  Linus


Re: [PATCHES] uaccess misc

2020-05-29 Thread Linus Torvalds
On Fri, May 29, 2020 at 4:26 PM Al Viro  wrote:
>
> The stuff that doesn't fit anywhere else.  Hopefully
> saner marshalling for weird 7-argument syscalls (pselect6()),

That looked fine to me, btw. Looks like an improvement even outside
the "avoid __get_user()" and double STAC/CLAC issue.

> low-hanging fruit in several binfmt, unsafe_put_user-based
> x86 cp_stat64(), etc. - there's really no common topic here.

My only complaint was that kvm thing that I think should have gone even further.

   Linus


Re: [PATCH 8/9] x86: kvm_hv_set_msr(): use __put_user() instead of 32bit __clear_user()

2020-05-29 Thread Linus Torvalds
On Fri, May 29, 2020 at 4:27 PM Al Viro  wrote:
> a/arch/x86/kvm/hyperv.c
> -   if (__clear_user((void __user *)addr, sizeof(u32)))
> +   if (__put_user(0, (u32 __user *)addr))

I'm not doubting that this is a correct transformation and an
improvement, but why is it using that double-underscore version in the
first place?

There's a __copy_to_user() in kvm_hv_set_msr_pw() in addition to this
one in kvm_hv_set_msr(). Both go back to 2011 and commit 8b0cedff040b
("KVM: use __copy_to_user/__clear_user to write guest page") and both
look purely like "pointlessly avoid the access_ok".

All these KVM "optimizations" seem entirely pointless, since
access_ok() isn't the problem. And the address _claims_ to be
verified, but I'm not seeing it. There is not a single 'access_ok()'
anywhere in arch/x86/kvm/ that I can see.

It looks like the argument for the address being validated is that it
comes from "gfn_to_hva()", which should only return
host-virtual-addresses. That may be true.

But "should" is not "does", and honestly, the cost of gfn_to_hva() is
high enough that then using that as an argument for removing
"access_ok()" smells.

So I would suggest just removing all these completely bogus
double-underscore versions. It's pointless, it's wrong, and it's
unsafe.

This isn't even some critical path, but even if it was, that user
address check isn't where the problems are.

 Linus


Re: [PATCH] HID: usbhid: do not sleep when opening device

2020-05-29 Thread Nicolas Boichat
On Sat, May 30, 2020 at 3:59 AM Dmitry Torokhov
 wrote:
>
> usbhid tries to give the device 50 milliseconds to drain its queues
> when opening the device, but does it naively by simply sleeping in open
> handler, which slows down device probing (and thus may affect overall
> boot time).
>
> However we do not need to sleep as we can instead mark a point of time
> in the future when we should start processing the events.
>
> Reported-by: Nicolas Boichat 
> Signed-off-by: Dmitry Torokhov 
> ---
>  drivers/hid/usbhid/hid-core.c | 27 +++
>  drivers/hid/usbhid/usbhid.h   |  1 +
>  2 files changed, 16 insertions(+), 12 deletions(-)
>
> diff --git a/drivers/hid/usbhid/hid-core.c b/drivers/hid/usbhid/hid-core.c
> index c7bc9db5b192..e69992e945b2 100644
> --- a/drivers/hid/usbhid/hid-core.c
> +++ b/drivers/hid/usbhid/hid-core.c
> @@ -95,6 +95,19 @@ static int hid_start_in(struct hid_device *hid)
> set_bit(HID_NO_BANDWIDTH, >iofl);
> } else {
> clear_bit(HID_NO_BANDWIDTH, >iofl);
> +
> +   if (test_and_clear_bit(HID_RESUME_RUNNING,
> +  >iofl)) {
> +   /*
> +* In case events are generated while nobody 
> was
> +* listening, some are released when the 
> device
> +* is re-opened. Wait 50 msec for the queue to
> +* empty before allowing events to go through
> +* hid.
> +*/
> +   usbhid->input_start_time = jiffies +
> +  
> msecs_to_jiffies(50);
> +   }
> }
> }
> spin_unlock_irqrestore(>lock, flags);
> @@ -280,7 +293,8 @@ static void hid_irq_in(struct urb *urb)
> if (!test_bit(HID_OPENED, >iofl))
> break;
> usbhid_mark_busy(usbhid);
> -   if (!test_bit(HID_RESUME_RUNNING, >iofl)) {
> +   if (!test_bit(HID_RESUME_RUNNING, >iofl) &&
> +   time_after(jiffies, usbhid->input_start_time)) {

Are we worried about jiffies overflowing (32-bit@1000Hz is "only" 49.7 days...)

> hid_input_report(urb->context, HID_INPUT_REPORT,
>  urb->transfer_buffer,
>  urb->actual_length, 1);
> @@ -714,17 +728,6 @@ static int usbhid_open(struct hid_device *hid)
> }
>
> usb_autopm_put_interface(usbhid->intf);
> -
> -   /*
> -* In case events are generated while nobody was listening,
> -* some are released when the device is re-opened.
> -* Wait 50 msec for the queue to empty before allowing events
> -* to go through hid.
> -*/
> -   if (res == 0)
> -   msleep(50);
> -
> -   clear_bit(HID_RESUME_RUNNING, >iofl);
> return res;
>  }
>
> diff --git a/drivers/hid/usbhid/usbhid.h b/drivers/hid/usbhid/usbhid.h
> index 8620408bd7af..805949671b96 100644
> --- a/drivers/hid/usbhid/usbhid.h
> +++ b/drivers/hid/usbhid/usbhid.h
> @@ -82,6 +82,7 @@ struct usbhid_device {
>
> spinlock_t lock;/* 
> fifo spinlock */
> unsigned long iofl; /* 
> I/O flags (CTRL_RUNNING, OUT_RUNNING) */
> +   unsigned long input_start_time; /* 
> When to start handling input, in jiffies */
> struct timer_list io_retry; /* 
> Retry timer */
> unsigned long stop_retry;   /* 
> Time to give up, in jiffies */
> unsigned int retry_delay;   /* 
> Delay length in ms */
> --
> 2.27.0.rc0.183.gde8f92d652-goog
>
>
> --
> Dmitry


Re: [PATCH v2] hwmon:(adm1275) Enable adm1278 ADM1278_TEMP1_EN

2020-05-29 Thread Vijay Khemka


On 5/29/20, 11:56 AM, "Guenter Roeck"  wrote:

On 5/29/20 10:57 AM, Vijay Khemka wrote:
> 
> 
> On 5/29/20, 5:47 AM, "Manikandan Elumalai" 
 wrote:
> 
> The adm1278 temperature sysfs attribute need it for one of the 
openbmc platform . 
> This functionality is not enabled by default, so PMON_CONFIG needs to 
be modified in order to enable it.
> 
> Signed-off-by   : Manikandan Elumalai 

> 
> v2:
>- Add Signed-off-by.
>- Removed ADM1278_TEMP1_EN check.
> ---
>  drivers/hwmon/pmbus/adm1275.c | 21 +
>  1 file changed, 17 insertions(+), 4 deletions(-)
> 
> diff --git a/drivers/hwmon/pmbus/adm1275.c 
b/drivers/hwmon/pmbus/adm1275.c
> index 5caa37fb..ab5fceb 100644
> --- a/drivers/hwmon/pmbus/adm1275.c
> +++ b/drivers/hwmon/pmbus/adm1275.c
> @@ -666,7 +666,23 @@ static int adm1275_probe(struct i2c_client 
*client,
>   tindex = 3;
>  
>   info->func[0] |= PMBUS_HAVE_PIN | 
PMBUS_HAVE_STATUS_INPUT |
> - PMBUS_HAVE_VOUT | PMBUS_HAVE_STATUS_VOUT;
> + PMBUS_HAVE_VOUT | PMBUS_HAVE_STATUS_VOUT |
> + PMBUS_HAVE_TEMP | PMBUS_HAVE_STATUS_TEMP;
> +
> + config = i2c_smbus_read_byte_data(client, 
ADM1275_PMON_CONFIG);
> + if (config < 0)
> + return config;
> +
> + /* Enable TEMP1 by default */
> + config |= ADM1278_TEMP1_EN;
> + ret = i2c_smbus_write_byte_data(client,
> + ADM1275_PMON_CONFIG,
> + config);
> + if (ret < 0) {
> + dev_err(>dev,
> + "Failed to enable temperature config\n");
> + return -ENODEV;
> + }
> You don't need this above code removing check as below should be enough  
to
> populate sysfs entry you need.
> 

You mean you are only interested in having the attribute, even if it doesn't
report anything useful because monitoring is disabled in the chip ?
Sorry, I don't understand. Can you elaborate ?
Sorry for misinterpretation, No I don't mean that. This should be fine.

Thanks,
Guenter

>   /* Enable VOUT if not enabled (it is disabled by 
default) */
>   if (!(config & ADM1278_VOUT_EN)) {
> @@ -681,9 +697,6 @@ static int adm1275_probe(struct i2c_client 
*client,
>   }
>   }
>  
> - if (config & ADM1278_TEMP1_EN)
> - info->func[0] |=
> - PMBUS_HAVE_TEMP | 
PMBUS_HAVE_STATUS_TEMP;
>   if (config & ADM1278_VIN_EN)
>   info->func[0] |= PMBUS_HAVE_VIN;
>   break;
> -- 
> 2.7.4
> 
> 
> 




[PATCH 2/2] vhost: convert get_user_pages() --> pin_user_pages()

2020-05-29 Thread John Hubbard
This code was using get_user_pages*(), in approximately a "Case 5"
scenario (accessing the data within a page), using the categorization
from [1]. That means that it's time to convert the get_user_pages*() +
put_page() calls to pin_user_pages*() + unpin_user_pages() calls.

There is some helpful background in [2]: basically, this is a small
part of fixing a long-standing disconnect between pinning pages, and
file systems' use of those pages.

[1] Documentation/core-api/pin_user_pages.rst

[2] "Explicit pinning of user-space pages":
https://lwn.net/Articles/807108/

Cc: Michael S. Tsirkin 
Cc: Jason Wang 
Cc: k...@vger.kernel.org
Cc: virtualizat...@lists.linux-foundation.org
Cc: net...@vger.kernel.org
Signed-off-by: John Hubbard 
---
 drivers/vhost/vhost.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
index 21a59b598ed8..596132a96cd5 100644
--- a/drivers/vhost/vhost.c
+++ b/drivers/vhost/vhost.c
@@ -1762,15 +1762,14 @@ static int set_bit_to_user(int nr, void __user *addr)
int bit = nr + (log % PAGE_SIZE) * 8;
int r;
 
-   r = get_user_pages_fast(log, 1, FOLL_WRITE, );
+   r = pin_user_pages_fast(log, 1, FOLL_WRITE, );
if (r < 0)
return r;
BUG_ON(r != 1);
base = kmap_atomic(page);
set_bit(bit, base);
kunmap_atomic(base);
-   set_page_dirty_lock(page);
-   put_page(page);
+   unpin_user_pages_dirty_lock(, 1, true);
return 0;
 }
 
-- 
2.26.2



[PATCH 1/2] docs: mm/gup: pin_user_pages.rst: add a "case 5"

2020-05-29 Thread John Hubbard
There are four cases listed in pin_user_pages.rst. These are
intended to help developers figure out whether to use
get_user_pages*(), or pin_user_pages*(). However, the four cases
do not cover all the situations. For example, drivers/vhost/vhost.c
has a "pin, write to page, set page dirty, unpin" case.

Add a fifth case, to help explain that there is a general pattern
that requires pin_user_pages*() API calls.

Cc: Vlastimil Babka 
Cc: Jan Kara 
Cc: Jérôme Glisse 
Cc: Dave Chinner 
Cc: Jonathan Corbet 
Cc: linux-...@vger.kernel.org
Cc: linux-fsde...@vger.kernel.org
Signed-off-by: John Hubbard 
---
 Documentation/core-api/pin_user_pages.rst | 20 
 1 file changed, 20 insertions(+)

diff --git a/Documentation/core-api/pin_user_pages.rst 
b/Documentation/core-api/pin_user_pages.rst
index 4675b04e8829..b9f2688a2c67 100644
--- a/Documentation/core-api/pin_user_pages.rst
+++ b/Documentation/core-api/pin_user_pages.rst
@@ -171,6 +171,26 @@ If only struct page data (as opposed to the actual memory 
contents that a page
 is tracking) is affected, then normal GUP calls are sufficient, and neither 
flag
 needs to be set.
 
+CASE 5: Pinning in order to write to the data within the page
+-
+Even though neither DMA nor Direct IO is involved, just a simple case of "pin,
+access page's data, unpin" can cause a problem. Case 5 may be considered a
+superset of Case 1, plus Case 2, plus anything that invokes that pattern. In
+other words, if the code is neither Case 1 nor Case 2, it may still require
+FOLL_PIN, for patterns like this:
+
+Correct (uses FOLL_PIN calls):
+pin_user_pages()
+access the data within the pages
+set_page_dirty_lock()
+unpin_user_pages()
+
+INCORRECT (uses FOLL_GET calls):
+get_user_pages()
+access the data within the pages
+set_page_dirty_lock()
+put_page()
+
 page_maybe_dma_pinned(): the whole point of pinning
 ===
 
-- 
2.26.2



[PATCH 0/2] vhost, docs: convert to pin_user_pages(), new "case 5"

2020-05-29 Thread John Hubbard
Hi,

It recently became clear to me that there are some get_user_pages*()
callers that don't fit neatly into any of the four cases that are so
far listed in pin_user_pages.rst. vhost.c is one of those.

Add a Case 5 to the documentation, and refer to that when converting
vhost.c.

Thanks to Jan Kara for helping me (again) in understanding the
interaction between get_user_pages() and page writeback [1].

This is based on today's mmotm, which has a nearby patch to
pin_user_pages.rst that rewords cases 3 and 4.

Note that I have only compile-tested the vhost.c patch, although that
does also include cross-compiling for a few other arches. Any run-time
testing would be greatly appreciated.

[1] https://lore.kernel.org/r/20200529070343.gl14...@quack2.suse.cz

John Hubbard (2):
  docs: mm/gup: pin_user_pages.rst: add a "case 5"
  vhost: convert get_user_pages() --> pin_user_pages()

 Documentation/core-api/pin_user_pages.rst | 20 
 drivers/vhost/vhost.c |  5 ++---
 2 files changed, 22 insertions(+), 3 deletions(-)

-- 
2.26.2



[PATCH 3/4] hpsa: get rid of compat_alloc_user_space()

2020-05-29 Thread Al Viro
From: Al Viro 

no need for building a native struct on kernel stack, copying
it to userland one, then calling hpsa_ioctl() which copies it
back into _another_ instance of the same struct.

Signed-off-by: Al Viro 
---
 drivers/scsi/hpsa.c | 80 -
 1 file changed, 36 insertions(+), 44 deletions(-)

diff --git a/drivers/scsi/hpsa.c b/drivers/scsi/hpsa.c
index 64fd97272109..c7fbe56891ef 100644
--- a/drivers/scsi/hpsa.c
+++ b/drivers/scsi/hpsa.c
@@ -254,6 +254,10 @@ static irqreturn_t do_hpsa_intr_intx(int irq, void 
*dev_id);
 static irqreturn_t do_hpsa_intr_msi(int irq, void *dev_id);
 static int hpsa_ioctl(struct scsi_device *dev, unsigned int cmd,
  void __user *arg);
+static int hpsa_passthru_ioctl(struct ctlr_info *h,
+  IOCTL_Command_struct *iocommand);
+static int hpsa_big_passthru_ioctl(struct ctlr_info *h,
+  BIG_IOCTL_Command_struct *ioc);
 
 #ifdef CONFIG_COMPAT
 static int hpsa_compat_ioctl(struct scsi_device *dev, unsigned int cmd,
@@ -6217,75 +6221,63 @@ static void cmd_free(struct ctlr_info *h, struct 
CommandList *c)
 static int hpsa_ioctl32_passthru(struct scsi_device *dev, unsigned int cmd,
void __user *arg)
 {
-   IOCTL32_Command_struct __user *arg32 =
-   (IOCTL32_Command_struct __user *) arg;
+   struct ctlr_info *h = sdev_to_hba(dev);
+   IOCTL32_Command_struct __user *arg32 = arg;
IOCTL_Command_struct arg64;
-   IOCTL_Command_struct __user *p = compat_alloc_user_space(sizeof(arg64));
int err;
u32 cp;
 
-   memset(, 0, sizeof(arg64));
-   err = 0;
-   err |= copy_from_user(_info, >LUN_info,
-  sizeof(arg64.LUN_info));
-   err |= copy_from_user(, >Request,
-  sizeof(arg64.Request));
-   err |= copy_from_user(_info, >error_info,
-  sizeof(arg64.error_info));
-   err |= get_user(arg64.buf_size, >buf_size);
-   err |= get_user(cp, >buf);
-   arg64.buf = compat_ptr(cp);
-   err |= copy_to_user(p, , sizeof(arg64));
+   if (!arg)
+   return -EINVAL;
 
-   if (err)
+   memset(, 0, sizeof(arg64));
+   if (copy_from_user(, arg32, offsetof(IOCTL_Command_struct, buf)))
+   return -EFAULT;
+   if (get_user(cp, >buf))
return -EFAULT;
+   arg64.buf = compat_ptr(cp);
 
-   err = hpsa_ioctl(dev, CCISS_PASSTHRU, p);
+   if (atomic_dec_if_positive(>passthru_cmds_avail) < 0)
+   return -EAGAIN;
+   err = hpsa_passthru_ioctl(h, );
+   atomic_inc(>passthru_cmds_avail);
if (err)
return err;
-   err |= copy_in_user(>error_info, >error_info,
-sizeof(arg32->error_info));
-   if (err)
+   if (copy_to_user(>error_info, _info,
+sizeof(arg32->error_info)))
return -EFAULT;
-   return err;
+   return 0;
 }
 
 static int hpsa_ioctl32_big_passthru(struct scsi_device *dev,
unsigned int cmd, void __user *arg)
 {
-   BIG_IOCTL32_Command_struct __user *arg32 =
-   (BIG_IOCTL32_Command_struct __user *) arg;
+   struct ctlr_info *h = sdev_to_hba(dev);
+   BIG_IOCTL32_Command_struct __user *arg32 = arg;
BIG_IOCTL_Command_struct arg64;
-   BIG_IOCTL_Command_struct __user *p =
-   compat_alloc_user_space(sizeof(arg64));
int err;
u32 cp;
 
+   if (!arg)
+   return -EINVAL;
memset(, 0, sizeof(arg64));
-   err = 0;
-   err |= copy_from_user(_info, >LUN_info,
-  sizeof(arg64.LUN_info));
-   err |= copy_from_user(, >Request,
-  sizeof(arg64.Request));
-   err |= copy_from_user(_info, >error_info,
-  sizeof(arg64.error_info));
-   err |= get_user(arg64.buf_size, >buf_size);
-   err |= get_user(arg64.malloc_size, >malloc_size);
-   err |= get_user(cp, >buf);
-   arg64.buf = compat_ptr(cp);
-   err |= copy_to_user(p, , sizeof(arg64));
-
-   if (err)
+   if (copy_from_user(, arg32,
+  offsetof(BIG_IOCTL32_Command_struct, buf)))
return -EFAULT;
+   if (get_user(cp, >buf))
+   return -EFAULT;
+   arg64.buf = compat_ptr(cp);
 
-   err = hpsa_ioctl(dev, CCISS_BIG_PASSTHRU, p);
+   if (atomic_dec_if_positive(>passthru_cmds_avail) < 0)
+   return -EAGAIN;
+   err = hpsa_big_passthru_ioctl(h, );
+   atomic_inc(>passthru_cmds_avail);
if (err)
return err;
-   err |= copy_in_user(>error_info, >error_info,
-sizeof(arg32->error_info));
-   if (err)
+   if (copy_to_user(>error_info, _info,
+sizeof(arg32->error_info)))
return -EFAULT;
-   return err;
+   return 0;
 }
 
 

[PATCH 1/4] hpsa passthrough: lift {BIG_,}IOCTL_Command_struct copy{in,out} into hpsa_ioctl()

2020-05-29 Thread Al Viro
From: Al Viro 

Signed-off-by: Al Viro 
---
 drivers/scsi/hpsa.c | 116 +---
 1 file changed, 56 insertions(+), 60 deletions(-)

diff --git a/drivers/scsi/hpsa.c b/drivers/scsi/hpsa.c
index 1e9302e99d05..3344a06c938e 100644
--- a/drivers/scsi/hpsa.c
+++ b/drivers/scsi/hpsa.c
@@ -6358,37 +6358,33 @@ static int hpsa_getdrivver_ioctl(struct ctlr_info *h, 
void __user *argp)
return 0;
 }
 
-static int hpsa_passthru_ioctl(struct ctlr_info *h, void __user *argp)
+static int hpsa_passthru_ioctl(struct ctlr_info *h,
+  IOCTL_Command_struct *iocommand)
 {
-   IOCTL_Command_struct iocommand;
struct CommandList *c;
char *buff = NULL;
u64 temp64;
int rc = 0;
 
-   if (!argp)
-   return -EINVAL;
if (!capable(CAP_SYS_RAWIO))
return -EPERM;
-   if (copy_from_user(, argp, sizeof(iocommand)))
-   return -EFAULT;
-   if ((iocommand.buf_size < 1) &&
-   (iocommand.Request.Type.Direction != XFER_NONE)) {
+   if ((iocommand->buf_size < 1) &&
+   (iocommand->Request.Type.Direction != XFER_NONE)) {
return -EINVAL;
}
-   if (iocommand.buf_size > 0) {
-   buff = kmalloc(iocommand.buf_size, GFP_KERNEL);
+   if (iocommand->buf_size > 0) {
+   buff = kmalloc(iocommand->buf_size, GFP_KERNEL);
if (buff == NULL)
return -ENOMEM;
-   if (iocommand.Request.Type.Direction & XFER_WRITE) {
+   if (iocommand->Request.Type.Direction & XFER_WRITE) {
/* Copy the data into the buffer we created */
-   if (copy_from_user(buff, iocommand.buf,
-   iocommand.buf_size)) {
+   if (copy_from_user(buff, iocommand->buf,
+   iocommand->buf_size)) {
rc = -EFAULT;
goto out_kfree;
}
} else {
-   memset(buff, 0, iocommand.buf_size);
+   memset(buff, 0, iocommand->buf_size);
}
}
c = cmd_alloc(h);
@@ -6398,23 +6394,23 @@ static int hpsa_passthru_ioctl(struct ctlr_info *h, 
void __user *argp)
c->scsi_cmd = SCSI_CMD_BUSY;
/* Fill in Command Header */
c->Header.ReplyQueue = 0; /* unused in simple mode */
-   if (iocommand.buf_size > 0) {   /* buffer to fill */
+   if (iocommand->buf_size > 0) {  /* buffer to fill */
c->Header.SGList = 1;
c->Header.SGTotal = cpu_to_le16(1);
} else  { /* no buffers to fill */
c->Header.SGList = 0;
c->Header.SGTotal = cpu_to_le16(0);
}
-   memcpy(>Header.LUN, _info, sizeof(c->Header.LUN));
+   memcpy(>Header.LUN, >LUN_info, sizeof(c->Header.LUN));
 
/* Fill in Request block */
-   memcpy(>Request, ,
+   memcpy(>Request, >Request,
sizeof(c->Request));
 
/* Fill in the scatter gather information */
-   if (iocommand.buf_size > 0) {
+   if (iocommand->buf_size > 0) {
temp64 = dma_map_single(>pdev->dev, buff,
-   iocommand.buf_size, DMA_BIDIRECTIONAL);
+   iocommand->buf_size, DMA_BIDIRECTIONAL);
if (dma_mapping_error(>pdev->dev, (dma_addr_t) temp64)) {
c->SG[0].Addr = cpu_to_le64(0);
c->SG[0].Len = cpu_to_le32(0);
@@ -6422,12 +6418,12 @@ static int hpsa_passthru_ioctl(struct ctlr_info *h, 
void __user *argp)
goto out;
}
c->SG[0].Addr = cpu_to_le64(temp64);
-   c->SG[0].Len = cpu_to_le32(iocommand.buf_size);
+   c->SG[0].Len = cpu_to_le32(iocommand->buf_size);
c->SG[0].Ext = cpu_to_le32(HPSA_SG_LAST); /* not chaining */
}
rc = hpsa_scsi_do_simple_cmd(h, c, DEFAULT_REPLY_QUEUE,
NO_TIMEOUT);
-   if (iocommand.buf_size > 0)
+   if (iocommand->buf_size > 0)
hpsa_pci_unmap(h->pdev, c, 1, DMA_BIDIRECTIONAL);
check_ioctl_unit_attention(h, c);
if (rc) {
@@ -6436,16 +6432,12 @@ static int hpsa_passthru_ioctl(struct ctlr_info *h, 
void __user *argp)
}
 
/* Copy the error information out */
-   memcpy(_info, c->err_info,
-   sizeof(iocommand.error_info));
-   if (copy_to_user(argp, , sizeof(iocommand))) {
-   rc = -EFAULT;
-   goto out;
-   }
-   if ((iocommand.Request.Type.Direction & XFER_READ) &&
-   iocommand.buf_size > 0) {
+   memcpy(>error_info, c->err_info,
+   sizeof(iocommand->error_info));
+   if ((iocommand->Request.Type.Direction & XFER_READ) &&
+   

[PATCH 4/4] hpsa_ioctl(): tidy up a bit

2020-05-29 Thread Al Viro
From: Al Viro 

Signed-off-by: Al Viro 
---
 drivers/scsi/hpsa.c | 7 ++-
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/drivers/scsi/hpsa.c b/drivers/scsi/hpsa.c
index c7fbe56891ef..81d0414e2117 100644
--- a/drivers/scsi/hpsa.c
+++ b/drivers/scsi/hpsa.c
@@ -6577,14 +6577,11 @@ static void check_ioctl_unit_attention(struct ctlr_info 
*h,
  * ioctl
  */
 static int hpsa_ioctl(struct scsi_device *dev, unsigned int cmd,
- void __user *arg)
+ void __user *argp)
 {
-   struct ctlr_info *h;
-   void __user *argp = (void __user *)arg;
+   struct ctlr_info *h = sdev_to_hba(dev);
int rc;
 
-   h = sdev_to_hba(dev);
-
switch (cmd) {
case CCISS_DEREGDISK:
case CCISS_REGNEWDISK:
-- 
2.11.0



  1   2   3   4   5   6   7   8   9   10   >