[PATCH net-next] mlx5: Restore err assignment in mlx5_mdev_init
Clang warns: drivers/net/ethernet/mellanox/mlx5/core/main.c:1278:6: warning: variable 'err' is used uninitialized whenever 'if' condition is true [-Wsometimes-uninitialized] if (!priv->dbg_root) { ^~~ drivers/net/ethernet/mellanox/mlx5/core/main.c:1303:9: note: uninitialized use occurs here return err; ^~~ drivers/net/ethernet/mellanox/mlx5/core/main.c:1278:2: note: remove the 'if' if its condition is always false if (!priv->dbg_root) { ^~ drivers/net/ethernet/mellanox/mlx5/core/main.c:1259:9: note: initialize the variable 'err' to silence this warning int err; ^ = 0 1 warning generated. This path previously returned -ENOMEM, restore that error code so that it is not uninitialized. Fixes: 810cbb25549b ("net/mlx5: Add missing mutex destroy") Link: https://github.com/ClangBuiltLinux/linux/issues/1042 Signed-off-by: Nathan Chancellor --- drivers/net/ethernet/mellanox/mlx5/core/main.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index df46b1fce3a7..ac68445fde2d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -1277,6 +1277,7 @@ static int mlx5_mdev_init(struct mlx5_core_dev *dev, int profile_idx) mlx5_debugfs_root); if (!priv->dbg_root) { dev_err(dev->device, "mlx5_core: error, Cannot create debugfs dir, aborting\n"); + err = -ENOMEM; goto err_dbg_root; } base-commit: c0cc73b79123e67b212bd537a7af88e52c9fbeac -- 2.27.0.rc0
Re: [PATCH v8 14/18] EFI: Introduce the new AMD Memory Encryption GUID.
Hello Steve, On Fri, May 29, 2020 at 07:07:56PM -0700, Steve Rutherford wrote: > On Tue, May 5, 2020 at 2:20 PM Ashish Kalra wrote: > > > > From: Ashish Kalra > > > > Introduce a new AMD Memory Encryption GUID which is currently > > used for defining a new UEFI enviroment variable which indicates > > UEFI/OVMF support for the SEV live migration feature. This variable > > is setup when UEFI/OVMF detects host/hypervisor support for SEV > > live migration and later this variable is read by the kernel using > > EFI runtime services to verify if OVMF supports the live migration > > feature. > > > > Signed-off-by: Ashish Kalra > > --- > > include/linux/efi.h | 1 + > > 1 file changed, 1 insertion(+) > > > > diff --git a/include/linux/efi.h b/include/linux/efi.h > > index 251f1f783cdf..2efb42ccf3a8 100644 > > --- a/include/linux/efi.h > > +++ b/include/linux/efi.h > > @@ -358,6 +358,7 @@ void efi_native_runtime_setup(void); > > > > /* OEM GUIDs */ > > #define DELLEMC_EFI_RCI2_TABLE_GUIDEFI_GUID(0x2d9f28a2, > > 0xa886, 0x456a, 0x97, 0xa8, 0xf1, 0x1e, 0xf2, 0x4f, 0xf4, 0x55) > > +#define MEM_ENCRYPT_GUID EFI_GUID(0x0cf29b71, > > 0x9e51, 0x433a, 0xa3, 0xb7, 0x81, 0xf3, 0xab, 0x16, 0xb8, 0x75) > > > > typedef struct { > > efi_guid_t guid; > > -- > > 2.17.1 > > > Have you gotten this GUID upstreamed into edk2? > Not yet. This patch and the other OVMF patches are ready to be sent for upstreaming, i was waiting for this kernel patch-set to be accepted and upstreamed. > Reviewed-by: Steve Rutherford
Re: [PATCH v8 12/18] KVM: SVM: Add support for static allocation of unified Page Encryption Bitmap.
Hello Steve, On Fri, May 29, 2020 at 07:07:33PM -0700, Steve Rutherford wrote: > On Tue, May 5, 2020 at 2:18 PM Ashish Kalra wrote: > > > > From: Ashish Kalra > > > > Add support for static allocation of the unified Page encryption bitmap by > > extending kvm_arch_commit_memory_region() callack to add svm specific > > x86_ops > > which can read the userspace provided memory region/memslots and calculate > > the amount of guest RAM managed by the KVM and grow the bitmap based > > on that information, i.e. the highest guest PA that is mapped by a memslot. > > > > Signed-off-by: Ashish Kalra > > --- > > arch/x86/include/asm/kvm_host.h | 1 + > > arch/x86/kvm/svm/sev.c | 35 + > > arch/x86/kvm/svm/svm.c | 1 + > > arch/x86/kvm/svm/svm.h | 1 + > > arch/x86/kvm/x86.c | 5 + > > 5 files changed, 43 insertions(+) > > > > diff --git a/arch/x86/include/asm/kvm_host.h > > b/arch/x86/include/asm/kvm_host.h > > index fc74144d5ab0..b573ea85b57e 100644 > > --- a/arch/x86/include/asm/kvm_host.h > > +++ b/arch/x86/include/asm/kvm_host.h > > @@ -1254,6 +1254,7 @@ struct kvm_x86_ops { > > > > bool (*apic_init_signal_blocked)(struct kvm_vcpu *vcpu); > > int (*enable_direct_tlbflush)(struct kvm_vcpu *vcpu); > > + void (*commit_memory_region)(struct kvm *kvm, enum kvm_mr_change > > change); > > int (*page_enc_status_hc)(struct kvm *kvm, unsigned long gpa, > > unsigned long sz, unsigned long mode); > > int (*get_page_enc_bitmap)(struct kvm *kvm, > > diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c > > index 30efc1068707..c0d7043a0627 100644 > > --- a/arch/x86/kvm/svm/sev.c > > +++ b/arch/x86/kvm/svm/sev.c > > @@ -1377,6 +1377,41 @@ static int sev_resize_page_enc_bitmap(struct kvm > > *kvm, unsigned long new_size) > > return 0; > > } > > > > +void svm_commit_memory_region(struct kvm *kvm, enum kvm_mr_change change) > > +{ > > + struct kvm_memslots *slots; > > + struct kvm_memory_slot *memslot; > > + gfn_t start, end = 0; > > + > > + spin_lock(>mmu_lock); > > + if (change == KVM_MR_CREATE) { > > + slots = kvm_memslots(kvm); > > + kvm_for_each_memslot(memslot, slots) { > > + start = memslot->base_gfn; > > + end = memslot->base_gfn + memslot->npages; > > + /* > > +* KVM memslots is a sorted list, starting with > > +* the highest mapped guest PA, so pick the topmost > > +* valid guest PA. > > +*/ > > + if (memslot->npages) > > + break; > > + } > > + } > > + spin_unlock(>mmu_lock); > > + > > + if (end) { > > + /* > > +* NORE: This callback is invoked in vm ioctl > > +* set_user_memory_region, hence we can use a > > +* mutex here. > > +*/ > > + mutex_lock(>lock); > > + sev_resize_page_enc_bitmap(kvm, end); > > + mutex_unlock(>lock); > > + } > > +} > > + > > int svm_page_enc_status_hc(struct kvm *kvm, unsigned long gpa, > > unsigned long npages, unsigned long enc) > > { > > diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c > > index 501e82f5593c..442adbbb0641 100644 > > --- a/arch/x86/kvm/svm/svm.c > > +++ b/arch/x86/kvm/svm/svm.c > > @@ -4015,6 +4015,7 @@ static struct kvm_x86_ops svm_x86_ops __initdata = { > > > > .check_nested_events = svm_check_nested_events, > > > > + .commit_memory_region = svm_commit_memory_region, > > .page_enc_status_hc = svm_page_enc_status_hc, > > .get_page_enc_bitmap = svm_get_page_enc_bitmap, > > .set_page_enc_bitmap = svm_set_page_enc_bitmap, > > diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h > > index 2ebdcce50312..fd99e0a5417a 100644 > > --- a/arch/x86/kvm/svm/svm.h > > +++ b/arch/x86/kvm/svm/svm.h > > @@ -406,6 +406,7 @@ int svm_page_enc_status_hc(struct kvm *kvm, unsigned > > long gpa, > > unsigned long npages, unsigned long enc); > > int svm_get_page_enc_bitmap(struct kvm *kvm, struct kvm_page_enc_bitmap > > *bmap); > > int svm_set_page_enc_bitmap(struct kvm *kvm, struct kvm_page_enc_bitmap > > *bmap); > > +void svm_commit_memory_region(struct kvm *kvm, enum kvm_mr_change change); > > > > /* avic.c */ > > > > diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c > > index c4166d7a0493..8938de868d42 100644 > > --- a/arch/x86/kvm/x86.c > > +++ b/arch/x86/kvm/x86.c > > @@ -10133,6 +10133,11 @@ void kvm_arch_commit_memory_region(struct kvm *kvm, > > kvm_mmu_change_mmu_pages(kvm, > >
Re: [PATCH v2 2/3] seccomp: Introduce addfd ioctl to seccomp user notifier
On Sat, May 30, 2020 at 03:58:18AM +, Sargun Dhillon wrote: > Isn't the "right" way to do this to allocate a bunch of file descriptors, > and fill up the user buffer with them, and then install the files? This > seems to like half-install the file descriptors and then error out. > > I know that's the current behaviour, but that seems like a bad idea. Do > we really want to perpetuate this half-broken state? I guess that some > userspace programs could be depending on this -- and their recovery > semantics could rely on this. I mean this is 10+ year old code. Right -- my instincts on this are to leave the behavior as-is. I've been burned by so many "nothing could possible rely on THIS" cases. ;) It might be worth adding a comment (or commit log note) that describes the alternative you suggest here. But I think building a common helper that does all of the work (and will get used in three^Wfour places now) is the correct way to refactor this. Oh hey! Look at scm_detach_fds_compat(). It needs this too. (And it's missing the cgroup tracking.) That would fix: 48a87cc26c13 ("net: netprio: fd passed in SCM_RIGHTS datagram not set correctly") d84295067fc7 ("net: net_cls: fd passed in SCM_RIGHTS datagram not set correctly") So, yes, let's get this fixed up. I'd say first fix the missing sock update in the compat path (so it can be CCed stable). Then fix the missing sock update in pidfd_getfd() (so it can be CCed stable), then write the helper with a refactoring of scm_detach_fds(), scm_detach_fds_compat(), and pidfd_getfd(). And then add the addfd seccomp user_notif ioctl cmd. -- Kees Cook
[PATCH V2] mm, memory_failure: don't send BUS_MCEERR_AO for action required error
Some processes dont't want to be killed early, but in "Action Required" case, those also may be killed by BUS_MCEERR_AO when sharing memory with other which is accessing the fail memory. And sending SIGBUS with BUS_MCEERR_AO for action required error is strange, so ignore the non-current processes here. Suggested-by: Naoya Horiguchi Signed-off-by: Wetp Zhang --- mm/memory-failure.c | 15 +-- 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/mm/memory-failure.c b/mm/memory-failure.c index a96364be8ab4..dd3862fcf2e9 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -210,14 +210,17 @@ static int kill_proc(struct to_kill *tk, unsigned long pfn, int flags) { struct task_struct *t = tk->tsk; short addr_lsb = tk->size_shift; - int ret; + int ret = 0; - pr_err("Memory failure: %#lx: Sending SIGBUS to %s:%d due to hardware memory corruption\n", - pfn, t->comm, t->pid); + if ((t->mm == current->mm) || !(flags & MF_ACTION_REQUIRED)) + pr_err("Memory failure: %#lx: Sending SIGBUS to %s:%d due to hardware memory corruption\n", + pfn, t->comm, t->pid); - if ((flags & MF_ACTION_REQUIRED) && t->mm == current->mm) { - ret = force_sig_mceerr(BUS_MCEERR_AR, (void __user *)tk->addr, - addr_lsb); + if (flags & MF_ACTION_REQUIRED) { + if (t->mm == current->mm) + ret = force_sig_mceerr(BUS_MCEERR_AR, +(void __user *)tk->addr, addr_lsb); + /* send no signal to non-current processes */ } else { /* * Don't use force here, it's convenient if the signal -- 2.14.3 (Apple Git-98)
Re: [PATCH 9/9] staging: media: atomisp: add PMIC_OPREGION dependency
Em Fri, 29 May 2020 20:11:29 -0700 Nathan Chancellor escreveu: > On Fri, May 29, 2020 at 10:00:31PM +0200, Arnd Bergmann wrote: > > Without that driver, there is a link failure in > > > > ERROR: modpost: "intel_soc_pmic_exec_mipi_pmic_seq_element" > > [drivers/staging/media/atomisp/pci/atomisp_gmin_platform.ko] undefined! > > > > Add an explicit Kconfig dependency. > > > > Signed-off-by: Arnd Bergmann > > It'd be interesting to know if this is strictly required for the driver > to work properly. It is. Without OpRegion, the driver can't power on the camera sensors. > The call to intel_soc_pmic_exec_mipi_pmic_seq_element > has some error handling after it, maybe that should just be surrounded > by an #ifdef or IS_ENABLED for PMIC_OPREGION, like some other drivers > do. > > Regardless of that: > > Reviewed-by: Nathan Chancellor > > > --- > > drivers/staging/media/atomisp/Kconfig | 1 + > > 1 file changed, 1 insertion(+) > > > > diff --git a/drivers/staging/media/atomisp/Kconfig > > b/drivers/staging/media/atomisp/Kconfig > > index c4f3049b0706..e86311c14329 100644 > > --- a/drivers/staging/media/atomisp/Kconfig > > +++ b/drivers/staging/media/atomisp/Kconfig > > @@ -11,6 +11,7 @@ menuconfig INTEL_ATOMISP > > config VIDEO_ATOMISP > > tristate "Intel Atom Image Signal Processor Driver" > > depends on VIDEO_V4L2 && INTEL_ATOMISP > > + depends on PMIC_OPREGION > > select IOSF_MBI > > select VIDEOBUF_VMALLOC > > ---help--- > > -- > > 2.26.2 > > Thanks, Mauro
Re: [PATCH v2 2/3] seccomp: Introduce addfd ioctl to seccomp user notifier
On Sat, May 30, 2020 at 05:17:24AM +0200, Jann Horn wrote: > On Sat, May 30, 2020 at 4:43 AM Kees Cook wrote: > > I mean, yes, that's certainly better, but it just seems a shame that > > everyone has to do the get_unused/put_unused dance just because of how > > SCM_RIGHTS does this weird put_user() in the middle. > > > > Can anyone clarify the expected failure mode from SCM_RIGHTS? Can we > > move the put_user() after instead? > > Honestly, I think trying to remove file descriptors and such after > -EFAULT is a waste of time. If userspace runs into -EFAULT, userspace > is beyond saving and can't really do much other than exit immediately. > There are a bunch of places that will change state and then throw > -EFAULT at the end if userspace supplied an invalid address, because > trying to hold locks across userspace accesses just in case userspace > supplied a bogus address is kinda silly (and often borderline > impossible). Logically, I agree. I'm more worried about the behavioral change -- if we don't remove the fd on failure, the fd is installed with no indication to the process that it exists (it won't know the close it -- if it keeps running -- and it may survive across exec). Before, it never entered the file table. > You can actually see that even scm_detach_fds() currently just > silently swallows errors if writing some header fields fails at the > end. Yeah, and it's a corner case. But it should be possible (trivial, even) to clean up on failure to retain the original results. -- Kees Cook
Re: [PATCH 2/2] exec: Compute file based creds only once
On Fri, May 29, 2020 at 10:28:41PM -0500, Eric W. Biederman wrote: > The range-diff winds up being: > 1: c9258ef4879b ! 1: a7868323c263 exec: Add a per bprm->file version of > per_clear > @@ Commit message > > History Tree: > git://git.kernel.org/pub/scm/linux/kernel/git/tglx/history.git > Fixes: 1bb0fa189c6a ("[PATCH] NX: clean up legacy binary support") > +Reviewed-by: Kees Cook > Signed-off-by: "Eric W. Biederman" > > ## fs/exec.c ## > @@ include/linux/lsm_hooks.h >* transitions between security domains). >* The hook must set @bprm->active_secureexec to 1 if AT_SECURE > should be set to >* request libc enable secure mode. > -+ * The hook must set @bprm->pf_per_clear to the personality flags > that > -+ * should be cleared from current->personality. > ++ * The hook must add to @bprm->pf_per_clear any personality flags > that > ++ * should be cleared from current->personality. >* @bprm contains the linux_binprm structure. >* Return 0 if the hook is successful and permission is granted. >* @bprm_check_security: > 2: e6f20c69b96e ! 2: 56305aa9b6fa exec: Compute file based creds only once > @@ Commit message > secureity attribute and derive capabilities from the fact the > user had uid 0 has been added. > > +Reviewed-by: Kees Cook > Signed-off-by: "Eric W. Biederman" > > ## fs/binfmt_misc.c ## > @@ include/linux/lsm_hooks.h > + * between security domains). > + * The hook must set @bprm->secureexec to 1 if AT_SECURE should be > set to >* request libc enable secure mode. > -- * The hook must set @bprm->pf_per_clear to the personality flags > that > -+ * The hook must set @bprm->per_clear to the personality flags that > - * should be cleared from current->personality. > +- * The hook must add to @bprm->pf_per_clear any personality flags > that > ++ * The hook must add to @bprm->per_clear any personality flags that > + * should be cleared from current->personality. >* @bprm contains the linux_binprm structure. >* Return 0 if the hook is successful and permission is granted. Awesome; thanks! > > The cap_ambient_invariant_ok() test is needlessly repeated: it doesn't > > examine securebits, and nonroot_raised_pE appears to have no > > side-effects. > > > > One of those can be dropped, yes? > > That is what it looks like to me. Okay, cool. I was worried I was missing something in the mess of tiny helper calls. :) > I hope that when the dust clears the function can become a > straightforward implementation of the capability equations. > We will see. Yeah, this looks better and better every day! I'm glad you're able to dig through all of this. -- Kees Cook
Re: [PATCH 1/2] exec: Add a per bprm->file version of per_clear
On Fri, May 29, 2020 at 10:23:58PM -0500, Eric W. Biederman wrote: > Kees Cook writes: > > I wish we had more robust execve tests. :( > > I think you have more skill at writing automated tests than I do. So > feel free to write some. Yeah, my limiting factor is available time. No worries; I didn't mean it as a request to you -- it was more a commiseration. :) -- Kees Cook
Re: [PATCH 17/30] KVM: nSVM: synchronize VMCB controls updated by the processor on every vmexit
On 30/05/20 04:06, Krish Sadhukhan wrote: >> >> - nested_vmcb->control.int_ctl = vmcb->control.int_ctl; >> - nested_vmcb->control.int_vector = vmcb->control.int_vector; > > > While it's not related to this patch, I am wondering if we need the > following line in enter_svm_guest_mode(): > > svm->vmcb->control.int_vector = nested_vmcb->control.int_vector; > > If int_vector is used only to trigger a #VMEXIT after we have decided to > inject a virtual interrupt, we probably don't the vector value from the > nested vmcb. KVM does not use int_vector, but other hypervisor sometimes trigger virtual interrupts using int_ctl+int_vector instead of eventinj. (Unlike KVM they don't intercept EXIT_VINTR). Hyper-V operates like that. Paolo
[PATCH v2] locking/Kconfig: Don't forcefully uninline spin_unlock for PREEMPT
From: Sultan Alsawaf This change was originally done in 2005 without any justification in commit bda98685b855 ("[PATCH] x86: inline spin_unlock if !CONFIG_DEBUG_SPINLOCK and !CONFIG_PREEMPT"). Perhaps the reasoning at the time was that PREEMPT was still considered unstable and needed extra debugging; however, this is no longer the case, so remove the artificial limitation. Signed-off-by: Sultan Alsawaf --- kernel/Kconfig.locks | 10 +- kernel/Kconfig.preempt | 1 - 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/kernel/Kconfig.locks b/kernel/Kconfig.locks index 3de8fd11873b..2d992f5c6ec3 100644 --- a/kernel/Kconfig.locks +++ b/kernel/Kconfig.locks @@ -139,7 +139,7 @@ config INLINE_SPIN_UNLOCK_BH config INLINE_SPIN_UNLOCK_IRQ def_bool y - depends on !PREEMPTION || ARCH_INLINE_SPIN_UNLOCK_IRQ + depends on ARCH_INLINE_SPIN_UNLOCK_IRQ config INLINE_SPIN_UNLOCK_IRQRESTORE def_bool y @@ -168,7 +168,7 @@ config INLINE_READ_LOCK_IRQSAVE config INLINE_READ_UNLOCK def_bool y - depends on !PREEMPTION || ARCH_INLINE_READ_UNLOCK + depends on ARCH_INLINE_READ_UNLOCK config INLINE_READ_UNLOCK_BH def_bool y @@ -176,7 +176,7 @@ config INLINE_READ_UNLOCK_BH config INLINE_READ_UNLOCK_IRQ def_bool y - depends on !PREEMPTION || ARCH_INLINE_READ_UNLOCK_IRQ + depends on ARCH_INLINE_READ_UNLOCK_IRQ config INLINE_READ_UNLOCK_IRQRESTORE def_bool y @@ -205,7 +205,7 @@ config INLINE_WRITE_LOCK_IRQSAVE config INLINE_WRITE_UNLOCK def_bool y - depends on !PREEMPTION || ARCH_INLINE_WRITE_UNLOCK + depends on ARCH_INLINE_WRITE_UNLOCK config INLINE_WRITE_UNLOCK_BH def_bool y @@ -213,7 +213,7 @@ config INLINE_WRITE_UNLOCK_BH config INLINE_WRITE_UNLOCK_IRQ def_bool y - depends on !PREEMPTION || ARCH_INLINE_WRITE_UNLOCK_IRQ + depends on ARCH_INLINE_WRITE_UNLOCK_IRQ config INLINE_WRITE_UNLOCK_IRQRESTORE def_bool y diff --git a/kernel/Kconfig.preempt b/kernel/Kconfig.preempt index bf82259cff96..5a9e0409c844 100644 --- a/kernel/Kconfig.preempt +++ b/kernel/Kconfig.preempt @@ -39,7 +39,6 @@ config PREEMPT bool "Preemptible Kernel (Low-Latency Desktop)" depends on !ARCH_NO_PREEMPT select PREEMPTION - select UNINLINE_SPIN_UNLOCK if !ARCH_INLINE_SPIN_UNLOCK help This option reduces the latency of the kernel by making all kernel code (that is not executing in a critical section) -- 2.26.2
[PATCH] locking/Kconfig: Don't forcefully uninline spin_unlock for PREEMPT
From: Sultan Alsawaf This change was originally done in 2005 without any justification in commit bda98685b855 ("[PATCH] x86: inline spin_unlock if !CONFIG_DEBUG_SPINLOCK and !CONFIG_PREEMPT"). Perhaps the reasoning at the time was that PREEMPT was still considered unstable and needed extra debugging; however, this is no longer the case, so remove the artificial limitation. Signed-off-by: Sultan Alsawaf --- kernel/Kconfig.preempt | 1 - 1 file changed, 1 deletion(-) diff --git a/kernel/Kconfig.preempt b/kernel/Kconfig.preempt index bf82259cff96..5a9e0409c844 100644 --- a/kernel/Kconfig.preempt +++ b/kernel/Kconfig.preempt @@ -39,7 +39,6 @@ config PREEMPT bool "Preemptible Kernel (Low-Latency Desktop)" depends on !ARCH_NO_PREEMPT select PREEMPTION - select UNINLINE_SPIN_UNLOCK if !ARCH_INLINE_SPIN_UNLOCK help This option reduces the latency of the kernel by making all kernel code (that is not executing in a critical section) -- 2.26.2
linux-next: Signed-off-by missing for commit in the clk tree
Hi all, Commit 5f2feacb7639 ("clk: vc5: Add support for IDT VersaClock 5P49V6965") is missing a Signed-off-by from its committer. -- Cheers, Stephen Rothwell pgpRr3mitCWYb.pgp Description: OpenPGP digital signature
Re: [PATCH] flow_dissector: work around stack frame size warning
On Fri, May 29, 2020 at 1:14 PM Arnd Bergmann wrote: > > The fl_flow_key structure is around 500 bytes, so having two of them > on the stack in one function now exceeds the warning limit after an > otherwise correct change: > > net/sched/cls_flower.c:298:12: error: stack frame size of 1056 bytes in > function 'fl_classify' [-Werror,-Wframe-larger-than=] > > I suspect the fl_classify function could be reworked to only have one > of them on the stack and modify it in place, but I could not work out > how to do that. > > As a somewhat hacky workaround, move one of them into an out-of-line > function to reduce its scope. This does not necessarily reduce the stack > usage of the outer function, but at least the second copy is removed > from the stack during most of it and does not add up to whatever is > called from there. > > I now see 552 bytes of stack usage for fl_classify(), plus 528 bytes > for fl_mask_lookup(). > > Fixes: 58cff782cc55 ("flow_dissector: Parse multiple MPLS Label Stack > Entries") > Signed-off-by: Arnd Bergmann I think this is probably the quickest way to amend this warning, so: Acked-by: Cong Wang Thanks.
[PATCH][v5] KVM: X86: support APERF/MPERF registers
Guest kernel reports a fixed cpu frequency in /proc/cpuinfo, this is confused to user when turbo is enable, and aperf/mperf can be used to show current cpu frequency after 7d5905dc14a "(x86 / CPU: Always show current CPU frequency in /proc/cpuinfo)" so guest should support aperf/mperf capability This patch implements aperf/mperf by three mode: none, software emulation, and pass-through None: default mode, guest does not support aperf/mperf Software emulation: the period of aperf/mperf in guest mode are accumulated as emulated value Pass-though: it is only suitable for KVM_HINTS_REALTIME, Because that hint guarantees we have a 1:1 vCPU:CPU binding and guaranteed no over-commit. And a per-VM capability is added to configure aperfmperf mode Signed-off-by: Li RongQing Signed-off-by: Chai Wen Signed-off-by: Jia Lina --- diff v4: fix maybe-uninitialized warning diff v3: fix interception of MSR_IA32_MPERF/APERF in svm diff v2: support aperfmperf pass though move common codes to kvm_get_msr_common diff v1: 1. support AMD, but not test 2. support per-vm capability to enable Documentation/virt/kvm/api.rst | 10 ++ arch/x86/include/asm/kvm_host.h | 11 +++ arch/x86/kvm/cpuid.c| 13 - arch/x86/kvm/svm/svm.c | 8 arch/x86/kvm/vmx/vmx.c | 6 ++ arch/x86/kvm/x86.c | 42 + arch/x86/kvm/x86.h | 15 +++ include/uapi/linux/kvm.h| 1 + 8 files changed, 105 insertions(+), 1 deletion(-) diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst index d871dacb984e..f854f4da6fd8 100644 --- a/Documentation/virt/kvm/api.rst +++ b/Documentation/virt/kvm/api.rst @@ -6126,3 +6126,13 @@ KVM can therefore start protected VMs. This capability governs the KVM_S390_PV_COMMAND ioctl and the KVM_MP_STATE_LOAD MP_STATE. KVM_SET_MP_STATE can fail for protected guests when the state change is invalid. + +8.23 KVM_CAP_APERFMPERF + + +:Architectures: x86 +:Parameters: args[0] is aperfmperf mode; + 0 for not support, 1 for software emulation, 2 for pass-through +:Returns: 0 on success; -1 on error + +This capability indicates that KVM supports APERF and MPERF MSR registers diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index fd78bd44b2d6..14643f8af9c4 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -824,6 +824,9 @@ struct kvm_vcpu_arch { /* AMD MSRC001_0015 Hardware Configuration */ u64 msr_hwcr; + + u64 v_mperf; + u64 v_aperf; }; struct kvm_lpage_info { @@ -889,6 +892,12 @@ enum kvm_irqchip_mode { KVM_IRQCHIP_SPLIT,/* created with KVM_CAP_SPLIT_IRQCHIP */ }; +enum kvm_aperfmperf_mode { + KVM_APERFMPERF_NONE, + KVM_APERFMPERF_SOFT, /* software emulate aperfmperf */ + KVM_APERFMPERF_PT,/* pass-through aperfmperf to guest */ +}; + #define APICV_INHIBIT_REASON_DISABLE0 #define APICV_INHIBIT_REASON_HYPERV 1 #define APICV_INHIBIT_REASON_NESTED 2 @@ -986,6 +995,8 @@ struct kvm_arch { struct kvm_pmu_event_filter *pmu_event_filter; struct task_struct *nx_lpage_recovery_thread; + + enum kvm_aperfmperf_mode aperfmperf_mode; }; struct kvm_vm_stat { diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index cd708b0b460a..c960dda4251b 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -122,6 +122,14 @@ int kvm_update_cpuid(struct kvm_vcpu *vcpu) MSR_IA32_MISC_ENABLE_MWAIT); } + best = kvm_find_cpuid_entry(vcpu, 6, 0); + if (best) { + if (guest_has_aperfmperf(vcpu->kvm) && + boot_cpu_has(X86_FEATURE_APERFMPERF)) + best->ecx |= 1; + else + best->ecx &= ~1; + } /* Note, maxphyaddr must be updated before tdp_level. */ vcpu->arch.maxphyaddr = cpuid_query_maxphyaddr(vcpu); vcpu->arch.tdp_level = kvm_x86_ops.get_tdp_level(vcpu); @@ -557,7 +565,10 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function) case 6: /* Thermal management */ entry->eax = 0x4; /* allow ARAT */ entry->ebx = 0; - entry->ecx = 0; + if (boot_cpu_has(X86_FEATURE_APERFMPERF)) + entry->ecx = 0x1; + else + entry->ecx = 0x0; entry->edx = 0; break; /* function 7 has additional index. */ diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index e9c0fb68387d..1d38fe3afc0d 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -1200,6 +1200,14 @@ static int svm_create_vcpu(struct kvm_vcpu *vcpu) svm->msrpm = page_address(msrpm_pages);
[RFC][PATCH] usb: typec: tcpci_rt1711h: Try to avoid screaming irq causing boot hangs
I've recently (since 5.7-rc1) started noticing very rare hangs pretty early in bootup on my HiKey960 board. They have been particularly difficult to debug, as the system seems to not respond at all to sysrq- commands. However, the system is alive as I'll occaionally see firmware loading timeout errors after awhile. Adding changes like initcall_debug and lockdep weren't informative, as it tended to cause the problem to hide. I finally tried to dig in a bit more on this today, and noticed that the last dmesg output before the hang was usually: "random: crng init done" So I dumped the stack at that point, and saw it was being called from the pl061 gpio irq, and the hang always occurred when the crng init finished on cpu 0. Instrumenting that more I could see that when the issue triggered, we were getting a stream of irqs. Chasing further, I found the screaming irq was for the rt1711h, and narrowed down that we were hitting the !chip->tcpci check which immediately returns IRQ_HANDLED, but does not stop the irq from triggering immediately afterwards. This patch slightly reworks the logic, so if we hit the irq before the chip->tcpci has been assigned, we still read and write the alert register, but just skip calling tcpci_irq(). With this change, I haven't managed to trip over the problem (though it hasn't been super long - but I did confirm I hit the error case and it didn't hang the system). I still have some concern that I don't know why this cropped up since 5.7-rc, as there haven't been any changes to the driver since 5.4 (or before). It may just be the initialization timing has changed due to something else, and its just exposed this issue? I'm not sure, and that's not super re-assuring. Anyway, I'd love to hear your thoughts if this looks like a sane fix or not. Cc: Guenter Roeck Cc: Heikki Krogerus Cc: Greg Kroah-Hartman Cc: YongQin Liu Cc: linux-...@vger.kernel.org Signed-off-by: John Stultz --- drivers/usb/typec/tcpm/tcpci_rt1711h.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/usb/typec/tcpm/tcpci_rt1711h.c b/drivers/usb/typec/tcpm/tcpci_rt1711h.c index 017389021b96..530fd2c111ad 100644 --- a/drivers/usb/typec/tcpm/tcpci_rt1711h.c +++ b/drivers/usb/typec/tcpm/tcpci_rt1711h.c @@ -159,9 +159,6 @@ static irqreturn_t rt1711h_irq(int irq, void *dev_id) u8 status; struct rt1711h_chip *chip = dev_id; - if (!chip->tcpci) - return IRQ_HANDLED; - ret = rt1711h_read16(chip, TCPC_ALERT, ); if (ret < 0) goto out; @@ -176,6 +173,9 @@ static irqreturn_t rt1711h_irq(int irq, void *dev_id) } out: + if (!chip->tcpci) + return IRQ_HANDLED; + return tcpci_irq(chip->tcpci); } -- 2.17.1
Re: [PATCH v2 2/3] seccomp: Introduce addfd ioctl to seccomp user notifier
> > I mean, yes, that's certainly better, but it just seems a shame that > everyone has to do the get_unused/put_unused dance just because of how > SCM_RIGHTS does this weird put_user() in the middle. > > Can anyone clarify the expected failure mode from SCM_RIGHTS? Can we > move the put_user() after instead? I think cleanup would just be: > replace_fd(fd, NULL, 0) > > So: > > (updated to skip sock updates on failure; thank you Christian!) > > int file_receive(int fd, unsigned long flags, struct file *file) > { > struct socket *sock; > int ret; > > ret = security_file_receive(file); > if (ret) > return ret; > > /* Install the file. */ > if (fd == -1) { > ret = get_unused_fd_flags(flags); > if (ret >= 0) > fd_install(ret, get_file(file)); > } else { > ret = replace_fd(fd, file, flags); > } > > /* Bump the sock usage counts. */ > if (ret >= 0) { > sock = sock_from_file(addfd->file, ); > if (sock) { > sock_update_netprioidx(>sk->sk_cgrp_data); > sock_update_classid(>sk->sk_cgrp_data); > } > } > > return ret; > } > > scm_detach_fds() > ... > for (i=0, cmfptr=(__force int __user *)CMSG_DATA(cm); i i++, cmfptr++) > { > int new_fd; > > err = file_receive(-1, MSG_CMSG_CLOEXEC & msg->msg_flags > ? O_CLOEXEC : 0, fp[i]); > if (err < 0) > break; > new_fd = err; > Isn't the "right" way to do this to allocate a bunch of file descriptors, and fill up the user buffer with them, and then install the files? This seems to like half-install the file descriptors and then error out. I know that's the current behaviour, but that seems like a bad idea. Do we really want to perpetuate this half-broken state? I guess that some userspace programs could be depending on this -- and their recovery semantics could rely on this. I mean this is 10+ year old code. > err = put_user(err, cmfptr); > if (err) { > /* >* If we can't notify userspace that it got the >* fd, we need to unwind and remove it again. >*/ > replace_fd(new_fd, NULL, 0); > break; > } > } > ... > > > > -- > Kees Cook
Re: [PATCH] refperf: work around 64-bit division
On Fri, May 29, 2020 at 10:15:51PM +0200, Arnd Bergmann wrote: > A 64-bit division was introduced in refperf, breaking compilation > on all 32-bit architectures: > > kernel/rcu/refperf.o: in function `main_func': > refperf.c:(.text+0x57c): undefined reference to `__aeabi_uldivmod' > > Work it by using div_u64 to mark the expensive operation. > > Fixes: bd5b16d6c88d ("refperf: Allow decimal nanoseconds") > Signed-off-by: Arnd Bergmann > --- > kernel/rcu/refperf.c | 9 +++-- > 1 file changed, 7 insertions(+), 2 deletions(-) > > diff --git a/kernel/rcu/refperf.c b/kernel/rcu/refperf.c > index 47df72c492b3..c2366648981d 100644 > --- a/kernel/rcu/refperf.c > +++ b/kernel/rcu/refperf.c > @@ -386,7 +386,7 @@ static int main_func(void *arg) > if (torture_must_stop()) > goto end; > > - result_avg[exp] = 1000 * process_durations(nreaders) / > (nreaders * loops); > + result_avg[exp] = div_u64(1000 * process_durations(nreaders), > nreaders * loops); > } > > // Print the average of all experiments > @@ -397,9 +397,14 @@ static int main_func(void *arg) > strcat(buf, "Threads\tTime(ns)\n"); > > for (exp = 0; exp < nruns; exp++) { > + u64 avg; > + u32 rem; > + > if (errexit) > break; > - sprintf(buf1, "%d\t%llu.%03d\n", exp + 1, result_avg[exp] / > 1000, (int)(result_avg[exp] % 1000)); > + > + avg = div_s64_rem(result_avg[exp], 1000, ); Shouldn't this be div_u64_rem? result_avg is u64. > + sprintf(buf1, "%d\t%llu.%03d\n", exp + 1, avg, rem); Would %03u be the better specifier since rem is u32? > strcat(buf, buf1); > } > > -- > 2.26.2 > Cheers, Nathan
Re: [PATCH 2/2] exec: Compute file based creds only once
Kees Cook writes: > On Fri, May 29, 2020 at 11:47:29AM -0500, Eric W. Biederman wrote: >> diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h >> index cd3dd0afceb5..37bb3df751c6 100644 >> --- a/include/linux/lsm_hooks.h >> +++ b/include/linux/lsm_hooks.h >> @@ -44,18 +44,18 @@ >> * request libc enable secure mode. >> - * The hook must set @bprm->pf_per_clear to the personality flags that >> + * The hook must set @bprm->per_clear to the personality flags that > > Here and the other per_clear comment have language that doesn't quite > line up with how hooks should deal with the bits. They should not "set > it to" the personality flags they want clear, they need to "add the > bits" they want to see cleared. i.e I don't want something thinking > they're the only one touching per_clear, so they should never do: > bprm->per_clear = PER_CLEAR_ON_SETID; > but always: > bprm->per_clear |= PER_CLEAR_ON_SETID; > > How about: > > The hook must set @bprm->per_clear with any personality flag bits that Sounds good: The range-diff winds up being: 1: c9258ef4879b ! 1: a7868323c263 exec: Add a per bprm->file version of per_clear @@ Commit message History Tree: git://git.kernel.org/pub/scm/linux/kernel/git/tglx/history.git Fixes: 1bb0fa189c6a ("[PATCH] NX: clean up legacy binary support") +Reviewed-by: Kees Cook Signed-off-by: "Eric W. Biederman" ## fs/exec.c ## @@ include/linux/lsm_hooks.h *transitions between security domains). *The hook must set @bprm->active_secureexec to 1 if AT_SECURE should be set to *request libc enable secure mode. -+ *The hook must set @bprm->pf_per_clear to the personality flags that -+ *should be cleared from current->personality. ++ *The hook must add to @bprm->pf_per_clear any personality flags that ++ *should be cleared from current->personality. *@bprm contains the linux_binprm structure. *Return 0 if the hook is successful and permission is granted. * @bprm_check_security: 2: e6f20c69b96e ! 2: 56305aa9b6fa exec: Compute file based creds only once @@ Commit message secureity attribute and derive capabilities from the fact the user had uid 0 has been added. +Reviewed-by: Kees Cook Signed-off-by: "Eric W. Biederman" ## fs/binfmt_misc.c ## @@ include/linux/lsm_hooks.h + *between security domains). + *The hook must set @bprm->secureexec to 1 if AT_SECURE should be set to *request libc enable secure mode. -- *The hook must set @bprm->pf_per_clear to the personality flags that -+ *The hook must set @bprm->per_clear to the personality flags that - *should be cleared from current->personality. +- *The hook must add to @bprm->pf_per_clear any personality flags that ++ *The hook must add to @bprm->per_clear any personality flags that + *should be cleared from current->personality. *@bprm contains the linux_binprm structure. *Return 0 if the hook is successful and permission is granted. >> diff --git a/security/commoncap.c b/security/commoncap.c > > Not about this patch, but while looking through this file, I see: > > int cap_bprm_set_creds(struct linux_binprm *bprm) > { > ... > *capability manipulations* > > if (WARN_ON(!cap_ambient_invariant_ok(new))) > return -EPERM; > > if (nonroot_raised_pE(new, old, root_uid, has_fcap)) { > ret = audit_log_bprm_fcaps(bprm, new, old); > if (ret < 0) > return ret; > } > > new->securebits &= ~issecure_mask(SECURE_KEEP_CAPS); > > if (WARN_ON(!cap_ambient_invariant_ok(new))) > return -EPERM; > ... > } > > The cap_ambient_invariant_ok() test is needlessly repeated: it doesn't > examine securebits, and nonroot_raised_pE appears to have no > side-effects. > > One of those can be dropped, yes? That is what it looks like to me. I am hoping to take a deep dive into this function after I finish with bprm_fill_uid (the patches that were dropped). My brain bends on little details like is_setid not testing if the excutable was suid or sgid, but instead is testing something close but unrelated. I hope that when the dust clears the function can become a straightforward implementation of the capability equations. We will see. Eric
Re: [PATCH 1/2] exec: Add a per bprm->file version of per_clear
Kees Cook writes: > On Fri, May 29, 2020 at 11:46:40AM -0500, Eric W. Biederman wrote: >> >> There is a small bug in the code that recomputes parts of bprm->cred >> for every bprm->file. The code never recomputes the part of >> clear_dangerous_personality_flags it is responsible for. >> >> Which means that in practice if someone creates a sgid script >> the interpreter will not be able to use any of: >> READ_IMPLIES_EXEC >> ADDR_NO_RANDOMIZE >> ADDR_COMPAT_LAYOUT >> MMAP_PAGE_ZERO. >> >> This accentially clearing of personality flags probably does >> not matter in practice because no one has complained >> but it does make the code more difficult to understand. >> >> Further remaining bug compatible prevents the recomputation from being >> removed and replaced by simply computing bprm->cred once from the >> final bprm->file. >> >> Making this change removes the last behavior difference between >> computing bprm->creds from the final file and recomputing >> bprm->cred several times. Which allows this behavior change >> to be justified for it's own reasons, and for any but hunts >> looking into why the behavior changed to wind up here instead >> of in the code that will follow that computes bprm->cred >> from the final bprm->file. >> >> This small logic bug appears to have existed since the code >> started clearing dangerous personality bits. >> >> History Tree: git://git.kernel.org/pub/scm/linux/kernel/git/tglx/history.git >> Fixes: 1bb0fa189c6a ("[PATCH] NX: clean up legacy binary support") >> Signed-off-by: "Eric W. Biederman" > > Yup, this looks good. Pointless nit because it's removed in the next > patch, but pf_per_clear is following the same behavioral pattern as > active_secureexec, it could be named active_per_clear, but since this > already been bikeshed in v1, it's fine! :) That plus it is very much true that active_ isn't a particularly good prefix. pf_ for per_file seems slightly better. The only time I can imagine this patch seeing the light of day is if someone happens to discover that this fixes a bug for them and just this patch is backported. At which point pf_per_clear pairs with cap_elevated. So I don't think it hurts. *Shrug* The next patch is my long term solution to the mess. > Reviewed-by: Kees Cook > > I wish we had more robust execve tests. :( I think you have more skill at writing automated tests than I do. So feel free to write some. Eric
[PATCH] usb: cdns3: fix possible buffer overflow caused by bad DMA value
In cdns3_ep0_setup_phase(): struct usb_ctrlrequest *ctrl = priv_dev->setup_buf; Because priv_dev->setup_buf (allocated in cdns3_gadget_start) is stored in DMA memory, and thus ctrl is a DMA value. cdns3_ep0_setup_phase() cdns3_ep0_standard_request(priv_dev, ctrl) cdns3_req_ep0_get_status(priv_dev, ctrl) index = cdns3_ep_addr_to_index(ctrl->wIndex); priv_ep = priv_dev->eps[index]; cdns3_ep0_setup_phase() cdns3_ep0_standard_request(priv_dev, ctrl) cdns3_req_ep0_handle_feature(priv_dev, ctrl_req, 0) cdns3_ep0_feature_handle_endpoint(priv_dev, ctrl, set) index = cdns3_ep_addr_to_index(ctrl->wIndex); priv_ep = priv_dev->eps[index]; In these cases, ctrl->wIndex can be be modified at anytime by malicious hardware, and thus a buffer overflow can occur when the code "priv_dev->eps[index]" is executed. To fix these possible bugs, index is checked before being used. Signed-off-by: Jia-Ju Bai --- drivers/usb/cdns3/ep0.c | 5 + 1 file changed, 5 insertions(+) diff --git a/drivers/usb/cdns3/ep0.c b/drivers/usb/cdns3/ep0.c index e71240b386b4..0a80c7ade613 100644 --- a/drivers/usb/cdns3/ep0.c +++ b/drivers/usb/cdns3/ep0.c @@ -265,6 +265,8 @@ static int cdns3_req_ep0_get_status(struct cdns3_device *priv_dev, return cdns3_ep0_delegate_req(priv_dev, ctrl); case USB_RECIP_ENDPOINT: index = cdns3_ep_addr_to_index(ctrl->wIndex); + if (index >= CDNS3_ENDPOINTS_MAX_COUNT) + return -EINVAL; priv_ep = priv_dev->eps[index]; /* check if endpoint is stalled or stall is pending */ @@ -388,6 +390,9 @@ static int cdns3_ep0_feature_handle_endpoint(struct cdns3_device *priv_dev, return 0; index = cdns3_ep_addr_to_index(ctrl->wIndex); + if (index >= CDNS3_ENDPOINTS_MAX_COUNT) + return -EINVAL; + priv_ep = priv_dev->eps[index]; cdns3_select_ep(priv_dev, ctrl->wIndex); -- 2.17.1
Re: [PATCH v2 2/3] seccomp: Introduce addfd ioctl to seccomp user notifier
On Sat, May 30, 2020 at 4:43 AM Kees Cook wrote: > I mean, yes, that's certainly better, but it just seems a shame that > everyone has to do the get_unused/put_unused dance just because of how > SCM_RIGHTS does this weird put_user() in the middle. > > Can anyone clarify the expected failure mode from SCM_RIGHTS? Can we > move the put_user() after instead? Honestly, I think trying to remove file descriptors and such after -EFAULT is a waste of time. If userspace runs into -EFAULT, userspace is beyond saving and can't really do much other than exit immediately. There are a bunch of places that will change state and then throw -EFAULT at the end if userspace supplied an invalid address, because trying to hold locks across userspace accesses just in case userspace supplied a bogus address is kinda silly (and often borderline impossible). You can actually see that even scm_detach_fds() currently just silently swallows errors if writing some header fields fails at the end.
Re: [PATCH 9/9] staging: media: atomisp: add PMIC_OPREGION dependency
On Fri, May 29, 2020 at 10:00:31PM +0200, Arnd Bergmann wrote: > Without that driver, there is a link failure in > > ERROR: modpost: "intel_soc_pmic_exec_mipi_pmic_seq_element" > [drivers/staging/media/atomisp/pci/atomisp_gmin_platform.ko] undefined! > > Add an explicit Kconfig dependency. > > Signed-off-by: Arnd Bergmann It'd be interesting to know if this is strictly required for the driver to work properly. The call to intel_soc_pmic_exec_mipi_pmic_seq_element has some error handling after it, maybe that should just be surrounded by an #ifdef or IS_ENABLED for PMIC_OPREGION, like some other drivers do. Regardless of that: Reviewed-by: Nathan Chancellor > --- > drivers/staging/media/atomisp/Kconfig | 1 + > 1 file changed, 1 insertion(+) > > diff --git a/drivers/staging/media/atomisp/Kconfig > b/drivers/staging/media/atomisp/Kconfig > index c4f3049b0706..e86311c14329 100644 > --- a/drivers/staging/media/atomisp/Kconfig > +++ b/drivers/staging/media/atomisp/Kconfig > @@ -11,6 +11,7 @@ menuconfig INTEL_ATOMISP > config VIDEO_ATOMISP > tristate "Intel Atom Image Signal Processor Driver" > depends on VIDEO_V4L2 && INTEL_ATOMISP > + depends on PMIC_OPREGION > select IOSF_MBI > select VIDEOBUF_VMALLOC > ---help--- > -- > 2.26.2 >
Re: [PATCH 8/9] staging: media: atomisp: disable all custom formats
On Fri, May 29, 2020 at 10:00:30PM +0200, Arnd Bergmann wrote: > clang points out the usage of an incorrect enum type in the > list of supported image formats: > > drivers/staging/media/atomisp/pci/atomisp_subdev.c:49:65: error: implicit > conversion from enumeration type 'enum ia_css_frame_format' to different > enumeration type 'enum atomisp_input_format' [-Werror,-Wenum-conversion] > { V4L2_MBUS_FMT_CUSTOM_NV21, 12, 12, CSS_FRAME_FORMAT_NV21, 0, > CSS_FRAME_FORMAT_NV21 }, > drivers/staging/media/atomisp/pci/atomisp_subdev.c:49:39: error: implicit > conversion from enumeration type 'enum ia_css_frame_format' to different > enumeration type 'enum atomisp_input_format' [-Werror,-Wenum-conversion] > { V4L2_MBUS_FMT_CUSTOM_NV21, 12, 12, CSS_FRAME_FORMAT_NV21, 0, > CSS_FRAME_FORMAT_NV21 }, > { V4L2_MBUS_FMT_CUSTOM_NV12, 12, 12, CSS_FRAME_FORMAT_NV12, 0, > CSS_FRAME_FORMAT_NV12 }, > { MEDIA_BUS_FMT_JPEG_1X8, 8, 8, CSS_FRAME_FORMAT_BINARY_8, 0, > ATOMISP_INPUT_FORMAT_BINARY_8 }, > > Checking the git history, I found a commit that disabled one such case > because it did not work. It seems likely that the incorrect enum was > part of the original problem and that the others do not work either, > or have never been tested. > > Disable all the ones that cause a warning. > > Fixes: cb02ae3d71ea ("media: staging: atomisp: Disable custom format for now") > Signed-off-by: Arnd Bergmann I have this patch in my local tree and debated sending it myself. I think that this is the right fix for now, as the driver is being cleaned up. Maybe add a FIXME like the rest of this driver? Regardless of that last point: Reviewed-by: Nathan Chancellor > --- > drivers/staging/media/atomisp/pci/atomisp_subdev.c | 2 ++ > 1 file changed, 2 insertions(+) > > diff --git a/drivers/staging/media/atomisp/pci/atomisp_subdev.c > b/drivers/staging/media/atomisp/pci/atomisp_subdev.c > index 46590129cbe3..8bce466cc128 100644 > --- a/drivers/staging/media/atomisp/pci/atomisp_subdev.c > +++ b/drivers/staging/media/atomisp/pci/atomisp_subdev.c > @@ -44,9 +44,11 @@ const struct atomisp_in_fmt_conv atomisp_in_fmt_conv[] = { > { MEDIA_BUS_FMT_SRGGB12_1X12, 12, 12, ATOMISP_INPUT_FORMAT_RAW_12, > CSS_BAYER_ORDER_RGGB, CSS_FORMAT_RAW_12 }, > { MEDIA_BUS_FMT_UYVY8_1X16, 8, 8, ATOMISP_INPUT_FORMAT_YUV422_8, 0, > ATOMISP_INPUT_FORMAT_YUV422_8 }, > { MEDIA_BUS_FMT_YUYV8_1X16, 8, 8, ATOMISP_INPUT_FORMAT_YUV422_8, 0, > ATOMISP_INPUT_FORMAT_YUV422_8 }, > +#if 0 > { MEDIA_BUS_FMT_JPEG_1X8, 8, 8, CSS_FRAME_FORMAT_BINARY_8, 0, > ATOMISP_INPUT_FORMAT_BINARY_8 }, > { V4L2_MBUS_FMT_CUSTOM_NV12, 12, 12, CSS_FRAME_FORMAT_NV12, 0, > CSS_FRAME_FORMAT_NV12 }, > { V4L2_MBUS_FMT_CUSTOM_NV21, 12, 12, CSS_FRAME_FORMAT_NV21, 0, > CSS_FRAME_FORMAT_NV21 }, > +#endif > { V4L2_MBUS_FMT_CUSTOM_YUV420, 12, 12, > ATOMISP_INPUT_FORMAT_YUV420_8_LEGACY, 0, ATOMISP_INPUT_FORMAT_YUV420_8_LEGACY > }, > #if 0 > { V4L2_MBUS_FMT_CUSTOM_M10MO_RAW, 8, 8, CSS_FRAME_FORMAT_BINARY_8, 0, > ATOMISP_INPUT_FORMAT_BINARY_8 }, > -- > 2.26.2 >
Re: [PATCH 7/9] staging: media: atomisp: fix enum type mixups
On Fri, May 29, 2020 at 10:00:29PM +0200, Arnd Bergmann wrote: > Some function calls pass an incorrect enum type: > > drivers/staging/media/atomisp/pci/hive_isp_css_common/host/input_system.c:858:16: > error: implicit conversion from enumeration type 'input_system_ID_t' to > different enumeration type 'gp_device_ID_t' [-Werror,-Wenum-conversion] > gp_device_rst(INPUT_SYSTEM0_ID); > ~ ^~~~ > drivers/staging/media/atomisp/pci/hive_isp_css_common/host/input_system.c:860:19: > error: implicit conversion from enumeration type 'input_system_ID_t' to > different enumeration type 'gp_device_ID_t' [-Werror,-Wenum-conversion] > input_switch_rst(INPUT_SYSTEM0_ID); > ^~~~ > drivers/staging/media/atomisp/pci/hive_isp_css_common/host/input_system.c:876:27: > error: implicit conversion from enumeration type 'input_system_cfg_flag_t' > to different enumeration type 'input_system_connection_t' > [-Werror,-Wenum-conversion] > config.multicast[i] = > INPUT_SYSTEM_CFG_FLAG_RESET; > ~ ^~~ > drivers/staging/media/atomisp/pci/hive_isp_css_common/host/input_system.c:1326:32: > error: implicit conversion from enumeration type 'input_system_ID_t' to > different enumeration type 'gp_device_ID_t' [-Werror,-Wenum-conversion] > input_selector_cfg_for_sensor(INPUT_SYSTEM0_ID); > ~ ^~~~ > drivers/staging/media/atomisp/pci/hive_isp_css_common/host/input_system.c:1329:19: > error: implicit conversion from enumeration type 'input_system_ID_t' to > different enumeration type 'gp_device_ID_t' [-Werror,-Wenum-conversion] > input_switch_cfg(INPUT_SYSTEM0_ID, _switch_cfg); > ^~~~ > > INPUT_SYSTEM0_ID is zero, so use the corresponding zero-value > of the expected types instead. > > Fixes: a49d25364dfb ("staging/atomisp: Add support for the Intel IPU v2") > Signed-off-by: Arnd Bergmann Huh weird that I did not see this warning but you do randconfigs so that's expected. Reviewed-by: Nathan Chancellor > --- > .../pci/hive_isp_css_common/host/input_system.c| 10 +- > 1 file changed, 5 insertions(+), 5 deletions(-) > > diff --git > a/drivers/staging/media/atomisp/pci/hive_isp_css_common/host/input_system.c > b/drivers/staging/media/atomisp/pci/hive_isp_css_common/host/input_system.c > index 2114cf4f3fda..aa0f0fca9346 100644 > --- > a/drivers/staging/media/atomisp/pci/hive_isp_css_common/host/input_system.c > +++ > b/drivers/staging/media/atomisp/pci/hive_isp_css_common/host/input_system.c > @@ -855,9 +855,9 @@ input_system_error_t > input_system_configuration_reset(void) > > input_system_network_rst(INPUT_SYSTEM0_ID); > > - gp_device_rst(INPUT_SYSTEM0_ID); > + gp_device_rst(GP_DEVICE0_ID); > > - input_switch_rst(INPUT_SYSTEM0_ID); > + input_switch_rst(GP_DEVICE0_ID); > > //target_rst(); > > @@ -873,7 +873,7 @@ input_system_error_t > input_system_configuration_reset(void) > > for (i = 0; i < N_CSI_PORTS; i++) { > config.csi_buffer_flags[i] = INPUT_SYSTEM_CFG_FLAG_RESET; > - config.multicast[i] = INPUT_SYSTEM_CFG_FLAG_RESET; > + config.multicast[i] = INPUT_SYSTEM_DISCARD_ALL; > } > > config.source_type_flags = > INPUT_SYSTEM_CFG_FLAG_RESET; > @@ -1323,10 +1323,10 @@ static input_system_error_t > configuration_to_registers(void) > } // end of switch (source_type) > > // Set input selector. > - input_selector_cfg_for_sensor(INPUT_SYSTEM0_ID); > + input_selector_cfg_for_sensor(GP_DEVICE0_ID); > > // Set input switch. > - input_switch_cfg(INPUT_SYSTEM0_ID, _switch_cfg); > + input_switch_cfg(GP_DEVICE0_ID, _switch_cfg); > > // Set input formatters. > // AM: IF are set dynamically. > -- > 2.26.2 >
Re: [PATCH 5/9] staging: media: atomisp: fix stack overflow in init_pipe_defaults()
On Fri, May 29, 2020 at 10:00:27PM +0200, Arnd Bergmann wrote: > When building with clang, multiple copies of the structures to be > initialized are passed around on the stack and copied locally, using an > insane amount of stack space: > > drivers/staging/media/atomisp/pci/sh_css.c:2371:1: error: stack frame size of > 26864 bytes in function 'create_pipe' [-Werror,-Wframe-larger-than=] > > Use constantly-allocated variables plus an explicit memcpy() > to avoid that. > > Fixes: 6dc9a2568f84 ("media: atomisp: convert default struct values to use > compound-literals with designated initializers") > Signed-off-by: Arnd Bergmann Reviewed-by: Nathan Chancellor > --- > drivers/staging/media/atomisp/pci/sh_css.c | 16 +++- > 1 file changed, 11 insertions(+), 5 deletions(-) > > diff --git a/drivers/staging/media/atomisp/pci/sh_css.c > b/drivers/staging/media/atomisp/pci/sh_css.c > index e91c6029c651..1e8b9d637116 100644 > --- a/drivers/staging/media/atomisp/pci/sh_css.c > +++ b/drivers/staging/media/atomisp/pci/sh_css.c > @@ -2264,6 +2264,12 @@ static enum ia_css_err > init_pipe_defaults(enum ia_css_pipe_mode mode, > struct ia_css_pipe *pipe, > bool copy_pipe) { > + static const struct ia_css_pipe default_pipe = IA_CSS_DEFAULT_PIPE; > + static const struct ia_css_preview_settings preview = > IA_CSS_DEFAULT_PREVIEW_SETTINGS; > + static const struct ia_css_capture_settings capture = > IA_CSS_DEFAULT_CAPTURE_SETTINGS; > + static const struct ia_css_video_settings video = > IA_CSS_DEFAULT_VIDEO_SETTINGS; > + static const struct ia_css_yuvpp_settings yuvpp = > IA_CSS_DEFAULT_YUVPP_SETTINGS; > + > if (!pipe) > { > IA_CSS_ERROR("NULL pipe parameter"); > @@ -2271,14 +2277,14 @@ init_pipe_defaults(enum ia_css_pipe_mode mode, > } > > /* Initialize pipe to pre-defined defaults */ > - *pipe = IA_CSS_DEFAULT_PIPE; > + memcpy(pipe, _pipe, sizeof(default_pipe)); > > /* TODO: JB should not be needed, but temporary backward reference */ > switch (mode) > { > case IA_CSS_PIPE_MODE_PREVIEW: > pipe->mode = IA_CSS_PIPE_ID_PREVIEW; > - pipe->pipe_settings.preview = IA_CSS_DEFAULT_PREVIEW_SETTINGS; > + memcpy(>pipe_settings.preview, , sizeof(preview)); > break; > case IA_CSS_PIPE_MODE_CAPTURE: > if (copy_pipe) { > @@ -2286,11 +2292,11 @@ init_pipe_defaults(enum ia_css_pipe_mode mode, > } else { > pipe->mode = IA_CSS_PIPE_ID_CAPTURE; > } > - pipe->pipe_settings.capture = IA_CSS_DEFAULT_CAPTURE_SETTINGS; > + memcpy(>pipe_settings.capture, , sizeof(capture)); > break; > case IA_CSS_PIPE_MODE_VIDEO: > pipe->mode = IA_CSS_PIPE_ID_VIDEO; > - pipe->pipe_settings.video = IA_CSS_DEFAULT_VIDEO_SETTINGS; > + memcpy(>pipe_settings.video, , sizeof(video)); > break; > case IA_CSS_PIPE_MODE_ACC: > pipe->mode = IA_CSS_PIPE_ID_ACC; > @@ -2300,7 +2306,7 @@ init_pipe_defaults(enum ia_css_pipe_mode mode, > break; > case IA_CSS_PIPE_MODE_YUVPP: > pipe->mode = IA_CSS_PIPE_ID_YUVPP; > - pipe->pipe_settings.yuvpp = IA_CSS_DEFAULT_YUVPP_SETTINGS; > + memcpy(>pipe_settings.yuvpp, , sizeof(yuvpp)); > break; > default: > return IA_CSS_ERR_INVALID_ARGUMENTS; > -- > 2.26.2 >
Re: [PATCH 2/9] staging: media: atomisp: declare 'struct device' before using it
On Fri, May 29, 2020 at 10:00:24PM +0200, Arnd Bergmann wrote: > In some configurations, including this header leads to a warning: > > drivers/staging/media/atomisp//pci/sh_css_firmware.h:41:38: error: > declaration of 'struct device' will not be visible outside of this function > [-Werror,-Wvisibility] > > Make sure the struct tag is known before declaring a function > that uses it as an argument. > > Fixes: 9d4fa1a16b28 ("media: atomisp: cleanup directory hierarchy") > Signed-off-by: Arnd Bergmann Reviewed-by: Nathan Chancellor > --- > drivers/staging/media/atomisp/pci/sh_css_firmware.h | 1 + > 1 file changed, 1 insertion(+) > > diff --git a/drivers/staging/media/atomisp/pci/sh_css_firmware.h > b/drivers/staging/media/atomisp/pci/sh_css_firmware.h > index f6253392a6c9..317559c7689f 100644 > --- a/drivers/staging/media/atomisp/pci/sh_css_firmware.h > +++ b/drivers/staging/media/atomisp/pci/sh_css_firmware.h > @@ -37,6 +37,7 @@ extern unsigned int sh_css_num_binaries; > char > *sh_css_get_fw_version(void); > > +struct device; > bool > sh_css_check_firmware_version(struct device *dev, const char *fw_data); > > -- > 2.26.2 >
Re: [PATCH 3/9] staging: media: atomisp: annotate an unused function
On Fri, May 29, 2020 at 10:00:25PM +0200, Arnd Bergmann wrote: > atomisp_mrfld_power() has no more callers and produces > a warning: > > drivers/staging/media/atomisp/pci/atomisp_v4l2.c:764:12: error: unused > function 'atomisp_mrfld_power' [-Werror,-Wunused-function] > > Mark the function as unused while the PM code is being > debugged, expecting that it will be used again in the > future and should not just be removed. > > Fixes: 95d1f398c4dc ("media: atomisp: keep the ISP powered on when setting > it") > Signed-off-by: Arnd Bergmann Mauro fixed this in his experimental branch: https://git.linuxtv.org/mchehab/experimental.git/commit/?id=dbcee8118fc9283401df9dbe8ba03ab9d17433ff > --- > drivers/staging/media/atomisp/pci/atomisp_v4l2.c | 3 ++- > 1 file changed, 2 insertions(+), 1 deletion(-) > > diff --git a/drivers/staging/media/atomisp/pci/atomisp_v4l2.c > b/drivers/staging/media/atomisp/pci/atomisp_v4l2.c > index 694268d133c0..10abb35ba0e0 100644 > --- a/drivers/staging/media/atomisp/pci/atomisp_v4l2.c > +++ b/drivers/staging/media/atomisp/pci/atomisp_v4l2.c > @@ -761,7 +761,8 @@ static void punit_ddr_dvfs_enable(bool enable) > pr_info("DDR DVFS, door bell is not cleared within 3ms\n"); > } > > -static int atomisp_mrfld_power(struct atomisp_device *isp, bool enable) > +static __attribute__((unused)) int > +atomisp_mrfld_power(struct atomisp_device *isp, bool enable) > { > unsigned long timeout; > u32 val = enable ? MRFLD_ISPSSPM0_IUNIT_POWER_ON : > -- > 2.26.2 >
Re: [PATCH 1/9] staging: media: atomisp: fix incorrect NULL pointer check
On Fri, May 29, 2020 at 10:31:44PM +0200, Arnd Bergmann wrote: > On Fri, May 29, 2020 at 10:23 PM Arnd Bergmann wrote: > > > > On Fri, May 29, 2020 at 10:04 PM 'Nick Desaulniers' via Clang Built > > Linux wrote: > > > > > > See also Nathan's 7 patch series. > > > https://lore.kernel.org/lkml/20200527071150.3381228-1-natechancel...@gmail.com/ > > > > > > Might be some overlap between series? > > > > > > > Probably. I really should have checked when I saw the number of warnings. > > > > At least this gives Mauro a chance to double-check the changes and see if > > Nathan and I came to different conclusions on any of them. > > I checked now and found that the overlap is smaller than I expected. > In each case, Nathans' solution seems more complete than mine, > so this patch ("staging: media: atomisp: fix incorrect NULL pointer check") > and also "staging: media: atomisp: fix a type conversion warning" can be > dropped, but I think the others are still needed. > > Arnd Thanks for double checking! I will read through the rest of the series and review as I can. Cheers, Nathan
Re: [PATCH v2 2/3] seccomp: Introduce addfd ioctl to seccomp user notifier
On Sat, May 30, 2020 at 01:10:55AM +, Sargun Dhillon wrote: > // And then SCM reads: > for (i=0, cmfptr=(__force int __user *)CMSG_DATA(cm); ii++, cmfptr++) > { > int new_fd; > err = get_unused_fd_flags(MSG_CMSG_CLOEXEC & msg->msg_flags > ? O_CLOEXEC : 0); > if (err < 0) > break; > new_fd = err; > err = put_user(new_fd, cmfptr); > if (err) { > put_unused_fd(new_fd); > break; > } > > err = file_receive(new_fd, fp[i]); > if (err) { > put_unused_fd(new_fd); > break; > } > } > > And our code reads: > > > static void seccomp_handle_addfd(struct seccomp_kaddfd *addfd) > { > int ret, err; > > /* >* Remove the notification, and reset the list pointers, indicating >* that it has been handled. >*/ > list_del_init(>list); > > if (addfd->fd == -1) { > ret = get_unused_fd_flags(addfd->flags); > if (ret < 0) > goto err; > > err = file_receive(ret, addfd->file); > if (err) { > put_unused_fd(ret); > ret = err; > } > } else { > ret = file_receive_replace(addfd->fd, addfd->flags, > addfd->file); > } > > err: > addfd->ret = ret; > complete(>completion); > } > > > And the pidfd getfd code reads: > > static int pidfd_getfd(struct pid *pid, int fd) > { > struct task_struct *task; > struct file *file; > int ret, err; > > task = get_pid_task(pid, PIDTYPE_PID); > if (!task) > return -ESRCH; > > file = __pidfd_fget(task, fd); > put_task_struct(task); > if (IS_ERR(file)) > return PTR_ERR(file); > > ret = get_unused_fd_flags(O_CLOEXEC); > if (ret >= 0) { > err = file_receive(ret, file); > if (err) { > put_unused_fd(ret); > ret = err; > } > } > > fput(file); > return ret; > } I mean, yes, that's certainly better, but it just seems a shame that everyone has to do the get_unused/put_unused dance just because of how SCM_RIGHTS does this weird put_user() in the middle. Can anyone clarify the expected failure mode from SCM_RIGHTS? Can we move the put_user() after instead? I think cleanup would just be: replace_fd(fd, NULL, 0) So: (updated to skip sock updates on failure; thank you Christian!) int file_receive(int fd, unsigned long flags, struct file *file) { struct socket *sock; int ret; ret = security_file_receive(file); if (ret) return ret; /* Install the file. */ if (fd == -1) { ret = get_unused_fd_flags(flags); if (ret >= 0) fd_install(ret, get_file(file)); } else { ret = replace_fd(fd, file, flags); } /* Bump the sock usage counts. */ if (ret >= 0) { sock = sock_from_file(addfd->file, ); if (sock) { sock_update_netprioidx(>sk->sk_cgrp_data); sock_update_classid(>sk->sk_cgrp_data); } } return ret; } scm_detach_fds() ... for (i=0, cmfptr=(__force int __user *)CMSG_DATA(cm); imsg_flags ? O_CLOEXEC : 0, fp[i]); if (err < 0) break; new_fd = err; err = put_user(err, cmfptr); if (err) { /* * If we can't notify userspace that it got the * fd, we need to unwind and remove it again. */ replace_fd(new_fd, NULL, 0); break; } } ... -- Kees Cook
[PATCH] net: vmxnet3: fix possible buffer overflow caused by bad DMA value in vmxnet3_get_rss()
The value adapter->rss_conf is stored in DMA memory, and it is assigned to rssConf, so rssConf->indTableSize can be modified at anytime by malicious hardware. Because rssConf->indTableSize is assigned to n, buffer overflow may occur when the code "rssConf->indTable[n]" is executed. To fix this possible bug, n is checked after being used. Signed-off-by: Jia-Ju Bai --- drivers/net/vmxnet3/vmxnet3_ethtool.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/vmxnet3/vmxnet3_ethtool.c b/drivers/net/vmxnet3/vmxnet3_ethtool.c index 6528940ce5f3..b53bb8bcd47f 100644 --- a/drivers/net/vmxnet3/vmxnet3_ethtool.c +++ b/drivers/net/vmxnet3/vmxnet3_ethtool.c @@ -700,6 +700,8 @@ vmxnet3_get_rss(struct net_device *netdev, u32 *p, u8 *key, u8 *hfunc) *hfunc = ETH_RSS_HASH_TOP; if (!p) return 0; + if (n > UPT1_RSS_MAX_IND_TABLE_SIZE) + return 0; while (n--) p[n] = rssConf->indTable[n]; return 0; -- 2.17.1
[PATCH] media: venus: fix possible buffer overlow casued bad DMA value in venus_sfr_print()
The value hdev->sfr.kva is stored in DMA memory, and it is assigned to sfr, so sfr->buf_size can be modified at anytime by malicious hardware. In this case, a buffer overflow may happen when the code "sfr->data[sfr->buf_size - 1]" is executed. To fix this possible bug, sfr->buf_size is assigned to a local variable, and then this variable is checked before being used. Signed-off-by: Jia-Ju Bai --- drivers/media/platform/qcom/venus/hfi_venus.c | 9 +++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/media/platform/qcom/venus/hfi_venus.c b/drivers/media/platform/qcom/venus/hfi_venus.c index 0d8855014ab3..4251a9e47a1b 100644 --- a/drivers/media/platform/qcom/venus/hfi_venus.c +++ b/drivers/media/platform/qcom/venus/hfi_venus.c @@ -960,18 +960,23 @@ static void venus_sfr_print(struct venus_hfi_device *hdev) { struct device *dev = hdev->core->dev; struct hfi_sfr *sfr = hdev->sfr.kva; + u32 buf_size; void *p; if (!sfr) return; - p = memchr(sfr->data, '\0', sfr->buf_size); + buf_size = sfr->buf_size; + if (buf_size > 1) + return; + + p = memchr(sfr->data, '\0', buf_size); /* * SFR isn't guaranteed to be NULL terminated since SYS_ERROR indicates * that Venus is in the process of crashing. */ if (!p) - sfr->data[sfr->buf_size - 1] = '\0'; + sfr->data[buf_size - 1] = '\0'; dev_err_ratelimited(dev, "SFR message from FW: %s\n", sfr->data); } -- 2.17.1
Re: [PATCH v2 2/2] regulator: Add support for sync_state() callbacks
On Fri, May 29, 2020 at 6:00 AM Mark Brown wrote: > > On Thu, May 28, 2020 at 12:06:10PM -0700, Saravana Kannan wrote: > > When a regulator is left on by the bootloader or anything else before > > the kernel starts (let's call this a "boot on" regulator), we need to > > keep it on till all the consumers of the regulator have probed. This is > > We we don't in general have a requirement that any of the consumers of a > regulator will ever probe. As I thought I'd already made clear that > case really needs to be handled. Sorry, in the eagerness to address all your concerns, I had forgotten to update the commit text in v2. I'll reword it to clarify this'll happen only when there's no timeout set. > > especially important for regulators that might be powering more than one > > consumer. Otherwise, when the first consumer probes, enables and then > > disables the "boot on" regulator, it'd turn off the power to rest of the > > consumers of the "boot on" regulator. > > Which is a problem because...? Those consumers that haven't probed can be powered on and active and can crash the system if the power is pulled under their feet. > > The sync_state() callback that's been added to drivers is meant for > > situations like this. It gets called when all the consumers of a device > > have probed successfully. To ease the transition to sync_state() > > callbacks, it is never called before late_initcall_sync(). > > This is not terribly useful for regulators where adding any of these > delays is going to create surprises. Frankly I can't really see why > deferring things until late_initcall() would help anything. Is this different from what's done already? Not having this delay is easy -- but will have to be at a global level across devices/resources. > > sync_state() callbacks become even more useful when combined with > > fw_devlink. If fw_devlink is off, sync_state() is called at > > late_initcall_sync() or the regulator device probing successfully -- > > whichever is later. This is because, with fw_devlink off, there would be > > no consumers to the regulator device when it probes. > > This breaks the case where no driver ever instantiates for a device. > > Oh, actually now I get to the very end of the patch I see there is > actually a timeout in here which wasn't mentioned in the changelog at > all. I very nearly didn't read the actual code as according to the > changelog this issue hadn't been addressed. Again, sorry I didn't update the commit text. I'll skip replying to rest of your comments on the commit text, because all of them are due to stale commit text. > > This commit adds a regulator_sync_state() helper function that takes > > care of all the "boot on" regulator clean up for any regulator driver. > > All one needs to do is add the following line to the driver struct. > > > > .sync_state = regulator_sync_state, > > Exactly the same issues as before apply here, why are devices getting > involved here? > > > +static void regulator_set_minimum_state(struct regulator_dev *rdev) > > +{ > > I find this name very confusing. If anything it's doing the opposite of > setting a minimum state, it's trying to prevent that happening. I agree. I renamed it to try and make it better, but I can see how it's more confusing once you called it out. Suggestions? regulator_set_boot_limits? regulator_set_boot_constraints? > > + /* > > + * Wait for parent supply to be ready before trying to keep this > > + * regulator on. > > + */ > > + if (rdev->supply_name && !rdev->supply) > > + return; > > I can't make sense of this. This stuff only limits disabling, not > enabling, regulators, we're keeping things on here anyway and why would > we care about the supply for disabling? We want to wait till supply is set up before we try to put the "enable" vote on a regulator. Otherwise, it won't be propagated properly? I do know that regulator_resolve_supply() takes care of propagating the use count, but we could delete that code if we just till the supply is resolved before "enabling" the regulator. The usual clients can't "get" the regulator anyway without the supply being resolved. Long story short, doing it this way can allow us to delete some functionally duplicative code. > > +static int regulator_rel_minimum_state(struct device *dev, void *data) > > +{ > > + struct regulator_dev *rdev = dev_to_rdev(dev); > > + > > + if (dev->parent != data) > > + return 0; > > I've just realised that this is even more restrictive than the > descriptions have suggested - it's not just preventing any changes until > all potential consumers of a given regulator have instantiated, it's > preventing changes until all potential consumers of all resources > provided by a given device have instantiated. Given that many systems > have a single PMIC which may also be providing other things like GPIOs > that would mean that any consumer that doesn't instantiate would prevent > any device
Re: [net-next 1/6] net: marvell: prestera: Add driver for Prestera family ASIC devices
On Sat, May 30, 2020 at 03:46:22AM +0300, Vadym Kochan wrote: > Hi David, > > On Fri, May 29, 2020 at 05:18:39PM -0700, David Miller wrote: > > > > Please remove all of the __packed attributes. > > > > I looked at your data structures and all of them use fixed sized types > > and are multiples of 4 so the __packed attribute is completely > > unnecessary. > > > > The alignment attribute is also unnecessary so please remove that too. > > Some of the fields are u8, so I assume there might be holes added by > the compiler ? Also these attributes guarantee some ABI compatibility > with FW side, I will try to remove them and check but it sounds for me a bit > risky. Hi Vadym You might want to play with pahole(1). Andrew
[PATCH v5 1/2] cpufreq: change '.set_boost' to act on only one policy
Macro 'for_each_active_policy()' is defined internally. To avoid some cpufreq driver needing this macro to iterate over all the policies in '.set_boost' callback, we redefine '.set_boost' to act on only one policy and pass the policy as an argument. 'cpufreq_boost_trigger_state()' iterate over all the policies to set boost for the system. This is preparation for adding SW BOOST support for CPPC. To protect Boost enable/disable by sysfs from CPU online/offline, we add 'cpu_hotplug_lock' before calling '.set_boost' for each CPU. Also we move the lock from 'set_boost()' to 'store_cpb()' for acpi_cpufreq. Signed-off-by: Xiongfeng Wang Suggested-by: Viresh Kumar --- drivers/cpufreq/acpi-cpufreq.c | 14 ++- drivers/cpufreq/cpufreq.c | 57 +++--- include/linux/cpufreq.h| 2 +- 3 files changed, 40 insertions(+), 33 deletions(-) diff --git a/drivers/cpufreq/acpi-cpufreq.c b/drivers/cpufreq/acpi-cpufreq.c index 289e8ce..429e5a3 100644 --- a/drivers/cpufreq/acpi-cpufreq.c +++ b/drivers/cpufreq/acpi-cpufreq.c @@ -126,12 +126,12 @@ static void boost_set_msr_each(void *p_en) boost_set_msr(enable); } -static int set_boost(int val) +static int set_boost(struct cpufreq_policy *policy, int val) { - get_online_cpus(); - on_each_cpu(boost_set_msr_each, (void *)(long)val, 1); - put_online_cpus(); - pr_debug("Core Boosting %sabled.\n", val ? "en" : "dis"); + on_each_cpu_mask(policy->cpus, boost_set_msr_each, +(void *)(long)val, 1); + pr_debug("CPU %*pbl: Core Boosting %sabled.\n", +cpumask_pr_args(policy->cpus), val ? "en" : "dis"); return 0; } @@ -162,7 +162,9 @@ static ssize_t store_cpb(struct cpufreq_policy *policy, const char *buf, if (ret || val > 1) return -EINVAL; - set_boost(val); + get_online_cpus(); + set_boost(policy, val); + put_online_cpus(); return count; } diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index d03f250..0128de3 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -2532,34 +2532,29 @@ void cpufreq_update_limits(unsigned int cpu) /* * BOOST * */ -static int cpufreq_boost_set_sw(int state) +static int cpufreq_boost_set_sw(struct cpufreq_policy *policy, int state) { - struct cpufreq_policy *policy; - - for_each_active_policy(policy) { - int ret; + int ret; - if (!policy->freq_table) - return -ENXIO; + if (!policy->freq_table) + return -ENXIO; - ret = cpufreq_frequency_table_cpuinfo(policy, - policy->freq_table); - if (ret) { - pr_err("%s: Policy frequency update failed\n", - __func__); - return ret; - } - - ret = freq_qos_update_request(policy->max_freq_req, policy->max); - if (ret < 0) - return ret; + ret = cpufreq_frequency_table_cpuinfo(policy, policy->freq_table); + if (ret) { + pr_err("%s: Policy frequency update failed\n", __func__); + return ret; } + ret = freq_qos_update_request(policy->max_freq_req, policy->max); + if (ret < 0) + return ret; + return 0; } int cpufreq_boost_trigger_state(int state) { + struct cpufreq_policy *policy; unsigned long flags; int ret = 0; @@ -2570,15 +2565,25 @@ int cpufreq_boost_trigger_state(int state) cpufreq_driver->boost_enabled = state; write_unlock_irqrestore(_driver_lock, flags); - ret = cpufreq_driver->set_boost(state); - if (ret) { - write_lock_irqsave(_driver_lock, flags); - cpufreq_driver->boost_enabled = !state; - write_unlock_irqrestore(_driver_lock, flags); - - pr_err("%s: Cannot %s BOOST\n", - __func__, state ? "enable" : "disable"); + get_online_cpus(); + for_each_active_policy(policy) { + ret = cpufreq_driver->set_boost(policy, state); + if (ret) + goto err_reset_state; } + put_online_cpus(); + + return 0; + +err_reset_state: + put_online_cpus(); + + write_lock_irqsave(_driver_lock, flags); + cpufreq_driver->boost_enabled = !state; + write_unlock_irqrestore(_driver_lock, flags); + + pr_err("%s: Cannot %s BOOST\n", + __func__, state ? "enable" : "disable"); return ret; } diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index
[PATCH v5 0/2] add SW BOOST support for CPPC
ACPI spec 6.2 section 8.4.7.1 provide the following two CPC registers. "Highest performance is the absolute maximum performance an individual processor may reach, assuming ideal conditions. This performance level may not be sustainable for long durations, and may only be achievable if other platform components are in a specific state; for example, it may require other processors be in an idle state. Nominal Performance is the maximum sustained performance level of the processor, assuming ideal operating conditions. In absence of an external constraint (power, thermal, etc.) this is the performance level the platform is expected to be able to maintain continuously. All processors are expected to be able to sustain their nominal performance state simultaneously." We can use Highest Performance as the max performance in boost mode and Nomial Performance as the max performance in non-boost mode. If the Highest Performance is greater than the Nominal Performance, we assume SW BOOST is supported. Changelog: v4 -> v5: add 'cpu_hotplug_lock' before calling '.set_boost' v3 -> v4: run 'boost_set_msr_each' for each CPU in the policy rather than each CPU in the system for 'acpi-cpufreq' add 'Suggested-by' Xiongfeng Wang (2): cpufreq: change '.set_boost' to act on only one policy CPPC: add support for SW BOOST drivers/cpufreq/acpi-cpufreq.c | 14 ++- drivers/cpufreq/cppc_cpufreq.c | 39 +++-- drivers/cpufreq/cpufreq.c | 57 +++--- include/linux/cpufreq.h| 2 +- 4 files changed, 77 insertions(+), 35 deletions(-) -- 1.7.12.4
[PATCH v5 2/2] CPPC: add support for SW BOOST
To add SW BOOST support for CPPC, we need to get the max frequency of boost mode and non-boost mode. ACPI spec 6.2 section 8.4.7.1 describe the following two CPC registers. "Highest performance is the absolute maximum performance an individual processor may reach, assuming ideal conditions. This performance level may not be sustainable for long durations, and may only be achievable if other platform components are in a specific state; for example, it may require other processors be in an idle state. Nominal Performance is the maximum sustained performance level of the processor, assuming ideal operating conditions. In absence of an external constraint (power, thermal, etc.) this is the performance level the platform is expected to be able to maintain continuously. All processors are expected to be able to sustain their nominal performance state simultaneously." To add SW BOOST support for CPPC, we can use Highest Performance as the max performance in boost mode and Nominal Performance as the max performance in non-boost mode. If the Highest Performance is greater than the Nominal Performance, we assume SW BOOST is supported. The current CPPC driver does not support SW BOOST and use 'Highest Performance' as the max performance the CPU can achieve. 'Nominal Performance' is used to convert 'performance' to 'frequency'. That means, if firmware enable boost and provide a value for Highest Performance which is greater than Nominal Performance, boost feature is enabled by default. Because SW BOOST is disabled by default, so, after this patch, boost feature is disabled by default even if boost is enabled by firmware. Signed-off-by: Xiongfeng Wang Suggested-by: Viresh Kumar --- drivers/cpufreq/cppc_cpufreq.c | 39 +-- 1 file changed, 37 insertions(+), 2 deletions(-) diff --git a/drivers/cpufreq/cppc_cpufreq.c b/drivers/cpufreq/cppc_cpufreq.c index bda0b24..257d726 100644 --- a/drivers/cpufreq/cppc_cpufreq.c +++ b/drivers/cpufreq/cppc_cpufreq.c @@ -37,6 +37,7 @@ * requested etc. */ static struct cppc_cpudata **all_cpu_data; +static bool boost_supported; struct cppc_workaround_oem_info { char oem_id[ACPI_OEM_ID_SIZE + 1]; @@ -310,7 +311,7 @@ static int cppc_cpufreq_cpu_init(struct cpufreq_policy *policy) * Section 8.4.7.1.1.5 of ACPI 6.1 spec) */ policy->min = cppc_cpufreq_perf_to_khz(cpu, cpu->perf_caps.lowest_nonlinear_perf); - policy->max = cppc_cpufreq_perf_to_khz(cpu, cpu->perf_caps.highest_perf); + policy->max = cppc_cpufreq_perf_to_khz(cpu, cpu->perf_caps.nominal_perf); /* * Set cpuinfo.min_freq to Lowest to make the full range of performance @@ -318,7 +319,7 @@ static int cppc_cpufreq_cpu_init(struct cpufreq_policy *policy) * nonlinear perf */ policy->cpuinfo.min_freq = cppc_cpufreq_perf_to_khz(cpu, cpu->perf_caps.lowest_perf); - policy->cpuinfo.max_freq = cppc_cpufreq_perf_to_khz(cpu, cpu->perf_caps.highest_perf); + policy->cpuinfo.max_freq = cppc_cpufreq_perf_to_khz(cpu, cpu->perf_caps.nominal_perf); policy->transition_delay_us = cppc_cpufreq_get_transition_delay_us(cpu_num); policy->shared_type = cpu->shared_type; @@ -343,6 +344,13 @@ static int cppc_cpufreq_cpu_init(struct cpufreq_policy *policy) cpu->cur_policy = policy; + /* +* If 'highest_perf' is greater than 'nominal_perf', we assume CPU Boost +* is supported. +*/ + if (cpu->perf_caps.highest_perf > cpu->perf_caps.nominal_perf) + boost_supported = true; + /* Set policy->cur to max now. The governors will adjust later. */ policy->cur = cppc_cpufreq_perf_to_khz(cpu, cpu->perf_caps.highest_perf); @@ -410,6 +418,32 @@ static unsigned int cppc_cpufreq_get_rate(unsigned int cpunum) return cppc_get_rate_from_fbctrs(cpu, fb_ctrs_t0, fb_ctrs_t1); } +static int cppc_cpufreq_set_boost(struct cpufreq_policy *policy, int state) +{ + struct cppc_cpudata *cpudata; + int ret; + + if (!boost_supported) { + pr_err("BOOST not supported by CPU or firmware\n"); + return -EINVAL; + } + + cpudata = all_cpu_data[policy->cpu]; + if (state) + policy->max = cppc_cpufreq_perf_to_khz(cpudata, + cpudata->perf_caps.highest_perf); + else + policy->max = cppc_cpufreq_perf_to_khz(cpudata, + cpudata->perf_caps.nominal_perf); + policy->cpuinfo.max_freq = policy->max; + + ret = freq_qos_update_request(policy->max_freq_req, policy->max); + if (ret < 0) + return ret; + + return 0; +} + static struct cpufreq_driver cppc_cpufreq_driver = { .flags = CPUFREQ_CONST_LOOPS, .verify = cppc_verify_policy, @@ -417,6 +451,7 @@ static unsigned int
Re: [PATCH v8 17/18] KVM: x86: Add kexec support for SEV Live Migration.
On Tue, May 5, 2020 at 2:21 PM Ashish Kalra wrote: > > From: Ashish Kalra > > Reset the host's page encryption bitmap related to kernel > specific page encryption status settings before we load a > new kernel by kexec. We cannot reset the complete > page encryption bitmap here as we need to retain the > UEFI/OVMF firmware specific settings. > > The host's page encryption bitmap is maintained for the > guest to keep the encrypted/decrypted state of the guest pages, > therefore we need to explicitly mark all shared pages as > encrypted again before rebooting into the new guest kernel. > > Signed-off-by: Ashish Kalra > --- > arch/x86/kernel/kvm.c | 28 > 1 file changed, 28 insertions(+) > > diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c > index 4b29815de873..a8bc30d5b15b 100644 > --- a/arch/x86/kernel/kvm.c > +++ b/arch/x86/kernel/kvm.c > @@ -35,6 +35,7 @@ > #include > #include > #include > +#include > > static int kvmapf = 1; > > @@ -358,6 +359,33 @@ static void kvm_pv_guest_cpu_reboot(void *unused) > */ > if (kvm_para_has_feature(KVM_FEATURE_PV_EOI)) > wrmsrl(MSR_KVM_PV_EOI_EN, 0); > + /* > +* Reset the host's page encryption bitmap related to kernel > +* specific page encryption status settings before we load a > +* new kernel by kexec. NOTE: We cannot reset the complete > +* page encryption bitmap here as we need to retain the > +* UEFI/OVMF firmware specific settings. > +*/ > + if (sev_live_migration_enabled() & (smp_processor_id() == 0)) { > + int i; > + unsigned long nr_pages; > + > + for (i = 0; i < e820_table->nr_entries; i++) { > + struct e820_entry *entry = _table->entries[i]; > + unsigned long start_pfn; > + unsigned long end_pfn; > + > + if (entry->type != E820_TYPE_RAM) > + continue; What should the behavior be for other memory types that are not expected to be mucked with by firmware? Should we avoid resetting the enc status of pmem/pram pages? My intuition here is that we should only preserve the enc status of those bits that are set by the firmware. > + > + start_pfn = entry->addr >> PAGE_SHIFT; > + end_pfn = (entry->addr + entry->size) >> PAGE_SHIFT; > + nr_pages = DIV_ROUND_UP(entry->size, PAGE_SIZE); > + > + kvm_sev_hypercall3(KVM_HC_PAGE_ENC_STATUS, > + entry->addr, nr_pages, 1); > + } > + } > kvm_pv_disable_apf(); > kvm_disable_steal_time(); > } > -- > 2.17.1 >
Re: [PATCH v8 18/18] KVM: SVM: Enable SEV live migration feature implicitly on Incoming VM(s).
On Tue, May 5, 2020 at 2:22 PM Ashish Kalra wrote: > > From: Ashish Kalra > > For source VM, live migration feature is enabled explicitly > when the guest is booting, for the incoming VM(s) it is implied. > This is required for handling A->B->C->... VM migrations case. > > Signed-off-by: Ashish Kalra > --- > arch/x86/kvm/svm/sev.c | 7 +++ > 1 file changed, 7 insertions(+) > > diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c > index 6f69c3a47583..ba7c0ebfa1f3 100644 > --- a/arch/x86/kvm/svm/sev.c > +++ b/arch/x86/kvm/svm/sev.c > @@ -1592,6 +1592,13 @@ int svm_set_page_enc_bitmap(struct kvm *kvm, > if (ret) > goto unlock; > > + /* > +* For source VM, live migration feature is enabled > +* explicitly when the guest is booting, for the > +* incoming VM(s) it is implied. > +*/ > + sev_update_migration_flags(kvm, KVM_SEV_LIVE_MIGRATION_ENABLED); > + > bitmap_copy(sev->page_enc_bmap + BIT_WORD(gfn_start), bitmap, > (gfn_end - gfn_start)); > > -- > 2.17.1 > Reviewed-by: Steve Rutherford
Re: [PATCH v8 16/18] KVM: x86: Mark _bss_decrypted section variables as decrypted in page encryption bitmap.
On Tue, May 5, 2020 at 2:20 PM Ashish Kalra wrote: > > From: Ashish Kalra > > Ensure that _bss_decrypted section variables such as hv_clock_boot and > wall_clock are marked as decrypted in the page encryption bitmap if > sev liv migration is supported. > > Signed-off-by: Ashish Kalra > --- > arch/x86/kernel/kvmclock.c | 12 > 1 file changed, 12 insertions(+) > > diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c > index 34b18f6eeb2c..65777bf1218d 100644 > --- a/arch/x86/kernel/kvmclock.c > +++ b/arch/x86/kernel/kvmclock.c > @@ -334,6 +334,18 @@ void __init kvmclock_init(void) > pr_info("kvm-clock: Using msrs %x and %x", > msr_kvm_system_time, msr_kvm_wall_clock); > > + if (sev_live_migration_enabled()) { > + unsigned long nr_pages; > + /* > +* sizeof(hv_clock_boot) is already PAGE_SIZE aligned > +*/ > + early_set_mem_enc_dec_hypercall((unsigned long)hv_clock_boot, > + 1, 0); > + nr_pages = DIV_ROUND_UP(sizeof(wall_clock), PAGE_SIZE); > + early_set_mem_enc_dec_hypercall((unsigned long)_clock, > + nr_pages, 0); > + } > + > this_cpu_write(hv_clock_per_cpu, _clock_boot[0]); > kvm_register_clock("primary cpu clock"); > pvclock_set_pvti_cpu0_va(hv_clock_boot); > -- > 2.17.1 > Reviewed-by: Steve Rutherford
Re: [PATCH v8 14/18] EFI: Introduce the new AMD Memory Encryption GUID.
On Tue, May 5, 2020 at 2:20 PM Ashish Kalra wrote: > > From: Ashish Kalra > > Introduce a new AMD Memory Encryption GUID which is currently > used for defining a new UEFI enviroment variable which indicates > UEFI/OVMF support for the SEV live migration feature. This variable > is setup when UEFI/OVMF detects host/hypervisor support for SEV > live migration and later this variable is read by the kernel using > EFI runtime services to verify if OVMF supports the live migration > feature. > > Signed-off-by: Ashish Kalra > --- > include/linux/efi.h | 1 + > 1 file changed, 1 insertion(+) > > diff --git a/include/linux/efi.h b/include/linux/efi.h > index 251f1f783cdf..2efb42ccf3a8 100644 > --- a/include/linux/efi.h > +++ b/include/linux/efi.h > @@ -358,6 +358,7 @@ void efi_native_runtime_setup(void); > > /* OEM GUIDs */ > #define DELLEMC_EFI_RCI2_TABLE_GUIDEFI_GUID(0x2d9f28a2, 0xa886, > 0x456a, 0x97, 0xa8, 0xf1, 0x1e, 0xf2, 0x4f, 0xf4, 0x55) > +#define MEM_ENCRYPT_GUID EFI_GUID(0x0cf29b71, 0x9e51, > 0x433a, 0xa3, 0xb7, 0x81, 0xf3, 0xab, 0x16, 0xb8, 0x75) > > typedef struct { > efi_guid_t guid; > -- > 2.17.1 > Have you gotten this GUID upstreamed into edk2? Reviewed-by: Steve Rutherford
Re: [PATCH v8 15/18] KVM: x86: Add guest support for detecting and enabling SEV Live Migration feature.
On Tue, May 5, 2020 at 2:20 PM Ashish Kalra wrote: > > From: Ashish Kalra > > The guest support for detecting and enabling SEV Live migration > feature uses the following logic : > > - kvm_init_plaform() checks if its booted under the EFI > >- If not EFI, > > i) check for the KVM_FEATURE_CPUID > > ii) if CPUID reports that migration is support then issue wrmsrl > to enable the SEV migration support > >- If EFI, > > i) Check the KVM_FEATURE_CPUID. > > ii) If CPUID reports that migration is supported, then reads the UEFI > enviroment variable which > indicates OVMF support for live migration. > > iii) If variable is set then wrmsr to enable the SEV migration support. > > The EFI live migration check is done using a late_initcall() callback. > > Signed-off-by: Ashish Kalra > --- > arch/x86/include/asm/mem_encrypt.h | 11 ++ > arch/x86/kernel/kvm.c | 62 ++ > arch/x86/mm/mem_encrypt.c | 11 ++ > 3 files changed, 84 insertions(+) > > diff --git a/arch/x86/include/asm/mem_encrypt.h > b/arch/x86/include/asm/mem_encrypt.h > index 848ce43b9040..d10e92ae5ca1 100644 > --- a/arch/x86/include/asm/mem_encrypt.h > +++ b/arch/x86/include/asm/mem_encrypt.h > @@ -20,6 +20,7 @@ > > extern u64 sme_me_mask; > extern bool sev_enabled; > +extern bool sev_live_mig_enabled; > > void sme_encrypt_execute(unsigned long encrypted_kernel_vaddr, > unsigned long decrypted_kernel_vaddr, > @@ -42,6 +43,8 @@ void __init sme_enable(struct boot_params *bp); > > int __init early_set_memory_decrypted(unsigned long vaddr, unsigned long > size); > int __init early_set_memory_encrypted(unsigned long vaddr, unsigned long > size); > +void __init early_set_mem_enc_dec_hypercall(unsigned long vaddr, int npages, > + bool enc); > > /* Architecture __weak replacement functions */ > void __init mem_encrypt_init(void); > @@ -55,6 +58,7 @@ bool sev_active(void); > #else /* !CONFIG_AMD_MEM_ENCRYPT */ > > #define sme_me_mask0ULL > +#define sev_live_mig_enabled false > > static inline void __init sme_early_encrypt(resource_size_t paddr, > unsigned long size) { } > @@ -76,6 +80,8 @@ static inline int __init > early_set_memory_decrypted(unsigned long vaddr, unsigned long size) { return > 0; } > static inline int __init > early_set_memory_encrypted(unsigned long vaddr, unsigned long size) { return > 0; } > +static inline void __init > +early_set_mem_enc_dec_hypercall(unsigned long vaddr, int npages, bool enc) {} > > #define __bss_decrypted > > @@ -102,6 +108,11 @@ static inline u64 sme_get_me_mask(void) > return sme_me_mask; > } > > +static inline bool sev_live_migration_enabled(void) > +{ > + return sev_live_mig_enabled; > +} > + > #endif /* __ASSEMBLY__ */ > > #endif /* __X86_MEM_ENCRYPT_H__ */ > diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c > index 6efe0410fb72..4b29815de873 100644 > --- a/arch/x86/kernel/kvm.c > +++ b/arch/x86/kernel/kvm.c > @@ -24,6 +24,7 @@ > #include > #include > #include > +#include > #include > #include > #include > @@ -403,6 +404,53 @@ static inline void __set_percpu_decrypted(void *ptr, > unsigned long size) > early_set_memory_decrypted((unsigned long) ptr, size); > } > > +#ifdef CONFIG_EFI > +static bool setup_kvm_sev_migration(void) > +{ > + efi_char16_t efi_Sev_Live_Mig_support_name[] = > L"SevLiveMigrationEnabled"; > + efi_guid_t efi_variable_guid = MEM_ENCRYPT_GUID; > + efi_status_t status; > + unsigned long size; > + bool enabled; > + > + if (!sev_live_migration_enabled()) > + return false; > + > + size = sizeof(enabled); > + > + if (!efi_enabled(EFI_RUNTIME_SERVICES)) { > + pr_info("setup_kvm_sev_migration: no efi\n"); > + return false; > + } > + > + /* Get variable contents into buffer */ > + status = efi.get_variable(efi_Sev_Live_Mig_support_name, > + _variable_guid, NULL, , ); > + > + if (status == EFI_NOT_FOUND) { > + pr_info("setup_kvm_sev_migration: variable not found\n"); > + return false; > + } > + > + if (status != EFI_SUCCESS) { > + pr_info("setup_kvm_sev_migration: get_variable fail\n"); > + return false; > + } > + > + if (enabled == 0) { > + pr_info("setup_kvm_sev_migration: live migration disabled in > OVMF\n"); > + return false; > + } > + > + pr_info("setup_kvm_sev_migration: live migration enabled in OVMF\n"); > + wrmsrl(MSR_KVM_SEV_LIVE_MIG_EN, KVM_SEV_LIVE_MIGRATION_ENABLED); > + > + return true; > +} > + > +late_initcall(setup_kvm_sev_migration); > +#endif > + > /* > * Iterate through all possible CPUs and map the memory region
Re: [PATCH v8 12/18] KVM: SVM: Add support for static allocation of unified Page Encryption Bitmap.
On Tue, May 5, 2020 at 2:18 PM Ashish Kalra wrote: > > From: Ashish Kalra > > Add support for static allocation of the unified Page encryption bitmap by > extending kvm_arch_commit_memory_region() callack to add svm specific x86_ops > which can read the userspace provided memory region/memslots and calculate > the amount of guest RAM managed by the KVM and grow the bitmap based > on that information, i.e. the highest guest PA that is mapped by a memslot. > > Signed-off-by: Ashish Kalra > --- > arch/x86/include/asm/kvm_host.h | 1 + > arch/x86/kvm/svm/sev.c | 35 + > arch/x86/kvm/svm/svm.c | 1 + > arch/x86/kvm/svm/svm.h | 1 + > arch/x86/kvm/x86.c | 5 + > 5 files changed, 43 insertions(+) > > diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h > index fc74144d5ab0..b573ea85b57e 100644 > --- a/arch/x86/include/asm/kvm_host.h > +++ b/arch/x86/include/asm/kvm_host.h > @@ -1254,6 +1254,7 @@ struct kvm_x86_ops { > > bool (*apic_init_signal_blocked)(struct kvm_vcpu *vcpu); > int (*enable_direct_tlbflush)(struct kvm_vcpu *vcpu); > + void (*commit_memory_region)(struct kvm *kvm, enum kvm_mr_change > change); > int (*page_enc_status_hc)(struct kvm *kvm, unsigned long gpa, > unsigned long sz, unsigned long mode); > int (*get_page_enc_bitmap)(struct kvm *kvm, > diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c > index 30efc1068707..c0d7043a0627 100644 > --- a/arch/x86/kvm/svm/sev.c > +++ b/arch/x86/kvm/svm/sev.c > @@ -1377,6 +1377,41 @@ static int sev_resize_page_enc_bitmap(struct kvm *kvm, > unsigned long new_size) > return 0; > } > > +void svm_commit_memory_region(struct kvm *kvm, enum kvm_mr_change change) > +{ > + struct kvm_memslots *slots; > + struct kvm_memory_slot *memslot; > + gfn_t start, end = 0; > + > + spin_lock(>mmu_lock); > + if (change == KVM_MR_CREATE) { > + slots = kvm_memslots(kvm); > + kvm_for_each_memslot(memslot, slots) { > + start = memslot->base_gfn; > + end = memslot->base_gfn + memslot->npages; > + /* > +* KVM memslots is a sorted list, starting with > +* the highest mapped guest PA, so pick the topmost > +* valid guest PA. > +*/ > + if (memslot->npages) > + break; > + } > + } > + spin_unlock(>mmu_lock); > + > + if (end) { > + /* > +* NORE: This callback is invoked in vm ioctl > +* set_user_memory_region, hence we can use a > +* mutex here. > +*/ > + mutex_lock(>lock); > + sev_resize_page_enc_bitmap(kvm, end); > + mutex_unlock(>lock); > + } > +} > + > int svm_page_enc_status_hc(struct kvm *kvm, unsigned long gpa, > unsigned long npages, unsigned long enc) > { > diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c > index 501e82f5593c..442adbbb0641 100644 > --- a/arch/x86/kvm/svm/svm.c > +++ b/arch/x86/kvm/svm/svm.c > @@ -4015,6 +4015,7 @@ static struct kvm_x86_ops svm_x86_ops __initdata = { > > .check_nested_events = svm_check_nested_events, > > + .commit_memory_region = svm_commit_memory_region, > .page_enc_status_hc = svm_page_enc_status_hc, > .get_page_enc_bitmap = svm_get_page_enc_bitmap, > .set_page_enc_bitmap = svm_set_page_enc_bitmap, > diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h > index 2ebdcce50312..fd99e0a5417a 100644 > --- a/arch/x86/kvm/svm/svm.h > +++ b/arch/x86/kvm/svm/svm.h > @@ -406,6 +406,7 @@ int svm_page_enc_status_hc(struct kvm *kvm, unsigned long > gpa, > unsigned long npages, unsigned long enc); > int svm_get_page_enc_bitmap(struct kvm *kvm, struct kvm_page_enc_bitmap > *bmap); > int svm_set_page_enc_bitmap(struct kvm *kvm, struct kvm_page_enc_bitmap > *bmap); > +void svm_commit_memory_region(struct kvm *kvm, enum kvm_mr_change change); > > /* avic.c */ > > diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c > index c4166d7a0493..8938de868d42 100644 > --- a/arch/x86/kvm/x86.c > +++ b/arch/x86/kvm/x86.c > @@ -10133,6 +10133,11 @@ void kvm_arch_commit_memory_region(struct kvm *kvm, > kvm_mmu_change_mmu_pages(kvm, > kvm_mmu_calculate_default_mmu_pages(kvm)); > > + if (change == KVM_MR_CREATE || change == KVM_MR_DELETE) { > + if (kvm_x86_ops.commit_memory_region) > + kvm_x86_ops.commit_memory_region(kvm, change); Why not just call this every time (if it exists) and have the kvm_x86_op determine if it should do anything? It
Re: [PATCH v8 13/18] KVM: x86: Introduce new KVM_FEATURE_SEV_LIVE_MIGRATION feature & Custom MSR.
On Tue, May 5, 2020 at 2:19 PM Ashish Kalra wrote: > > From: Ashish Kalra > > Add new KVM_FEATURE_SEV_LIVE_MIGRATION feature for guest to check > for host-side support for SEV live migration. Also add a new custom > MSR_KVM_SEV_LIVE_MIG_EN for guest to enable the SEV live migration > feature. > > Signed-off-by: Ashish Kalra > --- > Documentation/virt/kvm/cpuid.rst | 5 + > Documentation/virt/kvm/msr.rst | 10 ++ > arch/x86/include/uapi/asm/kvm_para.h | 5 + > arch/x86/kvm/svm/sev.c | 14 ++ > arch/x86/kvm/svm/svm.c | 16 > arch/x86/kvm/svm/svm.h | 2 ++ > 6 files changed, 52 insertions(+) > > diff --git a/Documentation/virt/kvm/cpuid.rst > b/Documentation/virt/kvm/cpuid.rst > index 01b081f6e7ea..0514523e00cd 100644 > --- a/Documentation/virt/kvm/cpuid.rst > +++ b/Documentation/virt/kvm/cpuid.rst > @@ -86,6 +86,11 @@ KVM_FEATURE_PV_SCHED_YIELD13 guest checks > this feature bit >before using paravirtualized >sched yield. > > +KVM_FEATURE_SEV_LIVE_MIGRATION14 guest checks this feature bit > before > + using the page encryption state > + hypercall to notify the page > state > + change > + > KVM_FEATURE_CLOCSOURCE_STABLE_BIT 24 host will warn if no guest-side >per-cpu warps are expeced in >kvmclock > diff --git a/Documentation/virt/kvm/msr.rst b/Documentation/virt/kvm/msr.rst > index 33892036672d..7cd7786bbb03 100644 > --- a/Documentation/virt/kvm/msr.rst > +++ b/Documentation/virt/kvm/msr.rst > @@ -319,3 +319,13 @@ data: > > KVM guests can request the host not to poll on HLT, for example if > they are performing polling themselves. > + > +MSR_KVM_SEV_LIVE_MIG_EN: > +0x4b564d06 > + > + Control SEV Live Migration features. > + > +data: > +Bit 0 enables (1) or disables (0) host-side SEV Live Migration > feature. > +Bit 1 enables (1) or disables (0) support for SEV Live Migration > extensions. > +All other bits are reserved. > diff --git a/arch/x86/include/uapi/asm/kvm_para.h > b/arch/x86/include/uapi/asm/kvm_para.h > index 2a8e0b6b9805..d9d4953b42ad 100644 > --- a/arch/x86/include/uapi/asm/kvm_para.h > +++ b/arch/x86/include/uapi/asm/kvm_para.h > @@ -31,6 +31,7 @@ > #define KVM_FEATURE_PV_SEND_IPI11 > #define KVM_FEATURE_POLL_CONTROL 12 > #define KVM_FEATURE_PV_SCHED_YIELD 13 > +#define KVM_FEATURE_SEV_LIVE_MIGRATION 14 > > #define KVM_HINTS_REALTIME 0 > > @@ -50,6 +51,7 @@ > #define MSR_KVM_STEAL_TIME 0x4b564d03 > #define MSR_KVM_PV_EOI_EN 0x4b564d04 > #define MSR_KVM_POLL_CONTROL 0x4b564d05 > +#define MSR_KVM_SEV_LIVE_MIG_EN0x4b564d06 > > struct kvm_steal_time { > __u64 steal; > @@ -122,4 +124,7 @@ struct kvm_vcpu_pv_apf_data { > #define KVM_PV_EOI_ENABLED KVM_PV_EOI_MASK > #define KVM_PV_EOI_DISABLED 0x0 > > +#define KVM_SEV_LIVE_MIGRATION_ENABLED (1 << 0) > +#define KVM_SEV_LIVE_MIGRATION_EXTENSIONS_SUPPORTED(1 << 1) > + > #endif /* _UAPI_ASM_X86_KVM_PARA_H */ > diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c > index c0d7043a0627..6f69c3a47583 100644 > --- a/arch/x86/kvm/svm/sev.c > +++ b/arch/x86/kvm/svm/sev.c > @@ -1469,6 +1469,17 @@ int svm_page_enc_status_hc(struct kvm *kvm, unsigned > long gpa, > return 0; > } > > +void sev_update_migration_flags(struct kvm *kvm, u64 data) > +{ > + struct kvm_sev_info *sev = _kvm_svm(kvm)->sev_info; > + > + if (!sev_guest(kvm)) > + return; > + > + if (data & KVM_SEV_LIVE_MIGRATION_ENABLED) > + sev->live_migration_enabled = true; > +} > + > int svm_get_page_enc_bitmap(struct kvm *kvm, >struct kvm_page_enc_bitmap *bmap) > { > @@ -1481,6 +1492,9 @@ int svm_get_page_enc_bitmap(struct kvm *kvm, > if (!sev_guest(kvm)) > return -ENOTTY; > > + if (!sev->live_migration_enabled) > + return -EINVAL; > + > gfn_start = bmap->start_gfn; > gfn_end = gfn_start + bmap->num_pages; > > diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c > index 442adbbb0641..a99f5457f244 100644 > --- a/arch/x86/kvm/svm/svm.c > +++ b/arch/x86/kvm/svm/svm.c > @@ -2633,6 +2633,9 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct > msr_data *msr) > svm->msr_decfg = data; > break; > } > + case MSR_KVM_SEV_LIVE_MIG_EN: > + sev_update_migration_flags(vcpu->kvm, data); > + break; > case MSR_IA32_APICBASE: > if (kvm_vcpu_apicv_active(vcpu))
Re: [PATCH v8 11/18] KVM: x86: Introduce KVM_SET_PAGE_ENC_BITMAP ioctl
On Tue, May 5, 2020 at 2:18 PM Ashish Kalra wrote: > > From: Brijesh Singh > > The ioctl can be used to set page encryption bitmap for an > incoming guest. > > Cc: Thomas Gleixner > Cc: Ingo Molnar > Cc: "H. Peter Anvin" > Cc: Paolo Bonzini > Cc: "Radim Krčmář" > Cc: Joerg Roedel > Cc: Borislav Petkov > Cc: Tom Lendacky > Cc: x...@kernel.org > Cc: k...@vger.kernel.org > Cc: linux-kernel@vger.kernel.org > Reviewed-by: Venu Busireddy > Signed-off-by: Brijesh Singh > Signed-off-by: Ashish Kalra > --- > Documentation/virt/kvm/api.rst | 44 + > arch/x86/include/asm/kvm_host.h | 2 ++ > arch/x86/kvm/svm/sev.c | 50 + > arch/x86/kvm/svm/svm.c | 1 + > arch/x86/kvm/svm/svm.h | 1 + > arch/x86/kvm/x86.c | 12 > include/uapi/linux/kvm.h| 1 + > 7 files changed, 111 insertions(+) > > diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst > index ecad84086892..fa70017ee693 100644 > --- a/Documentation/virt/kvm/api.rst > +++ b/Documentation/virt/kvm/api.rst > @@ -4663,6 +4663,28 @@ or shared. The bitmap can be used during the guest > migration. If the page > is private then the userspace need to use SEV migration commands to transmit > the page. > > +4.126 KVM_SET_PAGE_ENC_BITMAP (vm ioctl) > +--- > + > +:Capability: basic > +:Architectures: x86 > +:Type: vm ioctl > +:Parameters: struct kvm_page_enc_bitmap (in/out) > +:Returns: 0 on success, -1 on error > + > +/* for KVM_SET_PAGE_ENC_BITMAP */ > +struct kvm_page_enc_bitmap { > + __u64 start_gfn; > + __u64 num_pages; > + union { > + void __user *enc_bitmap; /* one bit per page */ > + __u64 padding2; > + }; > +}; > + > +During the guest live migration the outgoing guest exports its page > encryption > +bitmap, the KVM_SET_PAGE_ENC_BITMAP can be used to build the page encryption > +bitmap for an incoming guest. > > 4.125 KVM_S390_PV_COMMAND > - > @@ -4717,6 +4739,28 @@ KVM_PV_VM_VERIFY >Verify the integrity of the unpacked image. Only if this succeeds, >KVM is allowed to start protected VCPUs. > > +4.126 KVM_SET_PAGE_ENC_BITMAP (vm ioctl) > +--- > + > +:Capability: basic > +:Architectures: x86 > +:Type: vm ioctl > +:Parameters: struct kvm_page_enc_bitmap (in/out) > +:Returns: 0 on success, -1 on error > + > +/* for KVM_SET_PAGE_ENC_BITMAP */ > +struct kvm_page_enc_bitmap { > + __u64 start_gfn; > + __u64 num_pages; > + union { > + void __user *enc_bitmap; /* one bit per page */ > + __u64 padding2; > + }; > +}; > + > +During the guest live migration the outgoing guest exports its page > encryption > +bitmap, the KVM_SET_PAGE_ENC_BITMAP can be used to build the page encryption > +bitmap for an incoming guest. > > 5. The kvm_run structure > > diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h > index 9e428befb6a4..fc74144d5ab0 100644 > --- a/arch/x86/include/asm/kvm_host.h > +++ b/arch/x86/include/asm/kvm_host.h > @@ -1258,6 +1258,8 @@ struct kvm_x86_ops { > unsigned long sz, unsigned long mode); > int (*get_page_enc_bitmap)(struct kvm *kvm, > struct kvm_page_enc_bitmap *bmap); > + int (*set_page_enc_bitmap)(struct kvm *kvm, > + struct kvm_page_enc_bitmap *bmap); > }; > > struct kvm_x86_init_ops { > diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c > index 387045902470..30efc1068707 100644 > --- a/arch/x86/kvm/svm/sev.c > +++ b/arch/x86/kvm/svm/sev.c > @@ -1504,6 +1504,56 @@ int svm_get_page_enc_bitmap(struct kvm *kvm, > return ret; > } > > +int svm_set_page_enc_bitmap(struct kvm *kvm, > + struct kvm_page_enc_bitmap *bmap) > +{ > + struct kvm_sev_info *sev = _kvm_svm(kvm)->sev_info; > + unsigned long gfn_start, gfn_end; > + unsigned long *bitmap; > + unsigned long sz; > + int ret; > + > + if (!sev_guest(kvm)) > + return -ENOTTY; > + /* special case of resetting the complete bitmap */ > + if (!bmap->enc_bitmap) { > + mutex_lock(>lock); > + /* by default all pages are marked encrypted */ > + if (sev->page_enc_bmap_size) > + bitmap_fill(sev->page_enc_bmap, > + sev->page_enc_bmap_size); > + mutex_unlock(>lock); > + return 0; > + } > > + > + gfn_start = bmap->start_gfn; > + gfn_end = gfn_start + bmap->num_pages; > + > + sz = ALIGN(bmap->num_pages, BITS_PER_LONG) / 8; > + bitmap = kmalloc(sz, GFP_KERNEL); > + if (!bitmap) > + return -ENOMEM; > + > + ret =
Re: [PATCH v8 10/18] mm: x86: Invoke hypercall when page encryption status is changed
On Tue, May 5, 2020 at 2:18 PM Ashish Kalra wrote: > > From: Brijesh Singh > > Invoke a hypercall when a memory region is changed from encrypted -> > decrypted and vice versa. Hypervisor needs to know the page encryption > status during the guest migration. > > Cc: Thomas Gleixner > Cc: Ingo Molnar > Cc: "H. Peter Anvin" > Cc: Paolo Bonzini > Cc: "Radim Krčmář" > Cc: Joerg Roedel > Cc: Borislav Petkov > Cc: Tom Lendacky > Cc: x...@kernel.org > Cc: k...@vger.kernel.org > Cc: linux-kernel@vger.kernel.org > Reviewed-by: Venu Busireddy > Signed-off-by: Brijesh Singh > Signed-off-by: Ashish Kalra > --- > arch/x86/include/asm/paravirt.h | 10 + > arch/x86/include/asm/paravirt_types.h | 2 + > arch/x86/kernel/paravirt.c| 1 + > arch/x86/mm/mem_encrypt.c | 57 ++- > arch/x86/mm/pat/set_memory.c | 7 > 5 files changed, 76 insertions(+), 1 deletion(-) > > diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h > index 694d8daf4983..8127b9c141bf 100644 > --- a/arch/x86/include/asm/paravirt.h > +++ b/arch/x86/include/asm/paravirt.h > @@ -78,6 +78,12 @@ static inline void paravirt_arch_exit_mmap(struct > mm_struct *mm) > PVOP_VCALL1(mmu.exit_mmap, mm); > } > > +static inline void page_encryption_changed(unsigned long vaddr, int npages, > + bool enc) > +{ > + PVOP_VCALL3(mmu.page_encryption_changed, vaddr, npages, enc); > +} > + > #ifdef CONFIG_PARAVIRT_XXL > static inline void load_sp0(unsigned long sp0) > { > @@ -946,6 +952,10 @@ static inline void paravirt_arch_dup_mmap(struct > mm_struct *oldmm, > static inline void paravirt_arch_exit_mmap(struct mm_struct *mm) > { > } > + > +static inline void page_encryption_changed(unsigned long vaddr, int npages, > bool enc) > +{ > +} > #endif > #endif /* __ASSEMBLY__ */ > #endif /* _ASM_X86_PARAVIRT_H */ > diff --git a/arch/x86/include/asm/paravirt_types.h > b/arch/x86/include/asm/paravirt_types.h > index 732f62e04ddb..03bfd515c59c 100644 > --- a/arch/x86/include/asm/paravirt_types.h > +++ b/arch/x86/include/asm/paravirt_types.h > @@ -215,6 +215,8 @@ struct pv_mmu_ops { > > /* Hook for intercepting the destruction of an mm_struct. */ > void (*exit_mmap)(struct mm_struct *mm); > + void (*page_encryption_changed)(unsigned long vaddr, int npages, > + bool enc); > > #ifdef CONFIG_PARAVIRT_XXL > struct paravirt_callee_save read_cr2; > diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c > index c131ba4e70ef..840c02b23aeb 100644 > --- a/arch/x86/kernel/paravirt.c > +++ b/arch/x86/kernel/paravirt.c > @@ -367,6 +367,7 @@ struct paravirt_patch_template pv_ops = { > (void (*)(struct mmu_gather *, void > *))tlb_remove_page, > > .mmu.exit_mmap = paravirt_nop, > + .mmu.page_encryption_changed= paravirt_nop, > > #ifdef CONFIG_PARAVIRT_XXL > .mmu.read_cr2 = __PV_IS_CALLEE_SAVE(native_read_cr2), > diff --git a/arch/x86/mm/mem_encrypt.c b/arch/x86/mm/mem_encrypt.c > index f4bd4b431ba1..c9800fa811f6 100644 > --- a/arch/x86/mm/mem_encrypt.c > +++ b/arch/x86/mm/mem_encrypt.c > @@ -19,6 +19,7 @@ > #include > #include > #include > +#include > > #include > #include > @@ -29,6 +30,7 @@ > #include > #include > #include > +#include > > #include "mm_internal.h" > > @@ -196,6 +198,47 @@ void __init sme_early_init(void) > swiotlb_force = SWIOTLB_FORCE; > } > > +static void set_memory_enc_dec_hypercall(unsigned long vaddr, int npages, > + bool enc) > +{ > + unsigned long sz = npages << PAGE_SHIFT; > + unsigned long vaddr_end, vaddr_next; > + > + vaddr_end = vaddr + sz; > + > + for (; vaddr < vaddr_end; vaddr = vaddr_next) { > + int psize, pmask, level; > + unsigned long pfn; > + pte_t *kpte; > + > + kpte = lookup_address(vaddr, ); > + if (!kpte || pte_none(*kpte)) > + return; > + > + switch (level) { > + case PG_LEVEL_4K: > + pfn = pte_pfn(*kpte); > + break; > + case PG_LEVEL_2M: > + pfn = pmd_pfn(*(pmd_t *)kpte); > + break; > + case PG_LEVEL_1G: > + pfn = pud_pfn(*(pud_t *)kpte); > + break; > + default: > + return; > + } > + > + psize = page_level_size(level); > + pmask = page_level_mask(level); > + > + kvm_sev_hypercall3(KVM_HC_PAGE_ENC_STATUS, > + pfn << PAGE_SHIFT, psize >> PAGE_SHIFT, > enc); > + > + vaddr_next = (vaddr & pmask) + psize; > + } > +} > +
Re: [PATCH v8 08/18] KVM: X86: Introduce KVM_HC_PAGE_ENC_STATUS hypercall
On Tue, May 5, 2020 at 2:17 PM Ashish Kalra wrote: > > From: Brijesh Singh > > This hypercall is used by the SEV guest to notify a change in the page > encryption status to the hypervisor. The hypercall should be invoked > only when the encryption attribute is changed from encrypted -> decrypted > and vice versa. By default all guest pages are considered encrypted. > > Cc: Thomas Gleixner > Cc: Ingo Molnar > Cc: "H. Peter Anvin" > Cc: Paolo Bonzini > Cc: "Radim Krčmář" > Cc: Joerg Roedel > Cc: Borislav Petkov > Cc: Tom Lendacky > Cc: x...@kernel.org > Cc: k...@vger.kernel.org > Cc: linux-kernel@vger.kernel.org > Reviewed-by: Venu Busireddy > Signed-off-by: Brijesh Singh > Signed-off-by: Ashish Kalra > --- > Documentation/virt/kvm/hypercalls.rst | 15 + > arch/x86/include/asm/kvm_host.h | 2 + > arch/x86/kvm/svm/sev.c| 90 +++ > arch/x86/kvm/svm/svm.c| 2 + > arch/x86/kvm/svm/svm.h| 4 ++ > arch/x86/kvm/vmx/vmx.c| 1 + > arch/x86/kvm/x86.c| 6 ++ > include/uapi/linux/kvm_para.h | 1 + > 8 files changed, 121 insertions(+) > > diff --git a/Documentation/virt/kvm/hypercalls.rst > b/Documentation/virt/kvm/hypercalls.rst > index dbaf207e560d..ff5287e68e81 100644 > --- a/Documentation/virt/kvm/hypercalls.rst > +++ b/Documentation/virt/kvm/hypercalls.rst > @@ -169,3 +169,18 @@ a0: destination APIC ID > > :Usage example: When sending a call-function IPI-many to vCPUs, yield if > any of the IPI target vCPUs was preempted. > + > + > +8. KVM_HC_PAGE_ENC_STATUS > +- > +:Architecture: x86 > +:Status: active > +:Purpose: Notify the encryption status changes in guest page table (SEV > guest) > + > +a0: the guest physical address of the start page > +a1: the number of pages > +a2: encryption attribute > + > + Where: > + * 1: Encryption attribute is set > + * 0: Encryption attribute is cleared > diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h > index 42a2d0d3984a..4a8ee22f4f5b 100644 > --- a/arch/x86/include/asm/kvm_host.h > +++ b/arch/x86/include/asm/kvm_host.h > @@ -1254,6 +1254,8 @@ struct kvm_x86_ops { > > bool (*apic_init_signal_blocked)(struct kvm_vcpu *vcpu); > int (*enable_direct_tlbflush)(struct kvm_vcpu *vcpu); > + int (*page_enc_status_hc)(struct kvm *kvm, unsigned long gpa, > + unsigned long sz, unsigned long mode); > }; > > struct kvm_x86_init_ops { > diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c > index 698704defbcd..f088467708f0 100644 > --- a/arch/x86/kvm/svm/sev.c > +++ b/arch/x86/kvm/svm/sev.c > @@ -1347,6 +1347,93 @@ static int sev_receive_finish(struct kvm *kvm, struct > kvm_sev_cmd *argp) > return ret; > } > > +static int sev_resize_page_enc_bitmap(struct kvm *kvm, unsigned long > new_size) > +{ > + struct kvm_sev_info *sev = _kvm_svm(kvm)->sev_info; > + unsigned long *map; > + unsigned long sz; > + > + if (sev->page_enc_bmap_size >= new_size) > + return 0; > + > + sz = ALIGN(new_size, BITS_PER_LONG) / 8; > + > + map = vmalloc(sz); > + if (!map) { > + pr_err_once("Failed to allocate encrypted bitmap size %lx\n", > + sz); > + return -ENOMEM; > + } > + > + /* mark the page encrypted (by default) */ > + memset(map, 0xff, sz); > + > + bitmap_copy(map, sev->page_enc_bmap, sev->page_enc_bmap_size); > + kvfree(sev->page_enc_bmap); > + > + sev->page_enc_bmap = map; > + sev->page_enc_bmap_size = new_size; > + > + return 0; > +} > + > +int svm_page_enc_status_hc(struct kvm *kvm, unsigned long gpa, > + unsigned long npages, unsigned long enc) > +{ > + struct kvm_sev_info *sev = _kvm_svm(kvm)->sev_info; > + kvm_pfn_t pfn_start, pfn_end; > + gfn_t gfn_start, gfn_end; > + > + if (!sev_guest(kvm)) > + return -EINVAL; > + > + if (!npages) > + return 0; > + > + gfn_start = gpa_to_gfn(gpa); > + gfn_end = gfn_start + npages; > + > + /* out of bound access error check */ > + if (gfn_end <= gfn_start) > + return -EINVAL; > + > + /* lets make sure that gpa exist in our memslot */ > + pfn_start = gfn_to_pfn(kvm, gfn_start); > + pfn_end = gfn_to_pfn(kvm, gfn_end); > + > + if (is_error_noslot_pfn(pfn_start) && !is_noslot_pfn(pfn_start)) { > + /* > +* Allow guest MMIO range(s) to be added > +* to the page encryption bitmap. > +*/ > + return -EINVAL; > + } > + > + if (is_error_noslot_pfn(pfn_end) && !is_noslot_pfn(pfn_end)) { > + /* > +* Allow guest MMIO range(s) to be added > +*
Re: [PATCH 17/30] KVM: nSVM: synchronize VMCB controls updated by the processor on every vmexit
On 5/29/20 8:39 AM, Paolo Bonzini wrote: The control state changes on every L2->L0 vmexit, and we will have to serialize it in the nested state. So keep it up to date in svm->nested.ctl and just copy them back to the nested VMCB in nested_svm_vmexit. Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm/nested.c | 57 ++- arch/x86/kvm/svm/svm.c| 5 +++- arch/x86/kvm/svm/svm.h| 1 + 3 files changed, 38 insertions(+), 25 deletions(-) diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c index 1e5f460b5540..921466eba556 100644 --- a/arch/x86/kvm/svm/nested.c +++ b/arch/x86/kvm/svm/nested.c @@ -234,6 +234,34 @@ static void load_nested_vmcb_control(struct vcpu_svm *svm, svm->nested.ctl.iopm_base_pa &= ~0x0fffULL; } +/* + * Synchronize fields that are written by the processor, so that + * they can be copied back into the nested_vmcb. + */ +void sync_nested_vmcb_control(struct vcpu_svm *svm) +{ + u32 mask; + svm->nested.ctl.event_inj = svm->vmcb->control.event_inj; + svm->nested.ctl.event_inj_err = svm->vmcb->control.event_inj_err; + + /* Only a few fields of int_ctl are written by the processor. */ + mask = V_IRQ_MASK | V_TPR_MASK; + if (!(svm->nested.ctl.int_ctl & V_INTR_MASKING_MASK) && + is_intercept(svm, SVM_EXIT_VINTR)) { + /* +* In order to request an interrupt window, L0 is usurping +* svm->vmcb->control.int_ctl and possibly setting V_IRQ +* even if it was clear in L1's VMCB. Restoring it would be +* wrong. However, in this case V_IRQ will remain true until +* interrupt_window_interception calls svm_clear_vintr and +* restores int_ctl. We can just leave it aside. +*/ + mask &= ~V_IRQ_MASK; + } + svm->nested.ctl.int_ctl&= ~mask; + svm->nested.ctl.int_ctl|= svm->vmcb->control.int_ctl & mask; +} + static void nested_prepare_vmcb_save(struct vcpu_svm *svm, struct vmcb *nested_vmcb) { /* Load the nested guest state */ @@ -471,6 +499,7 @@ int nested_svm_vmexit(struct vcpu_svm *svm) /* Exit Guest-Mode */ leave_guest_mode(>vcpu); svm->nested.vmcb = 0; + WARN_ON_ONCE(svm->nested.nested_run_pending); /* in case we halted in L2 */ svm->vcpu.arch.mp_state = KVM_MP_STATE_RUNNABLE; @@ -497,8 +526,6 @@ int nested_svm_vmexit(struct vcpu_svm *svm) nested_vmcb->save.dr6= svm->vcpu.arch.dr6; nested_vmcb->save.cpl= vmcb->save.cpl; - nested_vmcb->control.int_ctl = vmcb->control.int_ctl; - nested_vmcb->control.int_vector= vmcb->control.int_vector; While it's not related to this patch, I am wondering if we need the following line in enter_svm_guest_mode(): svm->vmcb->control.int_vector = nested_vmcb->control.int_vector; If int_vector is used only to trigger a #VMEXIT after we have decided to inject a virtual interrupt, we probably don't the vector value from the nested vmcb. nested_vmcb->control.int_state = vmcb->control.int_state; nested_vmcb->control.exit_code = vmcb->control.exit_code; nested_vmcb->control.exit_code_hi = vmcb->control.exit_code_hi; @@ -510,34 +537,16 @@ int nested_svm_vmexit(struct vcpu_svm *svm) if (svm->nrips_enabled) nested_vmcb->control.next_rip = vmcb->control.next_rip; - /* -* If we emulate a VMRUN/#VMEXIT in the same host #vmexit cycle we have -* to make sure that we do not lose injected events. So check event_inj -* here and copy it to exit_int_info if it is valid. -* Exit_int_info and event_inj can't be both valid because the case -* below only happens on a VMRUN instruction intercept which has -* no valid exit_int_info set. -*/ - if (vmcb->control.event_inj & SVM_EVTINJ_VALID) { - struct vmcb_control_area *nc = _vmcb->control; - - nc->exit_int_info = vmcb->control.event_inj; - nc->exit_int_info_err = vmcb->control.event_inj_err; - } - - nested_vmcb->control.tlb_ctl = 0; - nested_vmcb->control.event_inj = 0; - nested_vmcb->control.event_inj_err = 0; + nested_vmcb->control.int_ctl = svm->nested.ctl.int_ctl; + nested_vmcb->control.tlb_ctl = svm->nested.ctl.tlb_ctl; + nested_vmcb->control.event_inj = svm->nested.ctl.event_inj; + nested_vmcb->control.event_inj_err = svm->nested.ctl.event_inj_err; nested_vmcb->control.pause_filter_count = svm->vmcb->control.pause_filter_count; nested_vmcb->control.pause_filter_thresh = svm->vmcb->control.pause_filter_thresh; - /* We always set V_INTR_MASKING and remember the old value in hflags */ - if
Re: [PATCH v8 09/18] KVM: x86: Introduce KVM_GET_PAGE_ENC_BITMAP ioctl
On Tue, May 5, 2020 at 2:17 PM Ashish Kalra wrote: > > From: Brijesh Singh > > The ioctl can be used to retrieve page encryption bitmap for a given > gfn range. > > Return the correct bitmap as per the number of pages being requested > by the user. Ensure that we only copy bmap->num_pages bytes in the > userspace buffer, if bmap->num_pages is not byte aligned we read > the trailing bits from the userspace and copy those bits as is. > > Cc: Thomas Gleixner > Cc: Ingo Molnar > Cc: "H. Peter Anvin" > Cc: Paolo Bonzini > Cc: "Radim Krčmář" > Cc: Joerg Roedel > Cc: Borislav Petkov > Cc: Tom Lendacky > Cc: x...@kernel.org > Cc: k...@vger.kernel.org > Cc: linux-kernel@vger.kernel.org > Reviewed-by: Venu Busireddy > Signed-off-by: Brijesh Singh > Signed-off-by: Ashish Kalra > --- > Documentation/virt/kvm/api.rst | 27 + > arch/x86/include/asm/kvm_host.h | 2 + > arch/x86/kvm/svm/sev.c | 70 + > arch/x86/kvm/svm/svm.c | 1 + > arch/x86/kvm/svm/svm.h | 1 + > arch/x86/kvm/x86.c | 12 ++ > include/uapi/linux/kvm.h| 12 ++ > 7 files changed, 125 insertions(+) > > diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst > index efbbe570aa9b..ecad84086892 100644 > --- a/Documentation/virt/kvm/api.rst > +++ b/Documentation/virt/kvm/api.rst > @@ -4636,6 +4636,33 @@ This ioctl resets VCPU registers and control > structures according to > the clear cpu reset definition in the POP. However, the cpu is not put > into ESA mode. This reset is a superset of the initial reset. > > +4.125 KVM_GET_PAGE_ENC_BITMAP (vm ioctl) > +--- > + > +:Capability: basic > +:Architectures: x86 > +:Type: vm ioctl > +:Parameters: struct kvm_page_enc_bitmap (in/out) > +:Returns: 0 on success, -1 on error > + > +/* for KVM_GET_PAGE_ENC_BITMAP */ > +struct kvm_page_enc_bitmap { > + __u64 start_gfn; > + __u64 num_pages; > + union { > + void __user *enc_bitmap; /* one bit per page */ > + __u64 padding2; > + }; > +}; > + > +The encrypted VMs have the concept of private and shared pages. The private > +pages are encrypted with the guest-specific key, while the shared pages may > +be encrypted with the hypervisor key. The KVM_GET_PAGE_ENC_BITMAP can > +be used to get the bitmap indicating whether the guest page is private > +or shared. The bitmap can be used during the guest migration. If the page > +is private then the userspace need to use SEV migration commands to transmit > +the page. > + > > 4.125 KVM_S390_PV_COMMAND > - > diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h > index 4a8ee22f4f5b..9e428befb6a4 100644 > --- a/arch/x86/include/asm/kvm_host.h > +++ b/arch/x86/include/asm/kvm_host.h > @@ -1256,6 +1256,8 @@ struct kvm_x86_ops { > int (*enable_direct_tlbflush)(struct kvm_vcpu *vcpu); > int (*page_enc_status_hc)(struct kvm *kvm, unsigned long gpa, > unsigned long sz, unsigned long mode); > + int (*get_page_enc_bitmap)(struct kvm *kvm, > + struct kvm_page_enc_bitmap *bmap); > }; > > struct kvm_x86_init_ops { > diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c > index f088467708f0..387045902470 100644 > --- a/arch/x86/kvm/svm/sev.c > +++ b/arch/x86/kvm/svm/sev.c > @@ -1434,6 +1434,76 @@ int svm_page_enc_status_hc(struct kvm *kvm, unsigned > long gpa, > return 0; > } > > +int svm_get_page_enc_bitmap(struct kvm *kvm, > + struct kvm_page_enc_bitmap *bmap) > +{ > + struct kvm_sev_info *sev = _kvm_svm(kvm)->sev_info; > + unsigned long gfn_start, gfn_end; > + unsigned long sz, i, sz_bytes; > + unsigned long *bitmap; > + int ret, n; > + > + if (!sev_guest(kvm)) > + return -ENOTTY; > + > + gfn_start = bmap->start_gfn; > + gfn_end = gfn_start + bmap->num_pages; > + > + sz = ALIGN(bmap->num_pages, BITS_PER_LONG) / BITS_PER_BYTE; > + bitmap = kmalloc(sz, GFP_KERNEL); > + if (!bitmap) > + return -ENOMEM; > + > + /* by default all pages are marked encrypted */ > + memset(bitmap, 0xff, sz); > + > + mutex_lock(>lock); > + if (sev->page_enc_bmap) { > + i = gfn_start; > + for_each_clear_bit_from(i, sev->page_enc_bmap, > + min(sev->page_enc_bmap_size, gfn_end)) gfn_end is not a size? I believe you want either gfn_end - gfn_start or bmap->num_pages. > + clear_bit(i - gfn_start, bitmap); > + } > + mutex_unlock(>lock); > + > + ret = -EFAULT; > + > + n = bmap->num_pages % BITS_PER_BYTE; > + sz_bytes = ALIGN(bmap->num_pages, BITS_PER_BYTE) / BITS_PER_BYTE; > + > + /* > +* Return the correct bitmap as per
Re: [PATCH v2] rtc: rv3028: Add missed check for devm_regmap_init_i2c()
On 28/05/2020 18:39:50+0800, Chuhong Yuan wrote: > rv3028_probe() misses a check for devm_regmap_init_i2c(). > Add the missed check to fix it. > > Fixes: e6e7376cfd7b ("rtc: rv3028: add new driver") > Signed-off-by: Chuhong Yuan > --- > Changes in v2: > - Add fixes tag. > > drivers/rtc/rtc-rv3028.c | 2 ++ > 1 file changed, 2 insertions(+) > Actually, this is the one I applied. -- Alexandre Belloni, Bootlin Embedded Linux and Kernel engineering https://bootlin.com
Re: [PATCH] Revert "f2fs: fix quota_sync failure due to f2fs_lock_op"
On 2020/5/30 9:49, Jaegeuk Kim wrote: > On 05/30, Chao Yu wrote: >> On 2020/5/30 6:34, Jaegeuk Kim wrote: >>> On 05/29, Chao Yu wrote: Under heavy fsstress, we may triggle panic while issuing discard, because __check_sit_bitmap() detects that discard command may earse valid data blocks, the root cause is as below race stack described, since we removed lock when flushing quota data, quota data writeback may race with write_checkpoint(), so that it causes inconsistency in between cached discard entry and segment bitmap. - f2fs_write_checkpoint - block_operations - set_sbi_flag(sbi, SBI_QUOTA_SKIP_FLUSH) - f2fs_flush_sit_entries - add_discard_addrs - __set_bit_le(i, (void *)de->discard_map); - f2fs_write_data_pages - f2fs_write_single_data_page : inode is quota one, cp_rwsem won't be locked - f2fs_do_write_data_page - f2fs_allocate_data_block - f2fs_wait_discard_bio : discard entry has not been added yet. - update_sit_entry - f2fs_clear_prefree_segments - f2fs_issue_discard : add discard entry This patch fixes this issue by reverting 435cbab95e39 ("f2fs: fix quota_sync failure due to f2fs_lock_op"). Fixes: 435cbab95e39 ("f2fs: fix quota_sync failure due to f2fs_lock_op") >>> >>> The previous patch fixes quota_sync gets EAGAIN all the time. >>> How about this? It seems this works for fsstress test. >>> > > Then this? > > --- > fs/f2fs/segment.c | 11 +++ > 1 file changed, 11 insertions(+) > > diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c > index ebbadde6cbced..ed11dcf2d69ed 100644 > --- a/fs/f2fs/segment.c > +++ b/fs/f2fs/segment.c > @@ -3107,6 +3107,14 @@ void f2fs_allocate_data_block(struct f2fs_sb_info > *sbi, struct page *page, > type = CURSEG_COLD_DATA; > } > > + /* > + * We need to wait for node_write to avoid block allocation during > + * checkpoint. This can only happen to quota writes which can cause > + * the below discard race condition. > + */ > + if (IS_DATASEG(type)) > + down_write(>node_write); > + > down_read(_I(sbi)->curseg_lock); > > mutex_lock(>curseg_mutex); > @@ -3174,6 +3182,9 @@ void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, > struct page *page, Minor thing: unlock order. if (IS_DATASEG(type)) up_write(>node_write); Could you merge the diff into this patch? > > if (put_pin_sem) > up_read(>pin_sem); > + > + if (IS_DATASEG(type)) > + up_write(>node_write); > } > > static void update_device_state(struct f2fs_io_info *fio) >
Re: [PATCH] rtc: rv3028: add the missed check for devm_regmap_init_i2c
On Wed, 27 May 2020 23:07:04 +0800, Chuhong Yuan wrote: > rv3028_probe() misses a check for devm_regmap_init_i2c(). > Add the missed check to fix it. Applied, thanks! [1/1] rtc: rv3028: add the missed check for devm_regmap_init_i2c Best regards, -- Alexandre Belloni
[PATCH net-next v3] hinic: add set_channels ethtool_ops support
add support to change TX/RX queue number with ethtool -L ethx combined Signed-off-by: Luo bin --- .../net/ethernet/huawei/hinic/hinic_ethtool.c | 40 +++ .../net/ethernet/huawei/hinic/hinic_main.c| 2 +- drivers/net/ethernet/huawei/hinic/hinic_tx.c | 5 +++ 3 files changed, 38 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/huawei/hinic/hinic_ethtool.c b/drivers/net/ethernet/huawei/hinic/hinic_ethtool.c index ace18d258049..efb02e03e7da 100644 --- a/drivers/net/ethernet/huawei/hinic/hinic_ethtool.c +++ b/drivers/net/ethernet/huawei/hinic/hinic_ethtool.c @@ -619,14 +619,37 @@ static void hinic_get_channels(struct net_device *netdev, struct hinic_dev *nic_dev = netdev_priv(netdev); struct hinic_hwdev *hwdev = nic_dev->hwdev; - channels->max_rx = hwdev->nic_cap.max_qps; - channels->max_tx = hwdev->nic_cap.max_qps; - channels->max_other = 0; - channels->max_combined = 0; - channels->rx_count = hinic_hwdev_num_qps(hwdev); - channels->tx_count = hinic_hwdev_num_qps(hwdev); - channels->other_count = 0; - channels->combined_count = 0; + channels->max_combined = nic_dev->max_qps; + channels->combined_count = hinic_hwdev_num_qps(hwdev); +} + +static int hinic_set_channels(struct net_device *netdev, + struct ethtool_channels *channels) +{ + struct hinic_dev *nic_dev = netdev_priv(netdev); + unsigned int count = channels->combined_count; + int err; + + netif_info(nic_dev, drv, netdev, "Set max combined queue number from %d to %d\n", + hinic_hwdev_num_qps(nic_dev->hwdev), count); + + if (netif_running(netdev)) { + netif_info(nic_dev, drv, netdev, "Restarting netdev\n"); + hinic_close(netdev); + + nic_dev->hwdev->nic_cap.num_qps = count; + + err = hinic_open(netdev); + if (err) { + netif_err(nic_dev, drv, netdev, + "Failed to open netdev\n"); + return -EFAULT; + } + } else { + nic_dev->hwdev->nic_cap.num_qps = count; + } + + return 0; } static int hinic_get_rss_hash_opts(struct hinic_dev *nic_dev, @@ -1219,6 +1242,7 @@ static const struct ethtool_ops hinic_ethtool_ops = { .get_ringparam = hinic_get_ringparam, .set_ringparam = hinic_set_ringparam, .get_channels = hinic_get_channels, + .set_channels = hinic_set_channels, .get_rxnfc = hinic_get_rxnfc, .set_rxnfc = hinic_set_rxnfc, .get_rxfh_key_size = hinic_get_rxfh_key_size, diff --git a/drivers/net/ethernet/huawei/hinic/hinic_main.c b/drivers/net/ethernet/huawei/hinic/hinic_main.c index c8ab129a7ae8..e9e6f4c9309a 100644 --- a/drivers/net/ethernet/huawei/hinic/hinic_main.c +++ b/drivers/net/ethernet/huawei/hinic/hinic_main.c @@ -326,7 +326,6 @@ static void hinic_enable_rss(struct hinic_dev *nic_dev) int i, node, err = 0; u16 num_cpus = 0; - nic_dev->max_qps = hinic_hwdev_max_num_qps(hwdev); if (nic_dev->max_qps <= 1) { nic_dev->flags &= ~HINIC_RSS_ENABLE; nic_dev->rss_limit = nic_dev->max_qps; @@ -1031,6 +1030,7 @@ static int nic_dev_init(struct pci_dev *pdev) nic_dev->rq_depth = HINIC_RQ_DEPTH; nic_dev->sriov_info.hwdev = hwdev; nic_dev->sriov_info.pdev = pdev; + nic_dev->max_qps = num_qps; sema_init(_dev->mgmt_lock, 1); diff --git a/drivers/net/ethernet/huawei/hinic/hinic_tx.c b/drivers/net/ethernet/huawei/hinic/hinic_tx.c index 4c66a0bc1b28..6da761d7a6ef 100644 --- a/drivers/net/ethernet/huawei/hinic/hinic_tx.c +++ b/drivers/net/ethernet/huawei/hinic/hinic_tx.c @@ -470,6 +470,11 @@ netdev_tx_t hinic_xmit_frame(struct sk_buff *skb, struct net_device *netdev) struct hinic_txq *txq; struct hinic_qp *qp; + if (unlikely(!netif_carrier_ok(netdev))) { + dev_kfree_skb_any(skb); + return NETDEV_TX_OK; + } + txq = _dev->txqs[q_id]; qp = container_of(txq->sq, struct hinic_qp, sq); -- 2.17.1
Re: [PATCH] Revert "f2fs: fix quota_sync failure due to f2fs_lock_op"
On 05/30, Chao Yu wrote: > On 2020/5/30 6:34, Jaegeuk Kim wrote: > > On 05/29, Chao Yu wrote: > >> Under heavy fsstress, we may triggle panic while issuing discard, > >> because __check_sit_bitmap() detects that discard command may earse > >> valid data blocks, the root cause is as below race stack described, > >> since we removed lock when flushing quota data, quota data writeback > >> may race with write_checkpoint(), so that it causes inconsistency in > >> between cached discard entry and segment bitmap. > >> > >> - f2fs_write_checkpoint > >> - block_operations > >> - set_sbi_flag(sbi, SBI_QUOTA_SKIP_FLUSH) > >> - f2fs_flush_sit_entries > >> - add_discard_addrs > >>- __set_bit_le(i, (void *)de->discard_map); > >>- f2fs_write_data_pages > >> - f2fs_write_single_data_page > >> : inode is quota one, > >> cp_rwsem won't be locked > >> - f2fs_do_write_data_page > >> - f2fs_allocate_data_block > >>- f2fs_wait_discard_bio > >> : discard entry has not > >> been added yet. > >>- update_sit_entry > >> - f2fs_clear_prefree_segments > >> - f2fs_issue_discard > >> : add discard entry > >> > >> This patch fixes this issue by reverting 435cbab95e39 ("f2fs: fix > >> quota_sync > >> failure due to f2fs_lock_op"). > >> > >> Fixes: 435cbab95e39 ("f2fs: fix quota_sync failure due to f2fs_lock_op") > > > > The previous patch fixes quota_sync gets EAGAIN all the time. > > How about this? It seems this works for fsstress test. > > Then this? --- fs/f2fs/segment.c | 11 +++ 1 file changed, 11 insertions(+) diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index ebbadde6cbced..ed11dcf2d69ed 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -3107,6 +3107,14 @@ void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page, type = CURSEG_COLD_DATA; } + /* +* We need to wait for node_write to avoid block allocation during +* checkpoint. This can only happen to quota writes which can cause +* the below discard race condition. +*/ + if (IS_DATASEG(type)) + down_write(>node_write); + down_read(_I(sbi)->curseg_lock); mutex_lock(>curseg_mutex); @@ -3174,6 +3182,9 @@ void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page, if (put_pin_sem) up_read(>pin_sem); + + if (IS_DATASEG(type)) + up_write(>node_write); } static void update_device_state(struct f2fs_io_info *fio) -- 2.27.0.rc0.183.gde8f92d652-goog
[PATCH v3] virtio_vsock: Fix race condition in virtio_transport_recv_pkt
When client on the host tries to connect(SOCK_STREAM, O_NONBLOCK) to the server on the guest, there will be a panic on a ThunderX2 (armv8a server): [ 463.718844] Unable to handle kernel NULL pointer dereference at virtual address [ 463.718848] Mem abort info: [ 463.718849] ESR = 0x9644 [ 463.718852] EC = 0x25: DABT (current EL), IL = 32 bits [ 463.718853] SET = 0, FnV = 0 [ 463.718854] EA = 0, S1PTW = 0 [ 463.718855] Data abort info: [ 463.718856] ISV = 0, ISS = 0x0044 [ 463.718857] CM = 0, WnR = 1 [ 463.718859] user pgtable: 4k pages, 48-bit VAs, pgdp=008f6f6e9000 [ 463.718861] [] pgd= [ 463.718866] Internal error: Oops: 9644 [#1] SMP [...] [ 463.718977] CPU: 213 PID: 5040 Comm: vhost-5032 Tainted: G O 5.7.0-rc7+ #139 [ 463.718980] Hardware name: GIGABYTE R281-T91-00/MT91-FS1-00, BIOS F06 09/25/2018 [ 463.718982] pstate: 6049 (nZCv daif +PAN -UAO) [ 463.718995] pc : virtio_transport_recv_pkt+0x4c8/0xd40 [vmw_vsock_virtio_transport_common] [ 463.718999] lr : virtio_transport_recv_pkt+0x1fc/0xd40 [vmw_vsock_virtio_transport_common] [ 463.719000] sp : 80002dbe3c40 [...] [ 463.719025] Call trace: [ 463.719030] virtio_transport_recv_pkt+0x4c8/0xd40 [vmw_vsock_virtio_transport_common] [ 463.719034] vhost_vsock_handle_tx_kick+0x360/0x408 [vhost_vsock] [ 463.719041] vhost_worker+0x100/0x1a0 [vhost] [ 463.719048] kthread+0x128/0x130 [ 463.719052] ret_from_fork+0x10/0x18 The race condition is as follows: Task1Task2 == __sock_release virtio_transport_recv_pkt __vsock_release vsock_find_bound_socket (found sk) lock_sock_nested vsock_remove_sock sock_orphan sk_set_socket(sk, NULL) sk->sk_shutdown = SHUTDOWN_MASK ... release_sock lock_sock virtio_transport_recv_connecting sk->sk_socket->state (panic!) The root cause is that vsock_find_bound_socket can't hold the lock_sock, so there is a small race window between vsock_find_bound_socket() and lock_sock(). If __vsock_release() is running in another task, sk->sk_socket will be set to NULL inadvertently. This fixes it by checking sk->sk_shutdown(suggested by Stefano) after lock_sock since sk->sk_shutdown is set to SHUTDOWN_MASK under the protection of lock_sock_nested. Signed-off-by: Jia He Cc: sta...@vger.kernel.org Reviewed-by: Stefano Garzarella --- v3: - describe the fix of race condition more clearly - refine the commit log net/vmw_vsock/virtio_transport_common.c | 8 1 file changed, 8 insertions(+) diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c index 69efc891885f..0edda1edf988 100644 --- a/net/vmw_vsock/virtio_transport_common.c +++ b/net/vmw_vsock/virtio_transport_common.c @@ -1132,6 +1132,14 @@ void virtio_transport_recv_pkt(struct virtio_transport *t, lock_sock(sk); + /* Check if sk has been released before lock_sock */ + if (sk->sk_shutdown == SHUTDOWN_MASK) { + (void)virtio_transport_reset_no_sock(t, pkt); + release_sock(sk); + sock_put(sk); + goto free_pkt; + } + /* Update CID in case it has changed after a transport reset event */ vsk->local_addr.svm_cid = dst.svm_cid; -- 2.17.1
Re: [PATCH] HID: usbhid: do not sleep when opening device
On Fri, May 29, 2020 at 06:22:49PM -0700, Guenter Roeck wrote: > On Fri, May 29, 2020 at 6:09 PM Dmitry Torokhov > wrote: > > > > On Fri, May 29, 2020 at 05:48:26PM -0700, Guenter Roeck wrote: > > > On Fri, May 29, 2020 at 4:50 PM Nicolas Boichat > > > wrote: > > > > > > > > On Sat, May 30, 2020 at 3:59 AM Dmitry Torokhov > > > > wrote: > > > > > > > > > > usbhid tries to give the device 50 milliseconds to drain its queues > > > > > when opening the device, but does it naively by simply sleeping in > > > > > open > > > > > handler, which slows down device probing (and thus may affect overall > > > > > boot time). > > > > > > > > > > However we do not need to sleep as we can instead mark a point of time > > > > > in the future when we should start processing the events. > > > > > > > > > > Reported-by: Nicolas Boichat > > > > > Signed-off-by: Dmitry Torokhov > > > > > --- > > > > > drivers/hid/usbhid/hid-core.c | 27 +++ > > > > > drivers/hid/usbhid/usbhid.h | 1 + > > > > > 2 files changed, 16 insertions(+), 12 deletions(-) > > > > > > > > > > diff --git a/drivers/hid/usbhid/hid-core.c > > > > > b/drivers/hid/usbhid/hid-core.c > > > > > index c7bc9db5b192..e69992e945b2 100644 > > > > > --- a/drivers/hid/usbhid/hid-core.c > > > > > +++ b/drivers/hid/usbhid/hid-core.c > > > > > @@ -95,6 +95,19 @@ static int hid_start_in(struct hid_device *hid) > > > > > set_bit(HID_NO_BANDWIDTH, > > > > > >iofl); > > > > > } else { > > > > > clear_bit(HID_NO_BANDWIDTH, >iofl); > > > > > + > > > > > + if (test_and_clear_bit(HID_RESUME_RUNNING, > > > > > + >iofl)) { > > > > > + /* > > > > > +* In case events are generated while > > > > > nobody was > > > > > +* listening, some are released when > > > > > the device > > > > > +* is re-opened. Wait 50 msec for the > > > > > queue to > > > > > +* empty before allowing events to go > > > > > through > > > > > +* hid. > > > > > +*/ > > > > > + usbhid->input_start_time = jiffies + > > > > > + > > > > > msecs_to_jiffies(50); > > > > > + } > > > > > } > > > > > } > > > > > spin_unlock_irqrestore(>lock, flags); > > > > > @@ -280,7 +293,8 @@ static void hid_irq_in(struct urb *urb) > > > > > if (!test_bit(HID_OPENED, >iofl)) > > > > > break; > > > > > usbhid_mark_busy(usbhid); > > > > > - if (!test_bit(HID_RESUME_RUNNING, >iofl)) { > > > > > + if (!test_bit(HID_RESUME_RUNNING, >iofl) && > > > > > + time_after(jiffies, usbhid->input_start_time)) { > > > > > > > > Are we worried about jiffies overflowing (32-bit@1000Hz is "only" 49.7 > > > > days...) > > > > > > > > > > time_after() is overflow-safe. That is why it is used and jiffies is > > > not compared directly. > > > > Well, it is overflow safe, but still can not measure more than 50 days, > > so if you have a device open for 50+ days there will be a 50msec gap > > where it may lose events. > > > > Or you could explicitly use 64-bit jiffies. Indeed. Jiri, Benjamin, do you have preference between jiffies64 and ktime_t? I guess jiffies64 is a tiny bit less expensive. Thanks. -- Dmitry
Re: [PATCH 1/2] rtc: goldfish: Use correct return value for goldfish_rtc_probe()
On Mon, 25 May 2020 09:39:47 +0800, Tiezhu Yang wrote: > When call function devm_platform_ioremap_resource(), we should use IS_ERR() > to check the return value and return PTR_ERR() if failed. Applied, thanks! [1/2] rtc: goldfish: Use correct return value for goldfish_rtc_probe() commit: e712bb5c45313b3ab35ece2fb0a44cdc49548bd9 [2/2] rtc: mpc5121: Use correct return value for mpc5121_rtc_probe() commit: 4c2a13a2d686a7412d862802156eebd0e15df7ad Best regards, -- Alexandre Belloni
Re: [PATCH net-next v2] hinic: add set_channels ethtool_ops support
On 2020/5/30 1:44, Jakub Kicinski wrote: On Thu, 28 May 2020 18:36:33 + Luo bin wrote: add support to change TX/RX queue number with ethtool -L Signed-off-by: Luo bin Luo bin, your patches continue to come with Date: header being in the past. Also suspiciously no time zone offset. Can you address this? +static int hinic_set_channels(struct net_device *netdev, + struct ethtool_channels *channels) +{ + struct hinic_dev *nic_dev = netdev_priv(netdev); + unsigned int count = channels->combined_count; + int err; + + if (!count) { + netif_err(nic_dev, drv, netdev, + "Unsupported combined_count: 0\n"); + return -EINVAL; + } This check has been added to the core since the last version of you patch: /* ensure there is at least one RX and one TX channel */ if (!channels.combined_count && (!channels.rx_count || !channels.tx_count)) return -EINVAL; + netif_info(nic_dev, drv, netdev, "Set max combined queue number from %d to %d\n", + hinic_hwdev_num_qps(nic_dev->hwdev), count); + + if (netif_running(netdev)) { + netif_info(nic_dev, drv, netdev, "Restarting netdev\n"); + hinic_close(netdev); + + nic_dev->hwdev->nic_cap.num_qps = count; + + err = hinic_open(netdev); + if (err) { + netif_err(nic_dev, drv, netdev, + "Failed to open netdev\n"); + return -EFAULT; + } + } else { + nic_dev->hwdev->nic_cap.num_qps = count; + } + + return 0; } Will fix. Thanks. .
Re: [PATCH] Revert "f2fs: fix quota_sync failure due to f2fs_lock_op"
On 2020/5/30 6:34, Jaegeuk Kim wrote: > On 05/29, Chao Yu wrote: >> Under heavy fsstress, we may triggle panic while issuing discard, >> because __check_sit_bitmap() detects that discard command may earse >> valid data blocks, the root cause is as below race stack described, >> since we removed lock when flushing quota data, quota data writeback >> may race with write_checkpoint(), so that it causes inconsistency in >> between cached discard entry and segment bitmap. >> >> - f2fs_write_checkpoint >> - block_operations >> - set_sbi_flag(sbi, SBI_QUOTA_SKIP_FLUSH) >> - f2fs_flush_sit_entries >> - add_discard_addrs >>- __set_bit_le(i, (void *)de->discard_map); >> - f2fs_write_data_pages >> - f2fs_write_single_data_page >> : inode is quota one, >> cp_rwsem won't be locked >>- f2fs_do_write_data_page >> - f2fs_allocate_data_block >> - f2fs_wait_discard_bio >>: discard entry has not >> been added yet. >> - update_sit_entry >> - f2fs_clear_prefree_segments >> - f2fs_issue_discard >> : add discard entry >> >> This patch fixes this issue by reverting 435cbab95e39 ("f2fs: fix quota_sync >> failure due to f2fs_lock_op"). >> >> Fixes: 435cbab95e39 ("f2fs: fix quota_sync failure due to f2fs_lock_op") > > The previous patch fixes quota_sync gets EAGAIN all the time. > How about this? It seems this works for fsstress test. > > --- > fs/f2fs/segment.c | 11 +++ > 1 file changed, 11 insertions(+) > > diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c > index ebbadde6cbced..f67cffc38975e 100644 > --- a/fs/f2fs/segment.c > +++ b/fs/f2fs/segment.c > @@ -3095,6 +3095,14 @@ void f2fs_allocate_data_block(struct f2fs_sb_info > *sbi, struct page *page, > struct curseg_info *curseg = CURSEG_I(sbi, type); > bool put_pin_sem = false; > > + /* > + * We need to wait for node_write to avoid block allocation during > + * checkpoint. This can only happen to quota writes which can cause > + * the below discard race condition. > + */ > + if (IS_DATASEG(type)) type is CURSEG_COLD_DATA_PINNED, IS_DATASEG(CURSEG_COLD_DATA_PINNED) should be false, then node_write lock will not be held, later type will be updated to CURSEG_COLD_DATA, then we will try to release node_write. Thanks, > + down_write(>node_write); > + > if (type == CURSEG_COLD_DATA) { > /* GC during CURSEG_COLD_DATA_PINNED allocation */ > if (down_read_trylock(>pin_sem)) { > @@ -3174,6 +3182,9 @@ void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, > struct page *page, > > if (put_pin_sem) > up_read(>pin_sem); > + > + if (IS_DATASEG(type)) > + up_write(>node_write); > } > > static void update_device_state(struct f2fs_io_info *fio) >
Re: 28307d938f ("percpu: make pcpu_alloc() aware of current gfp .."): BUG: kernel reboot-without-warning in boot stage
On Fri, May 29, 2020 at 01:32:28PM +0100, Filipe Manana wrote: > > > On 29/05/20 08:16, kernel test robot wrote: > > Greetings, > > > > 0day kernel testing robot got the below dmesg and the first bad commit is > > > > https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git master > > > > commit 28307d938fb2e4056ed4c982c06d1503d7719813 > > Author: Filipe Manana > > AuthorDate: Thu May 7 18:36:10 2020 -0700 > > Commit: Linus Torvalds > > CommitDate: Thu May 7 19:27:21 2020 -0700 > > > > percpu: make pcpu_alloc() aware of current gfp context > > > > Since 5.7-rc1, on btrfs we have a percpu counter initialization for > > which we always pass a GFP_KERNEL gfp_t argument (this happens since > > commit 2992df73268f78 ("btrfs: Implement DREW lock")). > > > > That is safe in some contextes but not on others where allowing fs > > reclaim could lead to a deadlock because we are either holding some > > btrfs lock needed for a transaction commit or holding a btrfs > > transaction handle open. Because of that we surround the call to the > > function that initializes the percpu counter with a NOFS context using > > memalloc_nofs_save() (this is done at btrfs_init_fs_root()). > > > > However it turns out that this is not enough to prevent a possible > > deadlock because percpu_alloc() determines if it is in an atomic context > > by looking exclusively at the gfp flags passed to it (GFP_KERNEL in this > > case) and it is not aware that a NOFS context is set. > > > > Because percpu_alloc() thinks it is in a non atomic context it locks the > > pcpu_alloc_mutex. This can result in a btrfs deadlock when > > pcpu_balance_workfn() is running, has acquired that mutex and is waiting > > for reclaim, while the btrfs task that called percpu_counter_init() (and > > therefore percpu_alloc()) is holding either the btrfs commit_root > > semaphore or a transaction handle (done fs/btrfs/backref.c: > > iterate_extent_inodes()), which prevents reclaim from finishing as an > > attempt to commit the current btrfs transaction will deadlock. > > > > Lockdep reports this issue with the following trace: > > > > == > > WARNING: possible circular locking dependency detected > > 5.6.0-rc7-btrfs-next-77 #1 Not tainted > > -- > > kswapd0/91 is trying to acquire lock: > > 8938a3b3fdc8 (_node->mutex){+.+.}, at: > > __btrfs_release_delayed_node.part.0+0x3f/0x320 [btrfs] > > > > but task is already holding lock: > > b4f0dbc0 (fs_reclaim){+.+.}, at: __fs_reclaim_acquire+0x5/0x30 > > > > which lock already depends on the new lock. > > > > the existing dependency chain (in reverse order) is: > > > > -> #4 (fs_reclaim){+.+.}: > > fs_reclaim_acquire.part.0+0x25/0x30 > > __kmalloc+0x5f/0x3a0 > > pcpu_create_chunk+0x19/0x230 > > pcpu_balance_workfn+0x56a/0x680 > > process_one_work+0x235/0x5f0 > > worker_thread+0x50/0x3b0 > > kthread+0x120/0x140 > > ret_from_fork+0x3a/0x50 > > > > -> #3 (pcpu_alloc_mutex){+.+.}: > > __mutex_lock+0xa9/0xaf0 > > pcpu_alloc+0x480/0x7c0 > > __percpu_counter_init+0x50/0xd0 > > btrfs_drew_lock_init+0x22/0x70 [btrfs] > > btrfs_get_fs_root+0x29c/0x5c0 [btrfs] > > resolve_indirect_refs+0x120/0xa30 [btrfs] > > find_parent_nodes+0x50b/0xf30 [btrfs] > > btrfs_find_all_leafs+0x60/0xb0 [btrfs] > > iterate_extent_inodes+0x139/0x2f0 [btrfs] > > iterate_inodes_from_logical+0xa1/0xe0 [btrfs] > > btrfs_ioctl_logical_to_ino+0xb4/0x190 [btrfs] > > btrfs_ioctl+0x165a/0x3130 [btrfs] > > ksys_ioctl+0x87/0xc0 > > __x64_sys_ioctl+0x16/0x20 > > do_syscall_64+0x5c/0x260 > > entry_SYSCALL_64_after_hwframe+0x49/0xbe > > > > -> #2 (_info->commit_root_sem){}: > > down_write+0x38/0x70 > > btrfs_cache_block_group+0x2ec/0x500 [btrfs] > > find_free_extent+0xc6a/0x1600 [btrfs] > > btrfs_reserve_extent+0x9b/0x180 [btrfs] > > btrfs_alloc_tree_block+0xc1/0x350 [btrfs] > > alloc_tree_block_no_bg_flush+0x4a/0x60 [btrfs] > > __btrfs_cow_block+0x122/0x5a0 [btrfs] > > btrfs_cow_block+0x106/0x240 [btrfs] > > commit_cowonly_roots+0x55/0x310 [btrfs] > > btrfs_commit_transaction+0x509/0xb20 [btrfs] > > sync_filesystem+0x74/0x90 > > generic_shutdown_super+0x22/0x100 > > kill_anon_super+0x14/0x30 > >
Re: [PATCH] HID: usbhid: do not sleep when opening device
On Fri, May 29, 2020 at 6:09 PM Dmitry Torokhov wrote: > > On Fri, May 29, 2020 at 05:48:26PM -0700, Guenter Roeck wrote: > > On Fri, May 29, 2020 at 4:50 PM Nicolas Boichat > > wrote: > > > > > > On Sat, May 30, 2020 at 3:59 AM Dmitry Torokhov > > > wrote: > > > > > > > > usbhid tries to give the device 50 milliseconds to drain its queues > > > > when opening the device, but does it naively by simply sleeping in open > > > > handler, which slows down device probing (and thus may affect overall > > > > boot time). > > > > > > > > However we do not need to sleep as we can instead mark a point of time > > > > in the future when we should start processing the events. > > > > > > > > Reported-by: Nicolas Boichat > > > > Signed-off-by: Dmitry Torokhov > > > > --- > > > > drivers/hid/usbhid/hid-core.c | 27 +++ > > > > drivers/hid/usbhid/usbhid.h | 1 + > > > > 2 files changed, 16 insertions(+), 12 deletions(-) > > > > > > > > diff --git a/drivers/hid/usbhid/hid-core.c > > > > b/drivers/hid/usbhid/hid-core.c > > > > index c7bc9db5b192..e69992e945b2 100644 > > > > --- a/drivers/hid/usbhid/hid-core.c > > > > +++ b/drivers/hid/usbhid/hid-core.c > > > > @@ -95,6 +95,19 @@ static int hid_start_in(struct hid_device *hid) > > > > set_bit(HID_NO_BANDWIDTH, > > > > >iofl); > > > > } else { > > > > clear_bit(HID_NO_BANDWIDTH, >iofl); > > > > + > > > > + if (test_and_clear_bit(HID_RESUME_RUNNING, > > > > + >iofl)) { > > > > + /* > > > > +* In case events are generated while > > > > nobody was > > > > +* listening, some are released when > > > > the device > > > > +* is re-opened. Wait 50 msec for the > > > > queue to > > > > +* empty before allowing events to go > > > > through > > > > +* hid. > > > > +*/ > > > > + usbhid->input_start_time = jiffies + > > > > + > > > > msecs_to_jiffies(50); > > > > + } > > > > } > > > > } > > > > spin_unlock_irqrestore(>lock, flags); > > > > @@ -280,7 +293,8 @@ static void hid_irq_in(struct urb *urb) > > > > if (!test_bit(HID_OPENED, >iofl)) > > > > break; > > > > usbhid_mark_busy(usbhid); > > > > - if (!test_bit(HID_RESUME_RUNNING, >iofl)) { > > > > + if (!test_bit(HID_RESUME_RUNNING, >iofl) && > > > > + time_after(jiffies, usbhid->input_start_time)) { > > > > > > Are we worried about jiffies overflowing (32-bit@1000Hz is "only" 49.7 > > > days...) > > > > > > > time_after() is overflow-safe. That is why it is used and jiffies is > > not compared directly. > > Well, it is overflow safe, but still can not measure more than 50 days, > so if you have a device open for 50+ days there will be a 50msec gap > where it may lose events. > Or you could explicitly use 64-bit jiffies. Guenter > I guess we can switch to ktime(). A bit more expensive on 32 bits, but > in reality I do not think anyone would care. > > Thanks. > > -- > Dmitry
Re: [PATCH 1/2] rtc: snvs: Make SNVS clock always prepared
On Fri, 22 May 2020 10:19:55 +0800, Anson Huang wrote: > In IRQ handler, ONLY clock enable/disable is called due to > clock prepare can NOT be called in interrupt context, but > clock enable/disable will return failure if prepare count > is 0, to fix this issue, just make SNVS clock always prepared > there, the SNVS clock has no prepare function implemented, > so it won't impact anything. Applied, thanks! [1/2] rtc: snvs: Make SNVS clock always prepared commit: 20af67700bc39bccd838414128f63a72965de6e7 [2/2] rtc: snvs: Add necessary clock operations for RTC APIs commit: 4b957bde561f3a56865395be06f1be2c196b0b5e Best regards, -- Alexandre Belloni
Darowizna
Nazywam się SHANE MISSLER, wygrałem loterię o wartości 451 milionów dolarów i postanowiłem przekazać tobie kwotę 5500 000,00 €. Przekazuję wam tę darowiznę za miłość, którą mam dla ludzkości i za pomoc osobom dotkniętym pandemią w waszym kraju.Skontaktuj się ze mną, aby odebrać tę darowiznęDziękuję i Bóg zapłać.pozdrowieniaSHANE MISSLER
RE: [PATCH v2] virtio_vsock: Fix race condition in virtio_transport_recv_pkt
Hi Stefano > -Original Message- > From: Stefano Garzarella > Sent: Saturday, May 30, 2020 12:34 AM > To: Justin He > Cc: Stefan Hajnoczi ; David S. Miller > ; Jakub Kicinski ; > k...@vger.kernel.org; virtualizat...@lists.linux-foundation.org; > net...@vger.kernel.org; linux-kernel@vger.kernel.org; Kaly Xin > ; sta...@vger.kernel.org > Subject: Re: [PATCH v2] virtio_vsock: Fix race condition in > virtio_transport_recv_pkt > > On Fri, May 29, 2020 at 11:21:02PM +0800, Jia He wrote: > > When client tries to connect(SOCK_STREAM) the server in the guest with > > NONBLOCK mode, there will be a panic on a ThunderX2 (armv8a server): > > [ 463.718844][ T5040] Unable to handle kernel NULL pointer dereference > at virtual address > > [ 463.718848][ T5040] Mem abort info: > > [ 463.718849][ T5040] ESR = 0x9644 > > [ 463.718852][ T5040] EC = 0x25: DABT (current EL), IL = 32 bits > > [ 463.718853][ T5040] SET = 0, FnV = 0 > > [ 463.718854][ T5040] EA = 0, S1PTW = 0 > > [ 463.718855][ T5040] Data abort info: > > [ 463.718856][ T5040] ISV = 0, ISS = 0x0044 > > [ 463.718857][ T5040] CM = 0, WnR = 1 > > [ 463.718859][ T5040] user pgtable: 4k pages, 48-bit VAs, > pgdp=008f6f6e9000 > > [ 463.718861][ T5040] [] pgd= > > [ 463.718866][ T5040] Internal error: Oops: 9644 [#1] SMP > > [...] > > [ 463.718977][ T5040] CPU: 213 PID: 5040 Comm: vhost-5032 Tainted: G > O 5.7.0-rc7+ #139 > > [ 463.718980][ T5040] Hardware name: GIGABYTE R281-T91-00/MT91-FS1-00, > BIOS F06 09/25/2018 > > [ 463.718982][ T5040] pstate: 6049 (nZCv daif +PAN -UAO) > > [ 463.718995][ T5040] pc : virtio_transport_recv_pkt+0x4c8/0xd40 > [vmw_vsock_virtio_transport_common] > > [ 463.718999][ T5040] lr : virtio_transport_recv_pkt+0x1fc/0xd40 > [vmw_vsock_virtio_transport_common] > > [ 463.719000][ T5040] sp : 80002dbe3c40 > > [...] > > [ 463.719025][ T5040] Call trace: > > [ 463.719030][ T5040] virtio_transport_recv_pkt+0x4c8/0xd40 > [vmw_vsock_virtio_transport_common] > > [ 463.719034][ T5040] vhost_vsock_handle_tx_kick+0x360/0x408 > [vhost_vsock] > > [ 463.719041][ T5040] vhost_worker+0x100/0x1a0 [vhost] > > [ 463.719048][ T5040] kthread+0x128/0x130 > > [ 463.719052][ T5040] ret_from_fork+0x10/0x18 > ^ ^ > Maybe we can remove these two columns from the commit message. > > > > > The race condition as follows: > > Task1Task2 > > == > > __sock_release virtio_transport_recv_pkt > > __vsock_release vsock_find_bound_socket (found) > > lock_sock_nested > > vsock_remove_sock > > sock_orphan > > sk_set_socket(sk, NULL) > > Here we can add: > sk->sk_shutdown = SHUTDOWN_MASK; Indeed. This makes it more clearly -- Cheers, Justin (Jia He) > > > ... > > release_sock > > lock_sock > >virtio_transport_recv_connecting > > sk->sk_socket->state (panic) > > > > The root cause is that vsock_find_bound_socket can't hold the lock_sock, > > so there is a small race window between vsock_find_bound_socket() and > > lock_sock(). If there is __vsock_release() in another task, sk->sk_socket > > will be set to NULL inadvertently. > > > > This fixes it by checking sk->sk_shutdown. > > > > Signed-off-by: Jia He > > Cc: sta...@vger.kernel.org > > Cc: Stefano Garzarella > > --- > > v2: use lightweight checking suggested by Stefano Garzarella > > > > net/vmw_vsock/virtio_transport_common.c | 8 > > 1 file changed, 8 insertions(+) > > > > diff --git a/net/vmw_vsock/virtio_transport_common.c > b/net/vmw_vsock/virtio_transport_common.c > > index 69efc891885f..0edda1edf988 100644 > > --- a/net/vmw_vsock/virtio_transport_common.c > > +++ b/net/vmw_vsock/virtio_transport_common.c > > @@ -1132,6 +1132,14 @@ void virtio_transport_recv_pkt(struct > virtio_transport *t, > > > > lock_sock(sk); > > > > +/* Check if sk has been released before lock_sock */ > > +if (sk->sk_shutdown == SHUTDOWN_MASK) { > > +(void)virtio_transport_reset_no_sock(t, pkt); > > +release_sock(sk); > > +sock_put(sk); > > +goto free_pkt; > > +} > > + > > /* Update CID in case it has changed after a transport reset event */ > > vsk->local_addr.svm_cid = dst.svm_cid; > > > > -- > > 2.17.1 > > > > Anyway, the patch LGTM, let see what David and other say. > > Reviewed-by: Stefano Garzarella > > Thanks, > Stefano IMPORTANT NOTICE: The contents of this email and any attachments are confidential and may also be privileged. If you are not the intended recipient, please notify the sender immediately and do not disclose the contents to any other person, use it for any purpose, or store or copy the information in any medium. Thank you.
Re: [PATCH v2 2/3] seccomp: Introduce addfd ioctl to seccomp user notifier
On Fri, May 29, 2020 at 12:31:37AM -0700, Kees Cook wrote: > On Thu, May 28, 2020 at 04:08:57AM -0700, Sargun Dhillon wrote: > > This adds a seccomp notifier ioctl which allows for the listener to "add" > > file descriptors to a process which originated a seccomp user > > notification. This allows calls like mount, and mknod to be "implemented", > > as the return value, and the arguments are data in memory. On the other > > hand, calls like connect can be "implemented" using pidfd_getfd. > > > > Unfortunately, there are calls which return file descriptors, like > > open, which are vulnerable to TOC-TOU attacks, and require that the > > more privileged supervisor can inspect the argument, and perform the > > syscall on behalf of the process generating the notifiation. This > > allows the file descriptor generated from that open call to be > > returned to the calling process. > > > > In addition, there is funcitonality to allow for replacement of > > specific file descriptors, following dup2-like semantics. > > > > Signed-off-by: Sargun Dhillon > > Suggested-by: Matt Denton > > This looks mostly really clean. When I've got more brain tomorrow I want to > double-check the locking, but I think the use of notify_lock and being > in the ioctl fully protects everything from any use-after-free-like > issues. > > Notes below... > > > +/* valid flags for seccomp_notif_addfd */ > > +#define SECCOMP_ADDFD_FLAG_SETFD (1UL << 0) /* Specify remote fd */ > > Nit: please use BIT() > > > @@ -735,6 +770,41 @@ static u64 seccomp_next_notify_id(struct > > seccomp_filter *filter) > > return filter->notif->next_id++; > > } > > > > +static void seccomp_handle_addfd(struct seccomp_kaddfd *addfd) > > +{ > > + struct socket *sock; > > + int ret, err; > > + > > + /* > > +* Remove the notification, and reset the list pointers, indicating > > +* that it has been handled. > > +*/ > > + list_del_init(>list); > > + > > + ret = security_file_receive(addfd->file); > > + if (ret) > > + goto out; > > + > > + if (addfd->fd == -1) { > > + ret = get_unused_fd_flags(addfd->flags); > > + if (ret >= 0) > > + fd_install(ret, get_file(addfd->file)); > > + } else { > > + ret = replace_fd(addfd->fd, addfd->file, addfd->flags); > > + } > > + > > + /* These are the semantics from copying FDs via SCM_RIGHTS */ > > + sock = sock_from_file(addfd->file, ); > > + if (sock) { > > + sock_update_netprioidx(>sk->sk_cgrp_data); > > + sock_update_classid(>sk->sk_cgrp_data); > > + } > > This made my eye twitch. ;) I see this is borrowed from > scm_detach_fds()... this really feels like the kind of thing that will > quickly go out of sync. I think this "receive an fd" logic needs to be > lifted out of scm_detach_fds() so it and seccomp can share it. I'm not > sure how to parameterize it quite right, though. Perhaps: > > int file_receive(int fd, unsigned long flags, struct file *file) > { > struct socket *sock; > int ret; > > ret = security_file_receive(file); > if (ret) > return ret; > > /* Install the file. */ > if (fd == -1) { > ret = get_unused_fd_flags(flags); > if (ret >= 0) > fd_install(ret, get_file(file)); > } else { > ret = replace_fd(fd, file, flags); > } > > /* Bump the usage count. */ > sock = sock_from_file(addfd->file, ); > if (sock) { > sock_update_netprioidx(>sk->sk_cgrp_data); > sock_update_classid(>sk->sk_cgrp_data); > } > > return ret; > } > > > static void seccomp_handle_addfd(struct seccomp_kaddfd *addfd) > { > /* >* Remove the notification, and reset the list pointers, indicating >* that it has been handled. >*/ > list_del_init(>list); > addfd->ret = file_receive(addfd->fd, addfd->flags, addfd->file); > complete(>completion); > } > > scm_detach_fds() > ... > for (i=0, cmfptr=(__force int __user *)CMSG_DATA(cm); i i++, cmfptr++) > { > > err = file_receive(-1, MSG_CMSG_CLOEXEC & msg->msg_flags > ? O_CLOEXEC : 0, fp[i]); > if (err < 0) > break; > err = put_user(err, cmfptr); > if (err) > /* wat */ > } > ... > > I'm not sure on the put_user() failure, though. We could check early > for faults with a put_user(0, cmfptr) before the file_receive() call, or > we could just ignore it? I'm not sure what SCM does here. I guess > worst-case: > > int file_receive(int fd, unsigned long flags, struct file *file, >int __user *fdptr) > { > ... > ret = get_unused_fd_flags(flags); > if (ret >= 0) { > if (cmfptr) { >
[PATCH net-next 3/6] net: hns3: remove two unused macros in hclgevf_cmd.c
Macro hclgevf_ring_to_dma_dir and hclgevf_is_csq defined in hclgevf_cmd.c, but not used, so remove them. Signed-off-by: Huazhong Tan --- drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.c index f38d236..fec65239 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.c @@ -11,9 +11,6 @@ #include "hclgevf_main.h" #include "hnae3.h" -#define hclgevf_is_csq(ring) ((ring)->flag & HCLGEVF_TYPE_CSQ) -#define hclgevf_ring_to_dma_dir(ring) (hclgevf_is_csq(ring) ? \ - DMA_TO_DEVICE : DMA_FROM_DEVICE) #define cmq_ring_to_dev(ring) (&(ring)->dev->pdev->dev) static int hclgevf_ring_space(struct hclgevf_cmq_ring *ring) -- 2.7.4
[PATCH net-next 1/6] net: hns3: fix a print format issue in hclge_mac_mdio_config()
Use %d to print int variable 'ret' in hclge_mac_mdio_config(). Signed-off-by: Huazhong Tan --- drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.c index 696c5ae..e898207 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.c @@ -155,7 +155,7 @@ int hclge_mac_mdio_config(struct hclge_dev *hdev) ret = mdiobus_register(mdio_bus); if (ret) { dev_err(mdio_bus->parent, - "Failed to register MDIO bus ret = %#x\n", ret); + "failed to register MDIO bus, ret = %d\n", ret); return ret; } -- 2.7.4
[PATCH net-next 5/6] net: hns3: fix two coding style issues in hclgevf_main.c
Remove a redundant blank line in hclgevf_cmd_set_promisc_mode(), and fix a reverse xmas tree coding style issue in hclgevf_set_rss_tc_mode(). Reported-by: Jian Shen Signed-off-by: Huazhong Tan --- drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c index a8c0e79..1b9578d 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c @@ -669,8 +669,8 @@ static int hclgevf_set_rss_tc_mode(struct hclgevf_dev *hdev, u16 rss_size) u16 tc_size[HCLGEVF_MAX_TC_NUM]; struct hclgevf_desc desc; u16 roundup_size; - int status; unsigned int i; + int status; req = (struct hclgevf_rss_tc_mode_cmd *)desc.data; @@ -1143,7 +1143,6 @@ static int hclgevf_cmd_set_promisc_mode(struct hclgevf_dev *hdev, send_msg.en_mc = en_mc_pmc ? 1 : 0; ret = hclgevf_send_mbx_msg(hdev, _msg, false, NULL, 0); - if (ret) dev_err(>pdev->dev, "Set promisc mode fail, status is %d.\n", ret); -- 2.7.4
[PATCH net-next 2/6] net: hns3: remove an unused macro hclge_is_csq
Macro hclge_is_csq defined in hcgle_cmd.c has not been used, so remove it. Signed-off-by: Huazhong Tan --- drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c index 64a1d0bd..1d6c328 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c @@ -11,8 +11,6 @@ #include "hnae3.h" #include "hclge_main.h" -#define hclge_is_csq(ring) ((ring)->flag & HCLGE_TYPE_CSQ) - #define cmq_ring_to_dev(ring) (&(ring)->dev->pdev->dev) static int hclge_ring_space(struct hclge_cmq_ring *ring) -- 2.7.4
[PATCH net-next 6/6] net: hns3: remove some unused codes in hns3_nic_set_features()
NETIF_F_HW_VLAN_CTAG_FILTER is not set in netdev->hw_feature for the HNS3 driver, so the handler of NETIF_F_HW_VLAN_CTAG_FILTER in hns3_nic_set_features() won't be called, remove it. Reported-by: Jian Shen Signed-off-by: Huazhong Tan --- drivers/net/ethernet/hisilicon/hns3/hns3_enet.c | 6 -- 1 file changed, 6 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c index 9fe40c7..b14f2ab 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c @@ -1544,12 +1544,6 @@ static int hns3_nic_set_features(struct net_device *netdev, return ret; } - if ((changed & NETIF_F_HW_VLAN_CTAG_FILTER) && - h->ae_algo->ops->enable_vlan_filter) { - enable = !!(features & NETIF_F_HW_VLAN_CTAG_FILTER); - h->ae_algo->ops->enable_vlan_filter(h, enable); - } - if ((changed & NETIF_F_HW_VLAN_CTAG_RX) && h->ae_algo->ops->enable_hw_strip_rxvtag) { enable = !!(features & NETIF_F_HW_VLAN_CTAG_RX); -- 2.7.4
[PATCH net-next 4/6] net: hns3: fix an incorrect comment for num_tqps in struct hclgevf_dev
struct hclgevf_dev stands for VF device, its field num_tqps indicates the number of VF's task queue pairs, so the comment is incorrect, replace 'PF' with 'VF'. Reported-by: Jian Shen Signed-off-by: Huazhong Tan --- drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h index 738de12..c1fac89 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h @@ -278,7 +278,7 @@ struct hclgevf_dev { struct semaphore reset_sem; /* protect reset process */ u32 fw_version; - u16 num_tqps; /* num task queue pairs of this PF */ + u16 num_tqps; /* num task queue pairs of this VF */ u16 alloc_rss_size; /* allocated RSS task queue */ u16 rss_size_max; /* HW defined max RSS task queue */ -- 2.7.4
[PATCH net-next 0/6] net: hns3: adds some cleanups for -next
There are some cleanups for the HNS3 ethernet driver, fix an incorrect print format, an incorrect comment and some coding style issues, also remove some unused codes and macros. Huazhong Tan (6): net: hns3: fix a print format issue in hclge_mac_mdio_config() net: hns3: remove an unused macro hclge_is_csq net: hns3: remove two unused macros in hclgevf_cmd.c net: hns3: fix an incorrect comment for num_tqps in struct hclgevf_dev net: hns3: fix two coding style issues in hclgevf_main.c net: hns3: remove some unused codes in hns3_nic_set_features() drivers/net/ethernet/hisilicon/hns3/hns3_enet.c | 6 -- drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c| 2 -- drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.c | 2 +- drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.c | 3 --- drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c | 3 +-- drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h | 2 +- 6 files changed, 3 insertions(+), 15 deletions(-) -- 2.7.4
Re: [PATCH] HID: usbhid: do not sleep when opening device
On Fri, May 29, 2020 at 05:48:26PM -0700, Guenter Roeck wrote: > On Fri, May 29, 2020 at 4:50 PM Nicolas Boichat wrote: > > > > On Sat, May 30, 2020 at 3:59 AM Dmitry Torokhov > > wrote: > > > > > > usbhid tries to give the device 50 milliseconds to drain its queues > > > when opening the device, but does it naively by simply sleeping in open > > > handler, which slows down device probing (and thus may affect overall > > > boot time). > > > > > > However we do not need to sleep as we can instead mark a point of time > > > in the future when we should start processing the events. > > > > > > Reported-by: Nicolas Boichat > > > Signed-off-by: Dmitry Torokhov > > > --- > > > drivers/hid/usbhid/hid-core.c | 27 +++ > > > drivers/hid/usbhid/usbhid.h | 1 + > > > 2 files changed, 16 insertions(+), 12 deletions(-) > > > > > > diff --git a/drivers/hid/usbhid/hid-core.c b/drivers/hid/usbhid/hid-core.c > > > index c7bc9db5b192..e69992e945b2 100644 > > > --- a/drivers/hid/usbhid/hid-core.c > > > +++ b/drivers/hid/usbhid/hid-core.c > > > @@ -95,6 +95,19 @@ static int hid_start_in(struct hid_device *hid) > > > set_bit(HID_NO_BANDWIDTH, >iofl); > > > } else { > > > clear_bit(HID_NO_BANDWIDTH, >iofl); > > > + > > > + if (test_and_clear_bit(HID_RESUME_RUNNING, > > > + >iofl)) { > > > + /* > > > +* In case events are generated while > > > nobody was > > > +* listening, some are released when the > > > device > > > +* is re-opened. Wait 50 msec for the > > > queue to > > > +* empty before allowing events to go > > > through > > > +* hid. > > > +*/ > > > + usbhid->input_start_time = jiffies + > > > + > > > msecs_to_jiffies(50); > > > + } > > > } > > > } > > > spin_unlock_irqrestore(>lock, flags); > > > @@ -280,7 +293,8 @@ static void hid_irq_in(struct urb *urb) > > > if (!test_bit(HID_OPENED, >iofl)) > > > break; > > > usbhid_mark_busy(usbhid); > > > - if (!test_bit(HID_RESUME_RUNNING, >iofl)) { > > > + if (!test_bit(HID_RESUME_RUNNING, >iofl) && > > > + time_after(jiffies, usbhid->input_start_time)) { > > > > Are we worried about jiffies overflowing (32-bit@1000Hz is "only" 49.7 > > days...) > > > > time_after() is overflow-safe. That is why it is used and jiffies is > not compared directly. Well, it is overflow safe, but still can not measure more than 50 days, so if you have a device open for 50+ days there will be a 50msec gap where it may lose events. I guess we can switch to ktime(). A bit more expensive on 32 bits, but in reality I do not think anyone would care. Thanks. -- Dmitry
RE: [PATCH] usb: gadget: f_acm: don't disable disabled EP
> > > @@ -425,9 +425,11 @@ static int acm_set_alt(struct usb_function *f, > > > unsigned > intf, unsigned alt) > > > /* we know alt == 0, so this is an activation or a reset */ > > > > > > if (intf == acm->ctrl_id) { > > > - dev_vdbg(>gadget->dev, > > > - "reset acm control interface %d\n", intf); > > > - usb_ep_disable(acm->notify); > > > + if (acm->notify->enabled) { > > > + dev_vdbg(>gadget->dev, > > > + "reset acm control interface %d\n", > > > intf); > > > + usb_ep_disable(acm->notify); > > > + } > > > > But it does not fix any issues, the usb_ep_disable checks 'enabled' flag. > > It generates spurious trace events if you enable them. > You mean the trace events from core.c? If it is, we could try to improve it and indicate it is already enabled or disabled. Peter
Re: [PATCH v2 0/2] fix missing handling of __user in nommu's uaccess()
Hi Luc, On 30/5/20 5:02 am, Luc Van Oostenryck wrote: I received a bug report for an unrelated patch when used with m68k-nommu. It appears that the origin of the problem is that __get_user() and __put_user() doesn't handle correctly __user. These 2 patches fix this. Note: this is only minimaly tested but is quite straightforward and since this only change __user annotation it will not change the generated code. Changes since v1: * fix typo: s/plan/plain/ * appease checkpatch with better style: s/__force*/__force */ * avoid excessive line length caused by the added cast. Luc Van Oostenryck (2): m68k,nommu: add missing __user in uaccess' __ptr() macro m68k,nommu: fix implicit cast from __user in __{get,put}_user_asm() arch/m68k/include/asm/uaccess_no.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) Looks good, thanks for the fixup. Applied to the m68knommu git tree, for-next branch. Regards Greg
[PATCH v11 2/2] mtd: rawnand: Add NAND controller support on Intel LGM SoC
From: Ramuthevar Vadivel Murugan This patch adds the new IP of Nand Flash Controller(NFC) support on Intel's Lightning Mountain(LGM) SoC. DMA is used for burst data transfer operation, also DMA HW supports aligned 32bit memory address and aligned data access by default. DMA burst of 8 supported. Data register used to support the read/write operation from/to device. NAND controller driver implements ->exec_op() to replace legacy hooks, these specific call-back method to execute NAND operations. Signed-off-by: Ramuthevar Vadivel Murugan --- drivers/mtd/nand/raw/Kconfig | 8 + drivers/mtd/nand/raw/Makefile| 1 + drivers/mtd/nand/raw/intel-nand-controller.c | 747 +++ 3 files changed, 756 insertions(+) create mode 100644 drivers/mtd/nand/raw/intel-nand-controller.c diff --git a/drivers/mtd/nand/raw/Kconfig b/drivers/mtd/nand/raw/Kconfig index a80a46bb5b8b..75ab2afb78cf 100644 --- a/drivers/mtd/nand/raw/Kconfig +++ b/drivers/mtd/nand/raw/Kconfig @@ -457,6 +457,14 @@ config MTD_NAND_CADENCE Enable the driver for NAND flash on platforms using a Cadence NAND controller. +config MTD_NAND_INTEL_LGM + tristate "Support for NAND controller on Intel LGM SoC" + depends on OF || COMPILE_TEST + depends on HAS_IOMEM + help + Enables support for NAND Flash chips on Intel's LGM SoC. + NAND flash controller interfaced through the External Bus Unit. + comment "Misc" config MTD_SM_COMMON diff --git a/drivers/mtd/nand/raw/Makefile b/drivers/mtd/nand/raw/Makefile index 2d136b158fb7..bfc8fe4d2cb0 100644 --- a/drivers/mtd/nand/raw/Makefile +++ b/drivers/mtd/nand/raw/Makefile @@ -58,6 +58,7 @@ obj-$(CONFIG_MTD_NAND_TEGRA) += tegra_nand.o obj-$(CONFIG_MTD_NAND_STM32_FMC2) += stm32_fmc2_nand.o obj-$(CONFIG_MTD_NAND_MESON) += meson_nand.o obj-$(CONFIG_MTD_NAND_CADENCE) += cadence-nand-controller.o +obj-$(CONFIG_MTD_NAND_INTEL_LGM) += intel-nand-controller.o nand-objs := nand_base.o nand_legacy.o nand_bbt.o nand_timings.o nand_ids.o nand-objs += nand_onfi.o diff --git a/drivers/mtd/nand/raw/intel-nand-controller.c b/drivers/mtd/nand/raw/intel-nand-controller.c new file mode 100644 index ..564d28978943 --- /dev/null +++ b/drivers/mtd/nand/raw/intel-nand-controller.c @@ -0,0 +1,747 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* Copyright (c) 2020 Intel Corporation. */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define EBU_CLC0x000 +#define EBU_CLC_RST0xu + +#define EBU_ADDR_SEL(n)(0x20 + (n) * 4) +/* 5 bits 26:22 included for comparison in the ADDR_SELx */ +#define EBU_ADDR_MASK(x) ((x) << 4) +#define EBU_ADDR_SEL_REGEN 0x1 + +#define EBU_BUSCON(n) (0x60 + (n) * 4) +#define EBU_BUSCON_CMULT_V40x1 +#define EBU_BUSCON_RECOVC(n) ((n) << 2) +#define EBU_BUSCON_HOLDC(n)((n) << 4) +#define EBU_BUSCON_WAITRDC(n) ((n) << 6) +#define EBU_BUSCON_WAITWRC(n) ((n) << 8) +#define EBU_BUSCON_BCGEN_CS0x0 +#define EBU_BUSCON_SETUP_ENBIT(22) +#define EBU_BUSCON_ALEC0xC000 + +#define EBU_CON0x0B0 +#define EBU_CON_NANDM_EN BIT(0) +#define EBU_CON_NANDM_DIS 0x0 +#define EBU_CON_CSMUX_E_EN BIT(1) +#define EBU_CON_ALE_P_LOW BIT(2) +#define EBU_CON_CLE_P_LOW BIT(3) +#define EBU_CON_CS_P_LOW BIT(4) +#define EBU_CON_SE_P_LOW BIT(5) +#define EBU_CON_WP_P_LOW BIT(6) +#define EBU_CON_PRE_P_LOW BIT(7) +#define EBU_CON_IN_CS_S(n) ((n) << 8) +#define EBU_CON_OUT_CS_S(n)((n) << 10) +#define EBU_CON_LAT_EN_CS_P((0x3D) << 18) + +#define EBU_WAIT 0x0B4 +#define EBU_WAIT_RDBY BIT(0) +#define EBU_WAIT_WR_C BIT(3) + +#define HSNAND_CTL10x110 +#define HSNAND_CTL1_ADDR_SHIFT 24 + +#define HSNAND_CTL20x114 +#define HSNAND_CTL2_ADDR_SHIFT 8 +#define HSNAND_CTL2_CYC_N_V5 (0x2 << 16) + +#define HSNAND_INT_MSK_CTL 0x124 +#define HSNAND_INT_MSK_CTL_WR_CBIT(4) + +#define HSNAND_INT_STA 0x128 +#define HSNAND_INT_STA_WR_CBIT(4) + +#define HSNAND_CTL 0x130 +#define HSNAND_CTL_ENABLE_ECC BIT(0) +#define HSNAND_CTL_GO BIT(2) +#define HSNAND_CTL_CE_SEL_CS(n)BIT(3 + (n)) +#define HSNAND_CTL_RW_READ 0x0 +#define HSNAND_CTL_RW_WRITEBIT(10) +#define HSNAND_CTL_ECC_OFF_V8THBIT(11) +#define HSNAND_CTL_CKFF_EN 0x0 +#define HSNAND_CTL_MSG_EN BIT(17) + +#define HSNAND_PARA0 0x13c +#define HSNAND_PARA0_PAGE_V81920x3 +#define HSNAND_PARA0_PIB_V256 (0x3 << 4) +#define HSNAND_PARA0_BYP_EN_NP 0x0 +#define HSNAND_PARA0_BYP_DEC_NP0x0 +#define HSNAND_PARA0_TYPE_ONFI BIT(18)
[PATCH v11 1/2] dt-bindings: mtd: Add Nand Flash Controller support for Intel LGM SoC
From: Ramuthevar Vadivel Murugan Add YAML file for dt-bindings to support NAND Flash Controller on Intel's Lightning Mountain SoC. Signed-off-by: Ramuthevar Vadivel Murugan --- .../devicetree/bindings/mtd/intel,lgm-nand.yaml| 99 ++ 1 file changed, 99 insertions(+) create mode 100644 Documentation/devicetree/bindings/mtd/intel,lgm-nand.yaml diff --git a/Documentation/devicetree/bindings/mtd/intel,lgm-nand.yaml b/Documentation/devicetree/bindings/mtd/intel,lgm-nand.yaml new file mode 100644 index ..313daec4d783 --- /dev/null +++ b/Documentation/devicetree/bindings/mtd/intel,lgm-nand.yaml @@ -0,0 +1,99 @@ +# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/mtd/intel,lgm-nand.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Intel LGM SoC NAND Controller Device Tree Bindings + +allOf: + - $ref: "nand-controller.yaml" + +maintainers: + - Ramuthevar Vadivel Murugan + +properties: + compatible: +const: intel,lgm-nand + + reg: +maxItems: 6 + + reg-names: +items: + - const: ebunand + - const: hsnand + - const: nand_cs0 + - const: nand_cs1 + - const: addr_sel0 + - const: addr_sel1 + + clocks: +maxItems: 1 + + dmas: +maxItems: 2 + + dma-names: +items: + - const: tx + - const: rx + + "#address-cells": +const: 1 + + "#size-cells": +const: 0 + +patternProperties: + "^nand@[a-f0-9]+$": +type: object +properties: + reg: +minimum: 0 +maximum: 7 + + nand-ecc-mode: true + + nand-ecc-algo: +const: hw + +additionalProperties: false + +required: + - compatible + - reg + - reg-names + - clocks + - dmas + - dma-names + - "#address-cells" + - "#size-cells" + +additionalProperties: false + +examples: + - | +nand-controller@e0f0 { + compatible = "intel,lgm-nand"; + reg = <0xe0f0 0x100>, +<0xe100 0x300>, +<0xe140 0x8000>, +<0xe1c0 0x1000>, +<0x1740 0x4>, +<0x17c0 0x4>; + reg-names = "ebunand", "hsnand", "nand_cs0", "nand_cs1", +"addr_sel0", "addr_sel1"; + clocks = < 125>; + dmas = < 8>, < 9>; + dma-names = "tx", "rx"; + #address-cells = <1>; + #size-cells = <0>; + + nand@0 { +reg = <0>; +nand-ecc-mode = "hw"; + }; +}; + +... -- 2.11.0
Re: [PATCH 1/2] objtool: Rename rela to reloc
On Fri, May 29, 2020 at 06:22:00PM -0500, Josh Poimboeuf wrote: > On Fri, May 29, 2020 at 02:01:13PM -0700, Matt Helsley wrote: > > Before supporting additional relocation types rename the relevant > > types and functions from "rela" to "reloc". This work be done with > > the following regex: > > > > sed -i -e 's/struct rela/struct reloc/g' \ > > -e 's/\([_\*]\)rela\(s\{0,1\}\)/\1reloc\2/g' \ > > -e 's/tmprela\(s\{0,1\}\)/tmpreloc\1/g' \ > > -e 's/relasec/relocsec/g' \ > > -e 's/rela_list/reloc_list/g' \ > > -e 's/rela_hash/reloc_hash/g' \ > > -e 's/add_rela/add_reloc/g' \ > > -e 's/rela->/reloc->/g' \ > > -e '/rela[,\.]/{ s/\([^\.>]\)rela\([\.,]\)/\1reloc\2/g ; }' \ > > -e 's/rela =/reloc =/g' \ > > -e 's/relas =/relocs =/g' \ > > -e 's/relas\[/relocs[/g' \ > > -e 's/relaname =/relocname =/g' \ > > -e 's/= rela\;/= reloc\;/g' \ > > -e 's/= relas\;/= relocs\;/g' \ > > -e 's/= relaname\;/= relocname\;/g' \ > > -e 's/, rela)/, reloc)/g' \ > > -e 's/, relaname/, relocname/g' \ > > -e 's/sec->rela/sec->reloc/g' \ > > -e 's/(\(!\{0,1\}\)rela/(\1reloc/g' \ > > arch.h \ > > arch/x86/decode.c \ > > check.c \ > > check.h \ > > elf.c \ > > elf.h \ > > orc_gen.c \ > > special.c \ > > Holy regex! Thanks for doing that :-) So I was rebasing my future patches and I found a few spots where objtool warning strings and code comments weren't fixed-up to consistent. Here's the new, complete regex -- it includes the original changes and the missed bits (e.g. note the new substitution for @rela comments): sed -i -e 's/struct rela/struct reloc/g' \ -e 's/\([_\*]\)rela\(s\{0,1\}\)/\1reloc\2/g' \ -e 's/tmprela\(s\{0,1\}\)/tmpreloc\1/g' \ -e 's/relasec/relocsec/g' \ -e 's/rela_list/reloc_list/g' \ -e 's/rela_hash/reloc_hash/g' \ -e 's/add_rela/add_reloc/g' \ -e 's/rela->/reloc->/g' \ -e '/rela[,\.]/{ s/\([^\.>]\)rela\([\.,]\)/\1reloc\2/g ; }' \ -e 's/rela =/reloc =/g' \ -e 's/relas =/relocs =/g' \ -e 's/relas\[/relocs[/g' \ -e 's/relaname =/relocname =/g' \ -e 's/= rela\;/= reloc\;/g' \ -e 's/= relas\;/= relocs\;/g' \ -e 's/= relaname\;/= relocname\;/g' \ -e 's/, rela)/, reloc)/g' \ -e 's/\([ @]\)rela\([ "]\)/\1reloc\2/g' \ -e 's/ rela$/ reloc/g' \ -e 's/, relaname/, relocname/g' \ -e 's/sec->rela/sec->reloc/g' \ -e 's/(\(!\{0,1\}\)rela/(\1reloc/g' \ arch.h \ arch/x86/decode.c \ check.c \ check.h \ elf.c \ elf.h \ orc_gen.c \ special.c If you want to fixup this commit you could just re-run the regex after applying it but before applying the next commit. Cheers, -Matt Helsley
[PATCH v11 0/2] mtd: rawnand: Add NAND controller support on Intel LGM SoC
This patch adds the new IP of Nand Flash Controller(NFC) support on Intel's Lightning Mountain(LGM) SoC. DMA is used for burst data transfer operation, also DMA HW supports aligned 32bit memory address and aligned data access by default. DMA burst of 8 supported. Data register used to support the read/write operation from/to device. NAND controller also supports in-built HW ECC engine. NAND controller driver implements ->exec_op() to replace legacy hooks, these specific call-back method to execute NAND operations. Thanks Boris, Andy, Arnd and Rob for the review comments and suggestions. --- v11: - No Change v10: - No Change v9: - No change v8: - fix the kbuild bot warnings - correct the typo's v7: - indentation issue is fixed - add error check for retrieve the resource from dt v6: - update EBU_ADDR_SELx register base value build it from DT - Add tabs in in Kconfig v5: - replace by 'HSNAND_CLE_OFFS | HSNAND_CS_OFFS' to NAND_WRITE_CMD and NAND_WRITE_ADDR - remove the unused macros - update EBU_ADDR_MASK(x) macro - update the EBU_ADDR_SELx register values to be written v4: - add ebu_nand_cs structure for multiple-CS support - mask/offset encoding for 0x51 value - update macro HSNAND_CTL_ENABLE_ECC - drop the op argument and un-used macros. - updated the datatype and macros - add function disable nand module - remove ebu_host->dma_rx = NULL; - rename MMIO address range variables to ebu and hsnand - implement ->setup_data_interface() - update label err_cleanup_nand and err_cleanup_dma - add return value check in the nand_remove function - add/remove tabs and spaces as per coding standard - encoded CS ids by reg property v3: - Add depends on MACRO in Kconfig - file name update in Makefile - file name update to intel-nand-controller - modification of MACRO divided like EBU, HSNAND and NAND - add NAND_ALE_OFFS, NAND_CLE_OFFS and NAND_CS_OFFS - rename lgm_ to ebu_ and _va suffix is removed in the whole file - rename structure and varaibles as per review comments. - remove lgm_read_byte(), lgm_dev_ready() and cmd_ctrl() un-used function - update in exec_op() as per review comments - rename function lgm_dma_exit() by lgm_dma_cleanup() - hardcoded magic value for base and offset replaced by MACRO defined - mtd_device_unregister() + nand_cleanup() instead of nand_release() v2: - implement the ->exec_op() to replaces the legacy hook-up. - update the commit message - add MIPS maintainers and xway_nand driver author in CC v1: - initial version dt-bindings: mtd: Add Nand Flash Controller support for Intel LGM SoC --- v11: - Fixed the compatible issue with example 10: - fix bot errors v9: - Rob's review comments address - dual licensed - compatible change - add reg-names - drop clock-names and clock-cells - correct typo's v8: No change v7: - Rob's review comments addressed - dt-schema build issue fixed with upgraded dt-schema v6: - Rob's review comments addressed in YAML file - add addr_sel0 and addr_sel1 reg-names in YAML example v5: - add the example in YAML file v4: - No change v3: - No change v2: YAML compatible string update to intel, lgm-nand-controller v1: - initial version Ramuthevar Vadivel Murugan (2): dt-bindings: mtd: Add Nand Flash Controller support for Intel LGM SoC mtd: rawnand: Add NAND controller support on Intel LGM SoC .../devicetree/bindings/mtd/intel,lgm-nand.yaml| 99 +++ drivers/mtd/nand/raw/Kconfig | 8 + drivers/mtd/nand/raw/Makefile | 1 + drivers/mtd/nand/raw/intel-nand-controller.c | 747 + 4 files changed, 855 insertions(+) create mode 100644 Documentation/devicetree/bindings/mtd/intel,lgm-nand.yaml create mode 100644 drivers/mtd/nand/raw/intel-nand-controller.c -- 2.11.0
Re: [PATCH] HID: usbhid: do not sleep when opening device
On Fri, May 29, 2020 at 4:50 PM Nicolas Boichat wrote: > > On Sat, May 30, 2020 at 3:59 AM Dmitry Torokhov > wrote: > > > > usbhid tries to give the device 50 milliseconds to drain its queues > > when opening the device, but does it naively by simply sleeping in open > > handler, which slows down device probing (and thus may affect overall > > boot time). > > > > However we do not need to sleep as we can instead mark a point of time > > in the future when we should start processing the events. > > > > Reported-by: Nicolas Boichat > > Signed-off-by: Dmitry Torokhov > > --- > > drivers/hid/usbhid/hid-core.c | 27 +++ > > drivers/hid/usbhid/usbhid.h | 1 + > > 2 files changed, 16 insertions(+), 12 deletions(-) > > > > diff --git a/drivers/hid/usbhid/hid-core.c b/drivers/hid/usbhid/hid-core.c > > index c7bc9db5b192..e69992e945b2 100644 > > --- a/drivers/hid/usbhid/hid-core.c > > +++ b/drivers/hid/usbhid/hid-core.c > > @@ -95,6 +95,19 @@ static int hid_start_in(struct hid_device *hid) > > set_bit(HID_NO_BANDWIDTH, >iofl); > > } else { > > clear_bit(HID_NO_BANDWIDTH, >iofl); > > + > > + if (test_and_clear_bit(HID_RESUME_RUNNING, > > + >iofl)) { > > + /* > > +* In case events are generated while > > nobody was > > +* listening, some are released when the > > device > > +* is re-opened. Wait 50 msec for the queue > > to > > +* empty before allowing events to go > > through > > +* hid. > > +*/ > > + usbhid->input_start_time = jiffies + > > + > > msecs_to_jiffies(50); > > + } > > } > > } > > spin_unlock_irqrestore(>lock, flags); > > @@ -280,7 +293,8 @@ static void hid_irq_in(struct urb *urb) > > if (!test_bit(HID_OPENED, >iofl)) > > break; > > usbhid_mark_busy(usbhid); > > - if (!test_bit(HID_RESUME_RUNNING, >iofl)) { > > + if (!test_bit(HID_RESUME_RUNNING, >iofl) && > > + time_after(jiffies, usbhid->input_start_time)) { > > Are we worried about jiffies overflowing (32-bit@1000Hz is "only" 49.7 > days...) > time_after() is overflow-safe. That is why it is used and jiffies is not compared directly. Guenter > > hid_input_report(urb->context, HID_INPUT_REPORT, > > urb->transfer_buffer, > > urb->actual_length, 1); > > @@ -714,17 +728,6 @@ static int usbhid_open(struct hid_device *hid) > > } > > > > usb_autopm_put_interface(usbhid->intf); > > - > > - /* > > -* In case events are generated while nobody was listening, > > -* some are released when the device is re-opened. > > -* Wait 50 msec for the queue to empty before allowing events > > -* to go through hid. > > -*/ > > - if (res == 0) > > - msleep(50); > > - > > - clear_bit(HID_RESUME_RUNNING, >iofl); > > return res; > > } > > > > diff --git a/drivers/hid/usbhid/usbhid.h b/drivers/hid/usbhid/usbhid.h > > index 8620408bd7af..805949671b96 100644 > > --- a/drivers/hid/usbhid/usbhid.h > > +++ b/drivers/hid/usbhid/usbhid.h > > @@ -82,6 +82,7 @@ struct usbhid_device { > > > > spinlock_t lock;/* > > fifo spinlock */ > > unsigned long iofl; /* > > I/O flags (CTRL_RUNNING, OUT_RUNNING) */ > > + unsigned long input_start_time; /* > > When to start handling input, in jiffies */ > > struct timer_list io_retry; /* > > Retry timer */ > > unsigned long stop_retry; /* > > Time to give up, in jiffies */ > > unsigned int retry_delay; /* > > Delay length in ms */ > > -- > > 2.27.0.rc0.183.gde8f92d652-goog > > > > > > -- > > Dmitry
Re: [net-next 1/6] net: marvell: prestera: Add driver for Prestera family ASIC devices
Hi David, On Fri, May 29, 2020 at 05:18:39PM -0700, David Miller wrote: > > Please remove all of the __packed attributes. > > I looked at your data structures and all of them use fixed sized types > and are multiples of 4 so the __packed attribute is completely > unnecessary. > > The alignment attribute is also unnecessary so please remove that too. Some of the fields are u8, so I assume there might be holes added by the compiler ? Also these attributes guarantee some ABI compatibility with FW side, I will try to remove them and check but it sounds for me a bit risky.
[PATCH] lib: make a test module with get_count_order/long
A test module to make sure get_count_order/long returns the correct result. Signed-off-by: Wei Yang --- lib/Kconfig.debug | 13 ++ lib/Makefile | 2 + lib/test_getorder.c| 64 ++ tools/testing/selftests/lib/config | 1 + 4 files changed, 80 insertions(+) create mode 100644 lib/test_getorder.c diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index c0ef216bb803..01e671151f42 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -1999,6 +1999,19 @@ config TEST_BITOPS If unsure, say N. +config TEST_GETORDER + tristate "Test module for compilation of get_count_order operations" + depends on m + help + This builds the "test_getorder" module that is much like the + TEST_LKM module except that it does a basic exercise of the + get_count_order and get_count_order_long to make sure there are no + compiler warnings from C=1 sparse checker or -Wextra compilations. + It has no dependencies and doesn't run or load unless explicitly + requested by name. For example: modprobe test_getorder. + + If unsure, say N. + config TEST_VMALLOC tristate "Test module for stress/performance analysis of vmalloc allocator" default n diff --git a/lib/Makefile b/lib/Makefile index 0d942f7c7478..806d4df8f7c7 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -81,6 +81,8 @@ obj-$(CONFIG_TEST_STATIC_KEYS) += test_static_keys.o obj-$(CONFIG_TEST_STATIC_KEYS) += test_static_key_base.o obj-$(CONFIG_TEST_BITOPS) += test_bitops.o CFLAGS_test_bitops.o += -Werror +obj-$(CONFIG_TEST_GETORDER) += test_getorder.o +CFLAGS_test_getorder.o += -Werror obj-$(CONFIG_TEST_PRINTF) += test_printf.o obj-$(CONFIG_TEST_BITMAP) += test_bitmap.o obj-$(CONFIG_TEST_STRSCPY) += test_strscpy.o diff --git a/lib/test_getorder.c b/lib/test_getorder.c new file mode 100644 index ..6492abc793af --- /dev/null +++ b/lib/test_getorder.c @@ -0,0 +1,64 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Author: Wei Yang + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include +#include +#include + +/* a tiny module only meant to test get_count_order/long */ +unsigned int order_comb[][2] = { + {0x0003, 2}, + {0x0004, 2}, + {0x1fff, 13}, + {0x2000, 13}, + {0x5000, 31}, + {0x8000, 31}, + {0x80003000, 32}, +}; + +unsigned long order_comb_long[][2] = { + {0x0003, 34}, + {0x0004, 34}, + {0x1fff, 45}, + {0x2000, 45}, + {0x5000, 63}, + {0x8000, 63}, + {0x80003000, 64}, +}; + +static int __init test_getorder_startup(void) +{ + int i; + + pr_warn("Loaded test module\n"); + for (i = 0; i < ARRAY_SIZE(order_comb); i++) { + if (order_comb[i][1] != get_count_order(order_comb[i][0])) + pr_warn("get_count_order wrong for %x\n", + order_comb[i][0]); + } + + for (i = 0; i < ARRAY_SIZE(order_comb_long); i++) { + if (order_comb_long[i][1] != + get_count_order_long(order_comb_long[i][0])) + pr_warn("get_count_order_long wrong for %lx\n", + order_comb_long[i][0]); + } + + return 0; +} + +static void __exit test_getorder_unstartup(void) +{ + pr_warn("Unloaded test module\n"); +} + +module_init(test_getorder_startup); +module_exit(test_getorder_unstartup); + +MODULE_AUTHOR("Wei Yang "); +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("get_count_order/long testing module"); diff --git a/tools/testing/selftests/lib/config b/tools/testing/selftests/lib/config index b80ee3f6e265..2ad467d34648 100644 --- a/tools/testing/selftests/lib/config +++ b/tools/testing/selftests/lib/config @@ -3,3 +3,4 @@ CONFIG_TEST_BITMAP=m CONFIG_PRIME_NUMBERS=m CONFIG_TEST_STRSCPY=m CONFIG_TEST_BITOPS=m +CONFIG_TEST_GETORDER=m -- 2.23.0
Re: [PATCH v7 0/3] perf arm-spe: Add support for synthetic events
On Fri, May 29, 2020 at 01:18:30PM -0300, Arnaldo Carvalho de Melo wrote: > Em Fri, May 29, 2020 at 11:28:01PM +0800, Leo Yan escreveu: > > Hi James, > > > > On Fri, May 29, 2020 at 03:58:23PM +0100, James Clark wrote: > > > Hi Will and Leo, > > > > > > I've tested this on an Arm N1 machine and it looks good to me. > > > > This is great! Will respin the new patch set with adding your test tag > > and send to ML. Thanks a lot for the effort. > > > > Hi Will, Arnaldo, sorry for late replying you due to other works and > > thanks for suggestions in other emails. > > Np, please do it on top of my tmp.perf/core branch, it'll become > perf/core as soon as testing that is ongoing finishes. Understood, will do. Thanks, Leo
Re: [PATCH v10 1/2] dt-bindings: mtd: Add Nand Flash Controller support for Intel LGM SoC
Hi Rob, On 30/5/2020 3:31 am, Rob Herring wrote: On Thu, May 28, 2020 at 11:39:28PM +0800, Ramuthevar,Vadivel MuruganX wrote: From: Ramuthevar Vadivel Murugan Add YAML file for dt-bindings to support NAND Flash Controller on Intel's Lightning Mountain SoC. Signed-off-by: Ramuthevar Vadivel Murugan --- .../devicetree/bindings/mtd/intel,lgm-nand.yaml| 93 ++ 1 file changed, 93 insertions(+) create mode 100644 Documentation/devicetree/bindings/mtd/intel,lgm-nand.yaml diff --git a/Documentation/devicetree/bindings/mtd/intel,lgm-nand.yaml b/Documentation/devicetree/bindings/mtd/intel,lgm-nand.yaml new file mode 100644 index ..afecc9920e04 --- /dev/null +++ b/Documentation/devicetree/bindings/mtd/intel,lgm-nand.yaml @@ -0,0 +1,93 @@ +# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause) +%YAML 1.2 +--- +$id:http://devicetree.org/schemas/mtd/intel,lgm-nand.yaml# +$schema:http://devicetree.org/meta-schemas/core.yaml# + +title: Intel LGM SoC NAND Controller Device Tree Bindings + +allOf: + - $ref: "nand-controller.yaml" + +maintainers: + - Ramuthevar Vadivel Murugan + +properties: + compatible: +const: intel,lgm-nand-controller Doesn't match the example. Thank you for the review comments... if we add the compatible = intel,lgm-nand-controller it throws an error like below.. /builds/robherring/linux-dt-review/Documentation/devicetree/bindings/mtd/intel,lgm-nand.example.dt.yaml: nand-controller@e0f0: '#address-cells', '#size-cells' do not match any of the regexes: '^nand@[a-f0-9]+$', 'pinctrl-[0-9]+' /builds/robherring/linux-dt-review/Documentation/devicetree/bindings/mtd/intel,lgm-nand.example.dt.yaml: nand-controller@e0f0: nand@0: '#address-cells', '#size-cells', 'nand-on-flash-bbt' do not match any of the regexes: 'pinctrl-[0-9]+' referred from this file :Documentation/devicetree/bindings/mtd/nand-controller.yaml fixed the compatible and example doesn't match issue. Regards Vadivel
Re: [net-next 1/6] net: marvell: prestera: Add driver for Prestera family ASIC devices
Please remove all of the __packed attributes. I looked at your data structures and all of them use fixed sized types and are multiples of 4 so the __packed attribute is completely unnecessary. The alignment attribute is also unnecessary so please remove that too.
Re: [PATCH][net-next] nexthop: fix incorrect allocation failure on nhg->spare
From: Colin King Date: Thu, 28 May 2020 15:51:14 +0100 > @@ -1185,7 +1185,7 @@ static struct nexthop *nexthop_create_group(struct net > *net, > > /* spare group used for removals */ > nhg->spare = nexthop_grp_alloc(num_nh); I don't even see this line in the current net-next tree nor any references to ->spare. What am I missing?
Re: KASAN: use-after-free Read in ib_uverbs_remove_one
On Thu, May 28, 2020 at 07:33:16AM -0700, syzbot wrote: > Hello, > > syzbot found the following crash on: > > HEAD commit:c11d28ab Add linux-next specific files for 20200522 > git tree: linux-next > console output: https://syzkaller.appspot.com/x/log.txt?x=12fc3e7210 > kernel config: https://syzkaller.appspot.com/x/.config?x=3f6dbdea4159fb66 > dashboard link: https://syzkaller.appspot.com/bug?extid=478fd0d54412b8759e0d > compiler: gcc (GCC) 9.0.0 20181231 (experimental) > > Unfortunately, I don't have any reproducer for this crash yet. > > IMPORTANT: if you fix the bug, please add the following tag to the commit: > Reported-by: syzbot+478fd0d54412b8759...@syzkaller.appspotmail.com > > == > BUG: KASAN: use-after-free in ib_uverbs_remove_one+0x411/0x4e0 > drivers/infiniband/core/uverbs_main.c:1210 > Read of size 4 at addr 888096324578 by task syz-executor.2/15847 > > CPU: 1 PID: 15847 Comm: syz-executor.2 Not tainted > 5.7.0-rc6-next-20200522-syzkaller #0 > Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS > Google 01/01/2011 > Call Trace: > __dump_stack lib/dump_stack.c:77 [inline] > dump_stack+0x18f/0x20d lib/dump_stack.c:118 > print_address_description.constprop.0.cold+0xd3/0x413 mm/kasan/report.c:383 > __kasan_report mm/kasan/report.c:513 [inline] > kasan_report.cold+0x1f/0x37 mm/kasan/report.c:530 > ib_uverbs_remove_one+0x411/0x4e0 drivers/infiniband/core/uverbs_main.c:1210 > remove_client_context+0xbe/0x110 drivers/infiniband/core/device.c:732 > disable_device+0x13b/0x230 drivers/infiniband/core/device.c:1278 > __ib_unregister_device+0x91/0x180 drivers/infiniband/core/device.c:1445 > ib_unregister_device_and_put+0x57/0x80 drivers/infiniband/core/device.c:1508 > nldev_dellink+0x20a/0x310 drivers/infiniband/core/nldev.c:1571 > rdma_nl_rcv_msg drivers/infiniband/core/netlink.c:195 [inline] > rdma_nl_rcv_skb drivers/infiniband/core/netlink.c:239 [inline] > rdma_nl_rcv+0x586/0x900 drivers/infiniband/core/netlink.c:259 > netlink_unicast_kernel net/netlink/af_netlink.c:1303 [inline] > netlink_unicast+0x537/0x740 net/netlink/af_netlink.c:1329 > netlink_sendmsg+0x882/0xe10 net/netlink/af_netlink.c:1918 > sock_sendmsg_nosec net/socket.c:652 [inline] > sock_sendmsg+0xcf/0x120 net/socket.c:672 > sys_sendmsg+0x6e6/0x810 net/socket.c:2352 > ___sys_sendmsg+0x100/0x170 net/socket.c:2406 > __sys_sendmsg+0xe5/0x1b0 net/socket.c:2439 > do_syscall_64+0xf6/0x7d0 arch/x86/entry/common.c:295 > entry_SYSCALL_64_after_hwframe+0x49/0xb3 > RIP: 0033:0x45ca29 > Code: 0d b7 fb ff c3 66 2e 0f 1f 84 00 00 00 00 00 66 90 48 89 f8 48 89 f7 48 > 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 0f > 83 db b6 fb ff c3 66 2e 0f 1f 84 00 00 00 00 > RSP: 002b:7fd6764d1c78 EFLAGS: 0246 ORIG_RAX: 002e > RAX: ffda RBX: 004ffa00 RCX: 0045ca29 > RDX: RSI: 2180 RDI: 0003 > RBP: 0078bf00 R08: R09: > R10: R11: 0246 R12: > R13: 09af R14: 004d61b0 R15: 7fd6764d26d4 > > Allocated by task 9061: > save_stack+0x1b/0x40 mm/kasan/common.c:48 > set_track mm/kasan/common.c:56 [inline] > __kasan_kmalloc mm/kasan/common.c:494 [inline] > __kasan_kmalloc.constprop.0+0xbf/0xd0 mm/kasan/common.c:467 > kmem_cache_alloc_trace+0x153/0x7d0 mm/slab.c:3551 > kmalloc include/linux/slab.h:555 [inline] > kzalloc include/linux/slab.h:669 [inline] > ib_uverbs_add_one+0x80/0x7c0 drivers/infiniband/core/uverbs_main.c:1107 > add_client_context+0x400/0x5e0 drivers/infiniband/core/device.c:677 > enable_device_and_get+0x1cd/0x3b0 drivers/infiniband/core/device.c:1326 > ib_register_device drivers/infiniband/core/device.c:1392 [inline] > ib_register_device+0xa12/0xda0 drivers/infiniband/core/device.c:1353 > siw_device_register drivers/infiniband/sw/siw/siw_main.c:70 [inline] > siw_newlink drivers/infiniband/sw/siw/siw_main.c:565 [inline] > siw_newlink+0xd37/0x1240 drivers/infiniband/sw/siw/siw_main.c:542 > nldev_newlink+0x29e/0x420 drivers/infiniband/core/nldev.c:1541 > rdma_nl_rcv_msg drivers/infiniband/core/netlink.c:195 [inline] > rdma_nl_rcv_skb drivers/infiniband/core/netlink.c:239 [inline] > rdma_nl_rcv+0x586/0x900 drivers/infiniband/core/netlink.c:259 > netlink_unicast_kernel net/netlink/af_netlink.c:1303 [inline] > netlink_unicast+0x537/0x740 net/netlink/af_netlink.c:1329 > netlink_sendmsg+0x882/0xe10 net/netlink/af_netlink.c:1918 > sock_sendmsg_nosec net/socket.c:652 [inline] > sock_sendmsg+0xcf/0x120 net/socket.c:672 > sys_sendmsg+0x6e6/0x810 net/socket.c:2352 > ___sys_sendmsg+0x100/0x170 net/socket.c:2406 > __sys_sendmsg+0xe5/0x1b0 net/socket.c:2439 > do_syscall_64+0xf6/0x7d0 arch/x86/entry/common.c:295 > entry_SYSCALL_64_after_hwframe+0x49/0xb3 > > Freed by task
Re: [PATCH net-next v2] dt-bindings: net: rename the bindings document for MediaTek STAR EMAC
From: Bartosz Golaszewski Date: Thu, 28 May 2020 15:59:02 +0200 > From: Bartosz Golaszewski > > The driver itself was renamed before getting merged into mainline, but > the binding document kept the old name. This makes both names consistent. > > Signed-off-by: Bartosz Golaszewski > --- > v1 -> v2: > - update the id field as well Applied, thank you.
Re: [PATCH] NFC: st21nfca: add missed kfree_skb() in an error path
From: Chuhong Yuan Date: Thu, 28 May 2020 18:20:37 +0800 > st21nfca_tm_send_atr_res() misses to call kfree_skb() in an error path. > Add the missed function call to fix it. > > Fixes: 1892bf844ea0 ("NFC: st21nfca: Adding P2P support to st21nfca in > Initiator & Target mode") > Signed-off-by: Chuhong Yuan Applied and queued up for -stable, thank you.
Re: [PATCHES] uaccess misc
On Fri, May 29, 2020 at 4:54 PM Linus Torvalds wrote: > > My only complaint was that kvm thing that I think should have gone even > further. Oh... And the cc list looks a bit odd. You cc'd fsdevel, but none of the patches were really to any filesystem code (ok, the pselect and binfmt thing is in fs/, but that's kind of incidental and more of a "we split core system calls up by area too" than any "it's filesystem code"). The kvm patch didn't have anybody from kvm-land cc'd, for example. I added some to my "this shouldn't use double underscores" reply, but.. Linus
Re: [PATCHES] uaccess misc
On Fri, May 29, 2020 at 4:26 PM Al Viro wrote: > > The stuff that doesn't fit anywhere else. Hopefully > saner marshalling for weird 7-argument syscalls (pselect6()), That looked fine to me, btw. Looks like an improvement even outside the "avoid __get_user()" and double STAC/CLAC issue. > low-hanging fruit in several binfmt, unsafe_put_user-based > x86 cp_stat64(), etc. - there's really no common topic here. My only complaint was that kvm thing that I think should have gone even further. Linus
Re: [PATCH 8/9] x86: kvm_hv_set_msr(): use __put_user() instead of 32bit __clear_user()
On Fri, May 29, 2020 at 4:27 PM Al Viro wrote: > a/arch/x86/kvm/hyperv.c > - if (__clear_user((void __user *)addr, sizeof(u32))) > + if (__put_user(0, (u32 __user *)addr)) I'm not doubting that this is a correct transformation and an improvement, but why is it using that double-underscore version in the first place? There's a __copy_to_user() in kvm_hv_set_msr_pw() in addition to this one in kvm_hv_set_msr(). Both go back to 2011 and commit 8b0cedff040b ("KVM: use __copy_to_user/__clear_user to write guest page") and both look purely like "pointlessly avoid the access_ok". All these KVM "optimizations" seem entirely pointless, since access_ok() isn't the problem. And the address _claims_ to be verified, but I'm not seeing it. There is not a single 'access_ok()' anywhere in arch/x86/kvm/ that I can see. It looks like the argument for the address being validated is that it comes from "gfn_to_hva()", which should only return host-virtual-addresses. That may be true. But "should" is not "does", and honestly, the cost of gfn_to_hva() is high enough that then using that as an argument for removing "access_ok()" smells. So I would suggest just removing all these completely bogus double-underscore versions. It's pointless, it's wrong, and it's unsafe. This isn't even some critical path, but even if it was, that user address check isn't where the problems are. Linus
Re: [PATCH] HID: usbhid: do not sleep when opening device
On Sat, May 30, 2020 at 3:59 AM Dmitry Torokhov wrote: > > usbhid tries to give the device 50 milliseconds to drain its queues > when opening the device, but does it naively by simply sleeping in open > handler, which slows down device probing (and thus may affect overall > boot time). > > However we do not need to sleep as we can instead mark a point of time > in the future when we should start processing the events. > > Reported-by: Nicolas Boichat > Signed-off-by: Dmitry Torokhov > --- > drivers/hid/usbhid/hid-core.c | 27 +++ > drivers/hid/usbhid/usbhid.h | 1 + > 2 files changed, 16 insertions(+), 12 deletions(-) > > diff --git a/drivers/hid/usbhid/hid-core.c b/drivers/hid/usbhid/hid-core.c > index c7bc9db5b192..e69992e945b2 100644 > --- a/drivers/hid/usbhid/hid-core.c > +++ b/drivers/hid/usbhid/hid-core.c > @@ -95,6 +95,19 @@ static int hid_start_in(struct hid_device *hid) > set_bit(HID_NO_BANDWIDTH, >iofl); > } else { > clear_bit(HID_NO_BANDWIDTH, >iofl); > + > + if (test_and_clear_bit(HID_RESUME_RUNNING, > + >iofl)) { > + /* > +* In case events are generated while nobody > was > +* listening, some are released when the > device > +* is re-opened. Wait 50 msec for the queue to > +* empty before allowing events to go through > +* hid. > +*/ > + usbhid->input_start_time = jiffies + > + > msecs_to_jiffies(50); > + } > } > } > spin_unlock_irqrestore(>lock, flags); > @@ -280,7 +293,8 @@ static void hid_irq_in(struct urb *urb) > if (!test_bit(HID_OPENED, >iofl)) > break; > usbhid_mark_busy(usbhid); > - if (!test_bit(HID_RESUME_RUNNING, >iofl)) { > + if (!test_bit(HID_RESUME_RUNNING, >iofl) && > + time_after(jiffies, usbhid->input_start_time)) { Are we worried about jiffies overflowing (32-bit@1000Hz is "only" 49.7 days...) > hid_input_report(urb->context, HID_INPUT_REPORT, > urb->transfer_buffer, > urb->actual_length, 1); > @@ -714,17 +728,6 @@ static int usbhid_open(struct hid_device *hid) > } > > usb_autopm_put_interface(usbhid->intf); > - > - /* > -* In case events are generated while nobody was listening, > -* some are released when the device is re-opened. > -* Wait 50 msec for the queue to empty before allowing events > -* to go through hid. > -*/ > - if (res == 0) > - msleep(50); > - > - clear_bit(HID_RESUME_RUNNING, >iofl); > return res; > } > > diff --git a/drivers/hid/usbhid/usbhid.h b/drivers/hid/usbhid/usbhid.h > index 8620408bd7af..805949671b96 100644 > --- a/drivers/hid/usbhid/usbhid.h > +++ b/drivers/hid/usbhid/usbhid.h > @@ -82,6 +82,7 @@ struct usbhid_device { > > spinlock_t lock;/* > fifo spinlock */ > unsigned long iofl; /* > I/O flags (CTRL_RUNNING, OUT_RUNNING) */ > + unsigned long input_start_time; /* > When to start handling input, in jiffies */ > struct timer_list io_retry; /* > Retry timer */ > unsigned long stop_retry; /* > Time to give up, in jiffies */ > unsigned int retry_delay; /* > Delay length in ms */ > -- > 2.27.0.rc0.183.gde8f92d652-goog > > > -- > Dmitry
Re: [PATCH v2] hwmon:(adm1275) Enable adm1278 ADM1278_TEMP1_EN
On 5/29/20, 11:56 AM, "Guenter Roeck" wrote: On 5/29/20 10:57 AM, Vijay Khemka wrote: > > > On 5/29/20, 5:47 AM, "Manikandan Elumalai" wrote: > > The adm1278 temperature sysfs attribute need it for one of the openbmc platform . > This functionality is not enabled by default, so PMON_CONFIG needs to be modified in order to enable it. > > Signed-off-by : Manikandan Elumalai > > v2: >- Add Signed-off-by. >- Removed ADM1278_TEMP1_EN check. > --- > drivers/hwmon/pmbus/adm1275.c | 21 + > 1 file changed, 17 insertions(+), 4 deletions(-) > > diff --git a/drivers/hwmon/pmbus/adm1275.c b/drivers/hwmon/pmbus/adm1275.c > index 5caa37fb..ab5fceb 100644 > --- a/drivers/hwmon/pmbus/adm1275.c > +++ b/drivers/hwmon/pmbus/adm1275.c > @@ -666,7 +666,23 @@ static int adm1275_probe(struct i2c_client *client, > tindex = 3; > > info->func[0] |= PMBUS_HAVE_PIN | PMBUS_HAVE_STATUS_INPUT | > - PMBUS_HAVE_VOUT | PMBUS_HAVE_STATUS_VOUT; > + PMBUS_HAVE_VOUT | PMBUS_HAVE_STATUS_VOUT | > + PMBUS_HAVE_TEMP | PMBUS_HAVE_STATUS_TEMP; > + > + config = i2c_smbus_read_byte_data(client, ADM1275_PMON_CONFIG); > + if (config < 0) > + return config; > + > + /* Enable TEMP1 by default */ > + config |= ADM1278_TEMP1_EN; > + ret = i2c_smbus_write_byte_data(client, > + ADM1275_PMON_CONFIG, > + config); > + if (ret < 0) { > + dev_err(>dev, > + "Failed to enable temperature config\n"); > + return -ENODEV; > + } > You don't need this above code removing check as below should be enough to > populate sysfs entry you need. > You mean you are only interested in having the attribute, even if it doesn't report anything useful because monitoring is disabled in the chip ? Sorry, I don't understand. Can you elaborate ? Sorry for misinterpretation, No I don't mean that. This should be fine. Thanks, Guenter > /* Enable VOUT if not enabled (it is disabled by default) */ > if (!(config & ADM1278_VOUT_EN)) { > @@ -681,9 +697,6 @@ static int adm1275_probe(struct i2c_client *client, > } > } > > - if (config & ADM1278_TEMP1_EN) > - info->func[0] |= > - PMBUS_HAVE_TEMP | PMBUS_HAVE_STATUS_TEMP; > if (config & ADM1278_VIN_EN) > info->func[0] |= PMBUS_HAVE_VIN; > break; > -- > 2.7.4 > > >
[PATCH 2/2] vhost: convert get_user_pages() --> pin_user_pages()
This code was using get_user_pages*(), in approximately a "Case 5" scenario (accessing the data within a page), using the categorization from [1]. That means that it's time to convert the get_user_pages*() + put_page() calls to pin_user_pages*() + unpin_user_pages() calls. There is some helpful background in [2]: basically, this is a small part of fixing a long-standing disconnect between pinning pages, and file systems' use of those pages. [1] Documentation/core-api/pin_user_pages.rst [2] "Explicit pinning of user-space pages": https://lwn.net/Articles/807108/ Cc: Michael S. Tsirkin Cc: Jason Wang Cc: k...@vger.kernel.org Cc: virtualizat...@lists.linux-foundation.org Cc: net...@vger.kernel.org Signed-off-by: John Hubbard --- drivers/vhost/vhost.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c index 21a59b598ed8..596132a96cd5 100644 --- a/drivers/vhost/vhost.c +++ b/drivers/vhost/vhost.c @@ -1762,15 +1762,14 @@ static int set_bit_to_user(int nr, void __user *addr) int bit = nr + (log % PAGE_SIZE) * 8; int r; - r = get_user_pages_fast(log, 1, FOLL_WRITE, ); + r = pin_user_pages_fast(log, 1, FOLL_WRITE, ); if (r < 0) return r; BUG_ON(r != 1); base = kmap_atomic(page); set_bit(bit, base); kunmap_atomic(base); - set_page_dirty_lock(page); - put_page(page); + unpin_user_pages_dirty_lock(, 1, true); return 0; } -- 2.26.2
[PATCH 1/2] docs: mm/gup: pin_user_pages.rst: add a "case 5"
There are four cases listed in pin_user_pages.rst. These are intended to help developers figure out whether to use get_user_pages*(), or pin_user_pages*(). However, the four cases do not cover all the situations. For example, drivers/vhost/vhost.c has a "pin, write to page, set page dirty, unpin" case. Add a fifth case, to help explain that there is a general pattern that requires pin_user_pages*() API calls. Cc: Vlastimil Babka Cc: Jan Kara Cc: Jérôme Glisse Cc: Dave Chinner Cc: Jonathan Corbet Cc: linux-...@vger.kernel.org Cc: linux-fsde...@vger.kernel.org Signed-off-by: John Hubbard --- Documentation/core-api/pin_user_pages.rst | 20 1 file changed, 20 insertions(+) diff --git a/Documentation/core-api/pin_user_pages.rst b/Documentation/core-api/pin_user_pages.rst index 4675b04e8829..b9f2688a2c67 100644 --- a/Documentation/core-api/pin_user_pages.rst +++ b/Documentation/core-api/pin_user_pages.rst @@ -171,6 +171,26 @@ If only struct page data (as opposed to the actual memory contents that a page is tracking) is affected, then normal GUP calls are sufficient, and neither flag needs to be set. +CASE 5: Pinning in order to write to the data within the page +- +Even though neither DMA nor Direct IO is involved, just a simple case of "pin, +access page's data, unpin" can cause a problem. Case 5 may be considered a +superset of Case 1, plus Case 2, plus anything that invokes that pattern. In +other words, if the code is neither Case 1 nor Case 2, it may still require +FOLL_PIN, for patterns like this: + +Correct (uses FOLL_PIN calls): +pin_user_pages() +access the data within the pages +set_page_dirty_lock() +unpin_user_pages() + +INCORRECT (uses FOLL_GET calls): +get_user_pages() +access the data within the pages +set_page_dirty_lock() +put_page() + page_maybe_dma_pinned(): the whole point of pinning === -- 2.26.2
[PATCH 0/2] vhost, docs: convert to pin_user_pages(), new "case 5"
Hi, It recently became clear to me that there are some get_user_pages*() callers that don't fit neatly into any of the four cases that are so far listed in pin_user_pages.rst. vhost.c is one of those. Add a Case 5 to the documentation, and refer to that when converting vhost.c. Thanks to Jan Kara for helping me (again) in understanding the interaction between get_user_pages() and page writeback [1]. This is based on today's mmotm, which has a nearby patch to pin_user_pages.rst that rewords cases 3 and 4. Note that I have only compile-tested the vhost.c patch, although that does also include cross-compiling for a few other arches. Any run-time testing would be greatly appreciated. [1] https://lore.kernel.org/r/20200529070343.gl14...@quack2.suse.cz John Hubbard (2): docs: mm/gup: pin_user_pages.rst: add a "case 5" vhost: convert get_user_pages() --> pin_user_pages() Documentation/core-api/pin_user_pages.rst | 20 drivers/vhost/vhost.c | 5 ++--- 2 files changed, 22 insertions(+), 3 deletions(-) -- 2.26.2
[PATCH 3/4] hpsa: get rid of compat_alloc_user_space()
From: Al Viro no need for building a native struct on kernel stack, copying it to userland one, then calling hpsa_ioctl() which copies it back into _another_ instance of the same struct. Signed-off-by: Al Viro --- drivers/scsi/hpsa.c | 80 - 1 file changed, 36 insertions(+), 44 deletions(-) diff --git a/drivers/scsi/hpsa.c b/drivers/scsi/hpsa.c index 64fd97272109..c7fbe56891ef 100644 --- a/drivers/scsi/hpsa.c +++ b/drivers/scsi/hpsa.c @@ -254,6 +254,10 @@ static irqreturn_t do_hpsa_intr_intx(int irq, void *dev_id); static irqreturn_t do_hpsa_intr_msi(int irq, void *dev_id); static int hpsa_ioctl(struct scsi_device *dev, unsigned int cmd, void __user *arg); +static int hpsa_passthru_ioctl(struct ctlr_info *h, + IOCTL_Command_struct *iocommand); +static int hpsa_big_passthru_ioctl(struct ctlr_info *h, + BIG_IOCTL_Command_struct *ioc); #ifdef CONFIG_COMPAT static int hpsa_compat_ioctl(struct scsi_device *dev, unsigned int cmd, @@ -6217,75 +6221,63 @@ static void cmd_free(struct ctlr_info *h, struct CommandList *c) static int hpsa_ioctl32_passthru(struct scsi_device *dev, unsigned int cmd, void __user *arg) { - IOCTL32_Command_struct __user *arg32 = - (IOCTL32_Command_struct __user *) arg; + struct ctlr_info *h = sdev_to_hba(dev); + IOCTL32_Command_struct __user *arg32 = arg; IOCTL_Command_struct arg64; - IOCTL_Command_struct __user *p = compat_alloc_user_space(sizeof(arg64)); int err; u32 cp; - memset(, 0, sizeof(arg64)); - err = 0; - err |= copy_from_user(_info, >LUN_info, - sizeof(arg64.LUN_info)); - err |= copy_from_user(, >Request, - sizeof(arg64.Request)); - err |= copy_from_user(_info, >error_info, - sizeof(arg64.error_info)); - err |= get_user(arg64.buf_size, >buf_size); - err |= get_user(cp, >buf); - arg64.buf = compat_ptr(cp); - err |= copy_to_user(p, , sizeof(arg64)); + if (!arg) + return -EINVAL; - if (err) + memset(, 0, sizeof(arg64)); + if (copy_from_user(, arg32, offsetof(IOCTL_Command_struct, buf))) + return -EFAULT; + if (get_user(cp, >buf)) return -EFAULT; + arg64.buf = compat_ptr(cp); - err = hpsa_ioctl(dev, CCISS_PASSTHRU, p); + if (atomic_dec_if_positive(>passthru_cmds_avail) < 0) + return -EAGAIN; + err = hpsa_passthru_ioctl(h, ); + atomic_inc(>passthru_cmds_avail); if (err) return err; - err |= copy_in_user(>error_info, >error_info, -sizeof(arg32->error_info)); - if (err) + if (copy_to_user(>error_info, _info, +sizeof(arg32->error_info))) return -EFAULT; - return err; + return 0; } static int hpsa_ioctl32_big_passthru(struct scsi_device *dev, unsigned int cmd, void __user *arg) { - BIG_IOCTL32_Command_struct __user *arg32 = - (BIG_IOCTL32_Command_struct __user *) arg; + struct ctlr_info *h = sdev_to_hba(dev); + BIG_IOCTL32_Command_struct __user *arg32 = arg; BIG_IOCTL_Command_struct arg64; - BIG_IOCTL_Command_struct __user *p = - compat_alloc_user_space(sizeof(arg64)); int err; u32 cp; + if (!arg) + return -EINVAL; memset(, 0, sizeof(arg64)); - err = 0; - err |= copy_from_user(_info, >LUN_info, - sizeof(arg64.LUN_info)); - err |= copy_from_user(, >Request, - sizeof(arg64.Request)); - err |= copy_from_user(_info, >error_info, - sizeof(arg64.error_info)); - err |= get_user(arg64.buf_size, >buf_size); - err |= get_user(arg64.malloc_size, >malloc_size); - err |= get_user(cp, >buf); - arg64.buf = compat_ptr(cp); - err |= copy_to_user(p, , sizeof(arg64)); - - if (err) + if (copy_from_user(, arg32, + offsetof(BIG_IOCTL32_Command_struct, buf))) return -EFAULT; + if (get_user(cp, >buf)) + return -EFAULT; + arg64.buf = compat_ptr(cp); - err = hpsa_ioctl(dev, CCISS_BIG_PASSTHRU, p); + if (atomic_dec_if_positive(>passthru_cmds_avail) < 0) + return -EAGAIN; + err = hpsa_big_passthru_ioctl(h, ); + atomic_inc(>passthru_cmds_avail); if (err) return err; - err |= copy_in_user(>error_info, >error_info, -sizeof(arg32->error_info)); - if (err) + if (copy_to_user(>error_info, _info, +sizeof(arg32->error_info))) return -EFAULT; - return err; + return 0; }
[PATCH 1/4] hpsa passthrough: lift {BIG_,}IOCTL_Command_struct copy{in,out} into hpsa_ioctl()
From: Al Viro Signed-off-by: Al Viro --- drivers/scsi/hpsa.c | 116 +--- 1 file changed, 56 insertions(+), 60 deletions(-) diff --git a/drivers/scsi/hpsa.c b/drivers/scsi/hpsa.c index 1e9302e99d05..3344a06c938e 100644 --- a/drivers/scsi/hpsa.c +++ b/drivers/scsi/hpsa.c @@ -6358,37 +6358,33 @@ static int hpsa_getdrivver_ioctl(struct ctlr_info *h, void __user *argp) return 0; } -static int hpsa_passthru_ioctl(struct ctlr_info *h, void __user *argp) +static int hpsa_passthru_ioctl(struct ctlr_info *h, + IOCTL_Command_struct *iocommand) { - IOCTL_Command_struct iocommand; struct CommandList *c; char *buff = NULL; u64 temp64; int rc = 0; - if (!argp) - return -EINVAL; if (!capable(CAP_SYS_RAWIO)) return -EPERM; - if (copy_from_user(, argp, sizeof(iocommand))) - return -EFAULT; - if ((iocommand.buf_size < 1) && - (iocommand.Request.Type.Direction != XFER_NONE)) { + if ((iocommand->buf_size < 1) && + (iocommand->Request.Type.Direction != XFER_NONE)) { return -EINVAL; } - if (iocommand.buf_size > 0) { - buff = kmalloc(iocommand.buf_size, GFP_KERNEL); + if (iocommand->buf_size > 0) { + buff = kmalloc(iocommand->buf_size, GFP_KERNEL); if (buff == NULL) return -ENOMEM; - if (iocommand.Request.Type.Direction & XFER_WRITE) { + if (iocommand->Request.Type.Direction & XFER_WRITE) { /* Copy the data into the buffer we created */ - if (copy_from_user(buff, iocommand.buf, - iocommand.buf_size)) { + if (copy_from_user(buff, iocommand->buf, + iocommand->buf_size)) { rc = -EFAULT; goto out_kfree; } } else { - memset(buff, 0, iocommand.buf_size); + memset(buff, 0, iocommand->buf_size); } } c = cmd_alloc(h); @@ -6398,23 +6394,23 @@ static int hpsa_passthru_ioctl(struct ctlr_info *h, void __user *argp) c->scsi_cmd = SCSI_CMD_BUSY; /* Fill in Command Header */ c->Header.ReplyQueue = 0; /* unused in simple mode */ - if (iocommand.buf_size > 0) { /* buffer to fill */ + if (iocommand->buf_size > 0) { /* buffer to fill */ c->Header.SGList = 1; c->Header.SGTotal = cpu_to_le16(1); } else { /* no buffers to fill */ c->Header.SGList = 0; c->Header.SGTotal = cpu_to_le16(0); } - memcpy(>Header.LUN, _info, sizeof(c->Header.LUN)); + memcpy(>Header.LUN, >LUN_info, sizeof(c->Header.LUN)); /* Fill in Request block */ - memcpy(>Request, , + memcpy(>Request, >Request, sizeof(c->Request)); /* Fill in the scatter gather information */ - if (iocommand.buf_size > 0) { + if (iocommand->buf_size > 0) { temp64 = dma_map_single(>pdev->dev, buff, - iocommand.buf_size, DMA_BIDIRECTIONAL); + iocommand->buf_size, DMA_BIDIRECTIONAL); if (dma_mapping_error(>pdev->dev, (dma_addr_t) temp64)) { c->SG[0].Addr = cpu_to_le64(0); c->SG[0].Len = cpu_to_le32(0); @@ -6422,12 +6418,12 @@ static int hpsa_passthru_ioctl(struct ctlr_info *h, void __user *argp) goto out; } c->SG[0].Addr = cpu_to_le64(temp64); - c->SG[0].Len = cpu_to_le32(iocommand.buf_size); + c->SG[0].Len = cpu_to_le32(iocommand->buf_size); c->SG[0].Ext = cpu_to_le32(HPSA_SG_LAST); /* not chaining */ } rc = hpsa_scsi_do_simple_cmd(h, c, DEFAULT_REPLY_QUEUE, NO_TIMEOUT); - if (iocommand.buf_size > 0) + if (iocommand->buf_size > 0) hpsa_pci_unmap(h->pdev, c, 1, DMA_BIDIRECTIONAL); check_ioctl_unit_attention(h, c); if (rc) { @@ -6436,16 +6432,12 @@ static int hpsa_passthru_ioctl(struct ctlr_info *h, void __user *argp) } /* Copy the error information out */ - memcpy(_info, c->err_info, - sizeof(iocommand.error_info)); - if (copy_to_user(argp, , sizeof(iocommand))) { - rc = -EFAULT; - goto out; - } - if ((iocommand.Request.Type.Direction & XFER_READ) && - iocommand.buf_size > 0) { + memcpy(>error_info, c->err_info, + sizeof(iocommand->error_info)); + if ((iocommand->Request.Type.Direction & XFER_READ) && +
[PATCH 4/4] hpsa_ioctl(): tidy up a bit
From: Al Viro Signed-off-by: Al Viro --- drivers/scsi/hpsa.c | 7 ++- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/drivers/scsi/hpsa.c b/drivers/scsi/hpsa.c index c7fbe56891ef..81d0414e2117 100644 --- a/drivers/scsi/hpsa.c +++ b/drivers/scsi/hpsa.c @@ -6577,14 +6577,11 @@ static void check_ioctl_unit_attention(struct ctlr_info *h, * ioctl */ static int hpsa_ioctl(struct scsi_device *dev, unsigned int cmd, - void __user *arg) + void __user *argp) { - struct ctlr_info *h; - void __user *argp = (void __user *)arg; + struct ctlr_info *h = sdev_to_hba(dev); int rc; - h = sdev_to_hba(dev); - switch (cmd) { case CCISS_DEREGDISK: case CCISS_REGNEWDISK: -- 2.11.0