[PATCH RFC V4 2/3] KVM: X86: Adding arbitrary data pointer in kvm memslot iterator functions
This will help share data with the slot_level_handler callback. In my case I need to share a counter for the pages traversed to use it in some bitmap. Being able to pass an arbitrary memory pointer into the slot_level_handler callback made it easy. Signed-off-by: Ahmed Abd El Mawgood --- arch/x86/kvm/mmu.c | 65 +++--- 1 file changed, 37 insertions(+), 28 deletions(-) diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index d594690d8b95..77661530b2c4 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -1418,7 +1418,7 @@ static bool spte_write_protect(u64 *sptep, bool pt_protect) static bool __rmap_write_protect(struct kvm *kvm, struct kvm_rmap_head *rmap_head, -bool pt_protect) +bool pt_protect, void *data) { u64 *sptep; struct rmap_iterator iter; @@ -1457,7 +1457,8 @@ static bool wrprot_ad_disabled_spte(u64 *sptep) * - W bit on ad-disabled SPTEs. * Returns true iff any D or W bits were cleared. */ -static bool __rmap_clear_dirty(struct kvm *kvm, struct kvm_rmap_head *rmap_head) +static bool __rmap_clear_dirty(struct kvm *kvm, struct kvm_rmap_head *rmap_head, + void *data) { u64 *sptep; struct rmap_iterator iter; @@ -1483,7 +1484,8 @@ static bool spte_set_dirty(u64 *sptep) return mmu_spte_update(sptep, spte); } -static bool __rmap_set_dirty(struct kvm *kvm, struct kvm_rmap_head *rmap_head) +static bool __rmap_set_dirty(struct kvm *kvm, struct kvm_rmap_head *rmap_head, + void *data) { u64 *sptep; struct rmap_iterator iter; @@ -1515,7 +1517,7 @@ static void kvm_mmu_write_protect_pt_masked(struct kvm *kvm, while (mask) { rmap_head = __gfn_to_rmap(slot->base_gfn + gfn_offset + __ffs(mask), PT_PAGE_TABLE_LEVEL, slot); - __rmap_write_protect(kvm, rmap_head, false); + __rmap_write_protect(kvm, rmap_head, false, NULL); /* clear the first set bit */ mask &= mask - 1; @@ -1541,7 +1543,7 @@ void kvm_mmu_clear_dirty_pt_masked(struct kvm *kvm, while (mask) { rmap_head = __gfn_to_rmap(slot->base_gfn + gfn_offset + __ffs(mask), PT_PAGE_TABLE_LEVEL, slot); - 
__rmap_clear_dirty(kvm, rmap_head); + __rmap_clear_dirty(kvm, rmap_head, NULL); /* clear the first set bit */ mask &= mask - 1; @@ -1594,7 +1596,8 @@ bool kvm_mmu_slot_gfn_write_protect(struct kvm *kvm, for (i = PT_PAGE_TABLE_LEVEL; i <= PT_MAX_HUGEPAGE_LEVEL; ++i) { rmap_head = __gfn_to_rmap(gfn, i, slot); - write_protected |= __rmap_write_protect(kvm, rmap_head, true); + write_protected |= __rmap_write_protect(kvm, rmap_head, true, + NULL); } return write_protected; @@ -1608,7 +1611,8 @@ static bool rmap_write_protect(struct kvm_vcpu *vcpu, u64 gfn) return kvm_mmu_slot_gfn_write_protect(vcpu->kvm, slot, gfn); } -static bool kvm_zap_rmapp(struct kvm *kvm, struct kvm_rmap_head *rmap_head) +static bool kvm_zap_rmapp(struct kvm *kvm, struct kvm_rmap_head *rmap_head, + void *data) { u64 *sptep; struct rmap_iterator iter; @@ -1628,7 +1632,7 @@ static int kvm_unmap_rmapp(struct kvm *kvm, struct kvm_rmap_head *rmap_head, struct kvm_memory_slot *slot, gfn_t gfn, int level, unsigned long data) { - return kvm_zap_rmapp(kvm, rmap_head); + return kvm_zap_rmapp(kvm, rmap_head, NULL); } static int kvm_set_pte_rmapp(struct kvm *kvm, struct kvm_rmap_head *rmap_head, @@ -5086,13 +5090,15 @@ void kvm_mmu_uninit_vm(struct kvm *kvm) } /* The return value indicates if tlb flush on all vcpus is needed. */ -typedef bool (*slot_level_handler) (struct kvm *kvm, struct kvm_rmap_head *rmap_head); +typedef bool (*slot_level_handler) (struct kvm *kvm, + struct kvm_rmap_head *rmap_head, void *data); /* The caller should hold mmu-lock before calling this function. 
*/ static __always_inline bool slot_handle_level_range(struct kvm *kvm, struct kvm_memory_slot *memslot, slot_level_handler fn, int start_level, int end_level, - gfn_t start_gfn, gfn_t end_gfn, bool lock_flush_tlb) + gfn_t start_gfn, gfn_t end_gfn, bool lock_flush_tlb, + void *data) { struct slot_rmap_walk_iterator iterator; bool flush = false; @@ -5100,7 +5106,7 @@ slot_handle_level_range(struct kvm *kvm, struct kvm_memory_slot *memslot, for_each_slot_rmap_range(memslot, start_level, end_level, start_gfn, end_gfn, )
[PATCH RFC V4 1/3] KVM: X86: Memory ROE documentation
Following up on my previous threads on KVM-assisted anti-rootkit protections. The current version doesn't address attacks involving page remapping. That part is still a design in progress; nevertheless, it will be addressed in my later patch sets. Signed-off-by: Ahmed Abd El Mawgood --- Documentation/virtual/kvm/hypercalls.txt | 14 ++ 1 file changed, 14 insertions(+) diff --git a/Documentation/virtual/kvm/hypercalls.txt b/Documentation/virtual/kvm/hypercalls.txt index a890529c63ed..affd997eabfe 100644 --- a/Documentation/virtual/kvm/hypercalls.txt +++ b/Documentation/virtual/kvm/hypercalls.txt @@ -121,3 +121,17 @@ compute the CLOCK_REALTIME for its clock, at the same instant. Returns KVM_EOPNOTSUPP if the host does not use TSC clocksource, or if clock type is different than KVM_CLOCK_PAIRING_WALLCLOCK. + +7. KVM_HC_HMROE + +Architecture: x86 +Status: active +Purpose: Hypercall used to apply Read-Only Enforcement to guest pages +Usage: + a0: Start address aligned to page boundary. + a1: Number of pages to be protected. +This hypercall lets a guest kernel have part of its read/write memory +converted into read-only. This action is irreversible. KVM_HC_HMROE can +not be triggered from guest Ring 3 (user mode). The reason is that user +mode malicious software can make use of it to enforce read only protection +on an arbitrary memory page thus crashing the kernel. -- 2.16.4 ___ Virtualization mailing list Virtualization@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/virtualization
Memory Read Only Enforcement: VMM assisted kernel rootkit mitigation for KVM V4
Here is the changelog from V3 to V4: - Fixing spelling/grammar mistakes suggested by Randy Dunlap - Changing the hypercall interface to be able to process multiple pages per hypercall, also suggested by Randy Dunlap. It turns out that this will save lots of vmexits/memory slot flushes when protecting many pages. [PATCH RFC V4 1/3] KVM: X86: Memory ROE documentation [PATCH RFC V4 2/3] KVM: X86: Adding arbitrary data pointer in kvm memslot iterator functions [PATCH RFC V4 3/3] KVM: X86: Adding skeleton for Memory ROE Summary: Documentation/virtual/kvm/hypercalls.txt | 14 arch/x86/include/asm/kvm_host.h | 11 ++- arch/x86/kvm/Kconfig | 7 ++ arch/x86/kvm/mmu.c | 127 ++- arch/x86/kvm/x86.c | 104 - include/linux/kvm_host.h | 3 + include/uapi/linux/kvm_para.h| 1 + virt/kvm/kvm_main.c | 29 ++- 8 files changed, 254 insertions(+), 42 deletions(-) ___ Virtualization mailing list Virtualization@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/virtualization
Re: [PATCH 3/3] [RFC V3] KVM: X86: Adding skeleton for Memory ROE
On 20 July 2018 at 03:28, Jann Horn wrote: > On Fri, Jul 20, 2018 at 2:26 AM Ahmed Soliman > wrote: >> >> On 20 July 2018 at 00:59, Jann Horn wrote: >> > On Thu, Jul 19, 2018 at 11:40 PM Ahmed Abd El Mawgood >> >> > Why are you implementing this in the kernel, instead of doing it in >> > host userspace? >> >> I thought about implementing it completely in QEMU but It won't be >> possible for few reasons: >> >> - After talking to QEMU folks I came up to conclusion that it when it >> comes to managing memory allocated for guest, it is always better to let >> KVM handles everything, unless there is a good reason to play with that >> memory chunk inside QEMU itself. > > Why? It seems to me like it'd be easier to add a way to mprotect() > guest pages to readonly via virtio or whatever in QEMU than to add > kernel code? I did an early prototype with mprotect(), But then mprotect() didn't do exactly what I wanted, The goal here is to prevent the guest from writing to protected page but allow the host to do if it ever needs to at the same time. mprotect() will either allow both host and guest, or prevent both host and guest. Even though I can not come up with a use case where one might need to allow host to read/write to a page but prevent guest from writing to that page, I think that it is a limitation that will cost complete redesign if it proves that this kind of behavior is undesired. Also mprotect is kind of inflexible. Writing to mprotected pages would immediately trigger SIGSEGV and then userspace process will have to handle that fault in order to control the situation. That sounded to me more like a little hack than a solid design. > And if you ever want to support VM snapshotting/resumption, you'll > need support for restoring the protection flags from QEMU anyway. I never thought about that, but thanks for letting me know. I will keep that in my TODO list. 
>> - But actually there is a good reason for implementing ROE in kernel space, >> it is that ROE is architecture dependent to great extent. > > How so? The host component just has to make pages in guest memory > readonly, right? As far as I can tell, from QEMU, it'd more or less be > a matter of calling mprotect() a few times? (Plus potentially some > hooks to prevent other virtio code from crashing by attempting to > access protected pages - but you'd need that anyway, no matter where > the protection for the guest is enforced.) I don't think that virtio would crash that way, because host should be able write to memory as it wants. but yet I see where there is this going, probably I can add hooks so that virtio can respect the read only flags. >> I should have >> emphasized that the only currently supported architecture is X86. I am >> not sure how deep the dependency on architecture goes. But as for now >> the current set of patches does a SPTE enumeration as part of the process. >> To my best knowledge, this isn't exposed outside arch/x68/kvm let alone >> having a host user space interface for it. Also the way I am planning to >> protect TLB from malicious gva -> gpa mapping is by knowing that in x86 >> it is possible to VMEXIT on page faults, I am not sure if it will safe to >> assume that all kvm supported architectures will behave this way. > > You mean EPT faults, right? If so: I think all architectures have to > support that - there are already other reasons why random guest memory > accesses can fault. In particular, the host can page out guest memory. > I think that's the case on all architectures? Here my lack of full knowledge kicks in, I am not sure whether is EPT fault or guest pf is what I want to capture validate. I think X86 can vm exit on both. Due to nature of ROE, guest user space code can not have ROE because it is irreversible, so it will be safe to assume that only pages that are not swappable are the one's I would care about. 
still lots of the details are blurry for me. But what I was trying to say is that there is always differences based on architecture that is why it will be better to do things in kernel module if we decided not to use mprotect method. >> For these reasons I thought it will be better if arch dependent stuff (the >> mechanism implementation) is kept in arch/*/kvm folder and with minimal >> modifications to virt/kvm/* after setting a kconfig variable to enable ROE. >> But I left room for the user space app using kvm to decide the rightful >> policy >> for handling ROE violations. The way it works by KVM_EXIT_MMIO error to user >> space, keeping all the architectural details hidden away from user space. >> >> A last note is that I didn't create this from scratch, instead I extended >> KVM_MEM_READONLY implementation to also allow R/O per page instead >> R/O per whole slot which is already done in kernel space. > > But then you still have to also do something about virtio code in QEMU > that might write to those pages, right? Probably yes, still I haven't fully planned that yet. But I
Re: [RFC 0/4] Virtio uses DMA API for all devices
On Fri, Jul 20, 2018 at 09:29:37AM +0530, Anshuman Khandual wrote: > This patch series is the follow up on the discussions we had before about > the RFC titled [RFC,V2] virtio: Add platform specific DMA API translation > for virito devices (https://patchwork.kernel.org/patch/10417371/). There > were suggestions about doing away with two different paths of transactions > with the host/QEMU, first being the direct GPA and the other being the DMA > API based translations. > > First patch attempts to create a direct GPA mapping based DMA operations > structure called 'virtio_direct_dma_ops' with exact same implementation > of the direct GPA path which virtio core currently has but just wrapped in > a DMA API format. Virtio core must use 'virtio_direct_dma_ops' instead of > the arch default in absence of VIRTIO_F_IOMMU_PLATFORM flag to preserve the > existing semantics. The second patch does exactly that inside the function > virtio_finalize_features(). The third patch removes the default direct GPA > path from virtio core forcing it to use DMA API callbacks for all devices. > Now with that change, every device must have a DMA operations structure > associated with it. The fourth patch adds an additional hook which gives > the platform an opportunity to do yet another override if required. This > platform hook can be used on POWER Ultravisor based protected guests to > load up SWIOTLB DMA callbacks to do the required (as discussed previously > in the above mentioned thread how host is allowed to access only parts of > the guest GPA range) bounce buffering into the shared memory for all I/O > scatter gather buffers to be consumed on the host side. > > Please go through these patches and review whether this approach broadly > makes sense. I will appreciate suggestions, inputs, comments regarding > the patches or the approach in general. Thank you. I like how patches 1-3 look. 
Could you test performance with/without to see whether the extra indirection through use of DMA ops causes a measurable slow-down? > Anshuman Khandual (4): > virtio: Define virtio_direct_dma_ops structure > virtio: Override device's DMA OPS with virtio_direct_dma_ops selectively > virtio: Force virtio core to use DMA API callbacks for all virtio devices > virtio: Add platform specific DMA API translation for virito devices > > arch/powerpc/include/asm/dma-mapping.h | 6 +++ > arch/powerpc/platforms/pseries/iommu.c | 6 +++ > drivers/virtio/virtio.c| 72 > ++ > drivers/virtio/virtio_pci_common.h | 3 ++ > drivers/virtio/virtio_ring.c | 65 +- > 5 files changed, 89 insertions(+), 63 deletions(-) > > -- > 2.9.3 ___ Virtualization mailing list Virtualization@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/virtualization
Re: [RFC 4/4] virtio: Add platform specific DMA API translation for virito devices
On Fri, Jul 20, 2018 at 09:29:41AM +0530, Anshuman Khandual wrote: >Subject: Re: [RFC 4/4] virtio: Add platform specific DMA API translation for > virito devices s/virito/virtio/ > This adds a hook which a platform can define in order to allow it to > override virtio device's DMA OPS irrespective of whether it has the > flag VIRTIO_F_IOMMU_PLATFORM set or not. We want to use this to do > bounce-buffering of data on the new secure pSeries platform, currently > under development, where a KVM host cannot access all of the memory > space of a secure KVM guest. The host can only access the pages which > the guest has explicitly requested to be shared with the host, thus > the virtio implementation in the guest has to copy data to and from > shared pages. > > With this hook, the platform code in the secure guest can force the > use of swiotlb for virtio buffers, with a back-end for swiotlb which > will use a pool of pre-allocated shared pages. Thus all data being > sent or received by virtio devices will be copied through pages which > the host has access to. 
> > Signed-off-by: Anshuman Khandual > --- > arch/powerpc/include/asm/dma-mapping.h | 6 ++ > arch/powerpc/platforms/pseries/iommu.c | 6 ++ > drivers/virtio/virtio.c| 7 +++ > 3 files changed, 19 insertions(+) > > diff --git a/arch/powerpc/include/asm/dma-mapping.h > b/arch/powerpc/include/asm/dma-mapping.h > index 8fa3945..bc5a9d3 100644 > --- a/arch/powerpc/include/asm/dma-mapping.h > +++ b/arch/powerpc/include/asm/dma-mapping.h > @@ -116,3 +116,9 @@ extern u64 __dma_get_required_mask(struct device *dev); > > #endif /* __KERNEL__ */ > #endif /* _ASM_DMA_MAPPING_H */ > + > +#define platform_override_dma_ops platform_override_dma_ops > + > +struct virtio_device; > + > +extern void platform_override_dma_ops(struct virtio_device *vdev); > diff --git a/arch/powerpc/platforms/pseries/iommu.c > b/arch/powerpc/platforms/pseries/iommu.c > index 06f0296..5773bc7 100644 > --- a/arch/powerpc/platforms/pseries/iommu.c > +++ b/arch/powerpc/platforms/pseries/iommu.c > @@ -38,6 +38,7 @@ > #include > #include > #include > +#include > #include > #include > #include > @@ -1396,3 +1397,8 @@ static int __init disable_multitce(char *str) > __setup("multitce=", disable_multitce); > > machine_subsys_initcall_sync(pseries, tce_iommu_bus_notifier_init); > + > +void platform_override_dma_ops(struct virtio_device *vdev) > +{ > + /* Override vdev->parent.dma_ops if required */ > +} > diff --git a/drivers/virtio/virtio.c b/drivers/virtio/virtio.c > index 6b13987..432c332 100644 > --- a/drivers/virtio/virtio.c > +++ b/drivers/virtio/virtio.c > @@ -168,6 +168,12 @@ EXPORT_SYMBOL_GPL(virtio_add_status); > > const struct dma_map_ops virtio_direct_dma_ops; > > +#ifndef platform_override_dma_ops > +static inline void platform_override_dma_ops(struct virtio_device *vdev) > +{ > +} > +#endif > + > int virtio_finalize_features(struct virtio_device *dev) > { > int ret = dev->config->finalize_features(dev); > @@ -179,6 +185,7 @@ int virtio_finalize_features(struct virtio_device *dev) > if 
(virtio_has_iommu_quirk(dev)) > set_dma_ops(dev->dev.parent, _direct_dma_ops); > > + platform_override_dma_ops(dev); Is there a single place where virtio_has_iommu_quirk is called now? If so, we could put this into virtio_has_iommu_quirk then. > if (!virtio_has_feature(dev, VIRTIO_F_VERSION_1)) > return 0; > > -- > 2.9.3 ___ Virtualization mailing list Virtualization@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/virtualization
Re: [PATCH v36 0/5] Virtio-balloon: support free page reporting
On Fri, Jul 20, 2018 at 04:33:00PM +0800, Wei Wang wrote: > This patch series is separated from the previous "Virtio-balloon > Enhancement" series. The new feature, VIRTIO_BALLOON_F_FREE_PAGE_HINT, > implemented by this series enables the virtio-balloon driver to report > hints of guest free pages to the host. It can be used to accelerate live > migration of VMs. Here is an introduction of this usage: > > Live migration needs to transfer the VM's memory from the source machine > to the destination round by round. For the 1st round, all the VM's memory > is transferred. From the 2nd round, only the pieces of memory that were > written by the guest (after the 1st round) are transferred. One method > that is popularly used by the hypervisor to track which part of memory is > written is to write-protect all the guest memory. > > This feature enables the optimization by skipping the transfer of guest > free pages during VM live migration. It is not concerned that the memory > pages are used after they are given to the hypervisor as a hint of the > free pages, because they will be tracked by the hypervisor and transferred > in the subsequent round if they are used and written. > > * Tests > - Test Environment > Host: Intel(R) Xeon(R) CPU E5-2699 v4 @ 2.20GHz > Guest: 8G RAM, 4 vCPU > Migration setup: migrate_set_speed 100G, migrate_set_downtime 2 second Can we split out patches 1 and 2? They seem appropriate for this release ... > - Test Results > - Idle Guest Live Migration Time (results are averaged over 10 runs): > - Optimization v.s. Legacy = 409ms vs 1757ms --> ~77% reduction > (setting page poisoning zero and enabling ksm don't affect the > comparison result) > - Guest with Linux Compilation Workload (make bzImage -j4): > - Live Migration Time (average) > Optimization v.s. Legacy = 1407ms v.s. 2528ms --> ~44% reduction > - Linux Compilation Time > Optimization v.s. Legacy = 5min4s v.s. 
5min12s > --> no obvious difference > > ChangeLog: > v35->v36: > - remove the mm patch, as Linus has a suggestion to get free page > addresses via allocation, instead of reading from the free page > list. > - virtio-balloon: > - replace oom notifier with shrinker; > - the guest to host communication interface remains the same as > v32. > - allocate free page blocks and send to host one by one, and free > them after sending all the pages. > > For ChangeLogs from v22 to v35, please reference > https://lwn.net/Articles/759413/ > > For ChangeLogs before v21, please reference > https://lwn.net/Articles/743660/ > > Wei Wang (5): > virtio-balloon: remove BUG() in init_vqs > virtio_balloon: replace oom notifier with shrinker > virtio-balloon: VIRTIO_BALLOON_F_FREE_PAGE_HINT > mm/page_poison: expose page_poisoning_enabled to kernel modules > virtio-balloon: VIRTIO_BALLOON_F_PAGE_POISON > > drivers/virtio/virtio_balloon.c | 456 > ++-- > include/uapi/linux/virtio_balloon.h | 7 + > mm/page_poison.c| 6 + > 3 files changed, 394 insertions(+), 75 deletions(-) > > -- > 2.7.4 ___ Virtualization mailing list Virtualization@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/virtualization
[PATCH 4.17 003/101] x86/paravirt: Make native_save_fl() extern inline
4.17-stable review patch. If anyone has any objections, please let me know. -- From: Nick Desaulniers commit d0a8d9378d16eb3c69bd8e6d23779fbdbee3a8c7 upstream. native_save_fl() is marked static inline, but by using it as a function pointer in arch/x86/kernel/paravirt.c, it MUST be outlined. paravirt's use of native_save_fl() also requires that no GPRs other than %rax are clobbered. Compilers have different heuristics which they use to emit stack guard code, the emittance of which can break paravirt's callee saved assumption by clobbering %rcx. Marking a function definition extern inline means that if this version cannot be inlined, then the out-of-line version will be preferred. By having the out-of-line version be implemented in assembly, it cannot be instrumented with a stack protector, which might violate custom calling conventions that code like paravirt rely on. The semantics of extern inline has changed since gnu89. This means that folks using GCC versions >= 5.1 may see symbol redefinition errors at link time for subdirs that override KBUILD_CFLAGS (making the C standard used implicit) regardless of this patch. This has been cleaned up earlier in the patch set, but is left as a note in the commit message for future travelers. Reports: https://lkml.org/lkml/2018/5/7/534 https://github.com/ClangBuiltLinux/linux/issues/16 Discussion: https://bugs.llvm.org/show_bug.cgi?id=37512 https://lkml.org/lkml/2018/5/24/1371 Thanks to the many folks that participated in the discussion. Debugged-by: Alistair Strachan Debugged-by: Matthias Kaehlcke Suggested-by: Arnd Bergmann Suggested-by: H. 
Peter Anvin Suggested-by: Tom Stellar Reported-by: Sedat Dilek Tested-by: Sedat Dilek Signed-off-by: Nick Desaulniers Acked-by: Juergen Gross Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: a...@redhat.com Cc: akata...@vmware.com Cc: a...@linux-foundation.org Cc: andrea.pa...@amarulasolutions.com Cc: ard.biesheu...@linaro.org Cc: aryabi...@virtuozzo.com Cc: astrac...@google.com Cc: boris.ostrov...@oracle.com Cc: brijesh.si...@amd.com Cc: caoj.f...@cn.fujitsu.com Cc: ge...@linux-m68k.org Cc: ghackm...@google.com Cc: gre...@linuxfoundation.org Cc: jan.kis...@siemens.com Cc: jarkko.sakki...@linux.intel.com Cc: j...@perches.com Cc: jpoim...@redhat.com Cc: keesc...@google.com Cc: kirill.shute...@linux.intel.com Cc: kstew...@linuxfoundation.org Cc: linux-...@vger.kernel.org Cc: linux-kbu...@vger.kernel.org Cc: manojgu...@google.com Cc: mawil...@microsoft.com Cc: michal.l...@markovi.net Cc: mj...@google.com Cc: m...@chromium.org Cc: pombreda...@nexb.com Cc: rient...@google.com Cc: rost...@goodmis.org Cc: thomas.lenda...@amd.com Cc: tw...@google.com Cc: virtualization@lists.linux-foundation.org Cc: will.dea...@arm.com Cc: yamada.masah...@socionext.com Link: http://lkml.kernel.org/r/20180621162324.36656-4-ndesaulni...@google.com Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/irqflags.h |2 +- arch/x86/kernel/Makefile|1 + arch/x86/kernel/irqflags.S | 26 ++ 3 files changed, 28 insertions(+), 1 deletion(-) --- a/arch/x86/include/asm/irqflags.h +++ b/arch/x86/include/asm/irqflags.h @@ -13,7 +13,7 @@ * Interrupt control: */ -static inline unsigned long native_save_fl(void) +extern inline unsigned long native_save_fl(void) { unsigned long flags; --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -61,6 +61,7 @@ obj-y += alternative.o i8253.o hw_brea obj-y += tsc.o tsc_msr.o io_delay.o rtc.o obj-y += pci-iommu_table.o obj-y += resource.o +obj-y += irqflags.o obj-y += process.o obj-y += fpu/ --- /dev/null +++ 
b/arch/x86/kernel/irqflags.S @@ -0,0 +1,26 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#include +#include +#include + +/* + * unsigned long native_save_fl(void) + */ +ENTRY(native_save_fl) + pushf + pop %_ASM_AX + ret +ENDPROC(native_save_fl) +EXPORT_SYMBOL(native_save_fl) + +/* + * void native_restore_fl(unsigned long flags) + * %eax/%rdi: flags + */ +ENTRY(native_restore_fl) + push %_ASM_ARG1 + popf + ret +ENDPROC(native_restore_fl) +EXPORT_SYMBOL(native_restore_fl) ___ Virtualization mailing list Virtualization@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/virtualization
[PATCH 4.17 002/101] x86/asm: Add _ASM_ARG* constants for argument registers to
4.17-stable review patch. If anyone has any objections, please let me know. -- From: H. Peter Anvin commit 0e2e160033283e20f688d8bad5b89460cc5bfcc4 upstream. i386 and x86-64 uses different registers for arguments; make them available so we don't have to #ifdef in the actual code. Native size and specified size (q, l, w, b) versions are provided. Signed-off-by: H. Peter Anvin Signed-off-by: Nick Desaulniers Reviewed-by: Sedat Dilek Acked-by: Juergen Gross Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: a...@redhat.com Cc: akata...@vmware.com Cc: a...@linux-foundation.org Cc: andrea.pa...@amarulasolutions.com Cc: ard.biesheu...@linaro.org Cc: a...@arndb.de Cc: aryabi...@virtuozzo.com Cc: astrac...@google.com Cc: boris.ostrov...@oracle.com Cc: brijesh.si...@amd.com Cc: caoj.f...@cn.fujitsu.com Cc: ge...@linux-m68k.org Cc: ghackm...@google.com Cc: gre...@linuxfoundation.org Cc: jan.kis...@siemens.com Cc: jarkko.sakki...@linux.intel.com Cc: j...@perches.com Cc: jpoim...@redhat.com Cc: keesc...@google.com Cc: kirill.shute...@linux.intel.com Cc: kstew...@linuxfoundation.org Cc: linux-...@vger.kernel.org Cc: linux-kbu...@vger.kernel.org Cc: manojgu...@google.com Cc: mawil...@microsoft.com Cc: michal.l...@markovi.net Cc: mj...@google.com Cc: m...@chromium.org Cc: pombreda...@nexb.com Cc: rient...@google.com Cc: rost...@goodmis.org Cc: thomas.lenda...@amd.com Cc: tstel...@redhat.com Cc: tw...@google.com Cc: virtualization@lists.linux-foundation.org Cc: will.dea...@arm.com Cc: yamada.masah...@socionext.com Link: http://lkml.kernel.org/r/20180621162324.36656-3-ndesaulni...@google.com Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/asm.h | 59 + 1 file changed, 59 insertions(+) --- a/arch/x86/include/asm/asm.h +++ b/arch/x86/include/asm/asm.h @@ -46,6 +46,65 @@ #define _ASM_SI__ASM_REG(si) #define _ASM_DI__ASM_REG(di) +#ifndef __x86_64__ +/* 32 bit */ + +#define _ASM_ARG1 _ASM_AX +#define _ASM_ARG2 _ASM_DX +#define _ASM_ARG3 
_ASM_CX + +#define _ASM_ARG1L eax +#define _ASM_ARG2L edx +#define _ASM_ARG3L ecx + +#define _ASM_ARG1W ax +#define _ASM_ARG2W dx +#define _ASM_ARG3W cx + +#define _ASM_ARG1B al +#define _ASM_ARG2B dl +#define _ASM_ARG3B cl + +#else +/* 64 bit */ + +#define _ASM_ARG1 _ASM_DI +#define _ASM_ARG2 _ASM_SI +#define _ASM_ARG3 _ASM_DX +#define _ASM_ARG4 _ASM_CX +#define _ASM_ARG5 r8 +#define _ASM_ARG6 r9 + +#define _ASM_ARG1Q rdi +#define _ASM_ARG2Q rsi +#define _ASM_ARG3Q rdx +#define _ASM_ARG4Q rcx +#define _ASM_ARG5Q r8 +#define _ASM_ARG6Q r9 + +#define _ASM_ARG1L edi +#define _ASM_ARG2L esi +#define _ASM_ARG3L edx +#define _ASM_ARG4L ecx +#define _ASM_ARG5L r8d +#define _ASM_ARG6L r9d + +#define _ASM_ARG1W di +#define _ASM_ARG2W si +#define _ASM_ARG3W dx +#define _ASM_ARG4W cx +#define _ASM_ARG5W r8w +#define _ASM_ARG6W r9w + +#define _ASM_ARG1B dil +#define _ASM_ARG2B sil +#define _ASM_ARG3B dl +#define _ASM_ARG4B cl +#define _ASM_ARG5B r8b +#define _ASM_ARG6B r9b + +#endif + /* * Macros to generate condition code outputs from inline assembly, * The output operand must be type "bool". ___ Virtualization mailing list Virtualization@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/virtualization
[PATCH 4.17 001/101] compiler-gcc.h: Add __attribute__((gnu_inline)) to all inline declarations
4.17-stable review patch. If anyone has any objections, please let me know. -- From: Nick Desaulniers commit d03db2bc26f0e4a6849ad649a09c9c73fccdc656 upstream. Functions marked extern inline do not emit an externally visible function when the gnu89 C standard is used. Some KBUILD Makefiles overwrite KBUILD_CFLAGS. This is an issue for GCC 5.1+ users as without an explicit C standard specified, the default is gnu11. Since c99, the semantics of extern inline have changed such that an externally visible function is always emitted. This can lead to multiple definition errors of extern inline functions at link time of compilation units whose build files have removed an explicit C standard compiler flag for users of GCC 5.1+ or Clang. Suggested-by: Arnd Bergmann Suggested-by: H. Peter Anvin Suggested-by: Joe Perches Signed-off-by: Nick Desaulniers Acked-by: Juergen Gross Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: a...@redhat.com Cc: akata...@vmware.com Cc: a...@linux-foundation.org Cc: andrea.pa...@amarulasolutions.com Cc: ard.biesheu...@linaro.org Cc: aryabi...@virtuozzo.com Cc: astrac...@google.com Cc: boris.ostrov...@oracle.com Cc: brijesh.si...@amd.com Cc: caoj.f...@cn.fujitsu.com Cc: ge...@linux-m68k.org Cc: ghackm...@google.com Cc: gre...@linuxfoundation.org Cc: jan.kis...@siemens.com Cc: jarkko.sakki...@linux.intel.com Cc: jpoim...@redhat.com Cc: keesc...@google.com Cc: kirill.shute...@linux.intel.com Cc: kstew...@linuxfoundation.org Cc: linux-...@vger.kernel.org Cc: linux-kbu...@vger.kernel.org Cc: manojgu...@google.com Cc: mawil...@microsoft.com Cc: michal.l...@markovi.net Cc: mj...@google.com Cc: m...@chromium.org Cc: pombreda...@nexb.com Cc: rient...@google.com Cc: rost...@goodmis.org Cc: sedat.di...@gmail.com Cc: thomas.lenda...@amd.com Cc: tstel...@redhat.com Cc: tw...@google.com Cc: virtualization@lists.linux-foundation.org Cc: will.dea...@arm.com Cc: yamada.masah...@socionext.com Link: 
http://lkml.kernel.org/r/20180621162324.36656-2-ndesaulni...@google.com Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- include/linux/compiler-gcc.h | 29 ++--- 1 file changed, 22 insertions(+), 7 deletions(-) --- a/include/linux/compiler-gcc.h +++ b/include/linux/compiler-gcc.h @@ -66,25 +66,40 @@ #endif /* + * Feature detection for gnu_inline (gnu89 extern inline semantics). Either + * __GNUC_STDC_INLINE__ is defined (not using gnu89 extern inline semantics, + * and we opt in to the gnu89 semantics), or __GNUC_STDC_INLINE__ is not + * defined so the gnu89 semantics are the default. + */ +#ifdef __GNUC_STDC_INLINE__ +# define __gnu_inline __attribute__((gnu_inline)) +#else +# define __gnu_inline +#endif + +/* * Force always-inline if the user requests it so via the .config, * or if gcc is too old. * GCC does not warn about unused static inline functions for * -Wunused-function. This turns out to avoid the need for complex #ifdef * directives. Suppress the warning in clang as well by using "unused" * function attribute, which is redundant but not harmful for gcc. + * Prefer gnu_inline, so that extern inline functions do not emit an + * externally visible function. This makes extern inline behave as per gnu89 + * semantics rather than c99. This prevents multiple symbol definition errors + * of extern inline functions at link time. + * A lot of inline functions can cause havoc with function tracing. 
*/ #if !defined(CONFIG_ARCH_SUPPORTS_OPTIMIZED_INLINING) || \ !defined(CONFIG_OPTIMIZE_INLINING) || (__GNUC__ < 4) -#define inline inline __attribute__((always_inline,unused)) notrace -#define __inline__ __inline__ __attribute__((always_inline,unused)) notrace -#define __inline __inline __attribute__((always_inline,unused)) notrace +#define inline \ + inline __attribute__((always_inline, unused)) notrace __gnu_inline #else -/* A lot of inline functions can cause havoc with function tracing */ -#define inline inline __attribute__((unused)) notrace -#define __inline__ __inline__ __attribute__((unused)) notrace -#define __inline __inline __attribute__((unused)) notrace +#define inline inline __attribute__((unused)) notrace __gnu_inline #endif +#define __inline__ inline +#define __inline inline #define __always_inline inline __attribute__((always_inline)) #define noinline __attribute__((noinline)) ___ Virtualization mailing list Virtualization@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/virtualization
[PATCH 4.14 03/92] x86/paravirt: Make native_save_fl() extern inline
4.14-stable review patch. If anyone has any objections, please let me know. -- From: Nick Desaulniers commit d0a8d9378d16eb3c69bd8e6d23779fbdbee3a8c7 upstream. native_save_fl() is marked static inline, but by using it as a function pointer in arch/x86/kernel/paravirt.c, it MUST be outlined. paravirt's use of native_save_fl() also requires that no GPRs other than %rax are clobbered. Compilers have different heuristics which they use to emit stack guard code, the emittance of which can break paravirt's callee saved assumption by clobbering %rcx. Marking a function definition extern inline means that if this version cannot be inlined, then the out-of-line version will be preferred. By having the out-of-line version be implemented in assembly, it cannot be instrumented with a stack protector, which might violate custom calling conventions that code like paravirt rely on. The semantics of extern inline has changed since gnu89. This means that folks using GCC versions >= 5.1 may see symbol redefinition errors at link time for subdirs that override KBUILD_CFLAGS (making the C standard used implicit) regardless of this patch. This has been cleaned up earlier in the patch set, but is left as a note in the commit message for future travelers. Reports: https://lkml.org/lkml/2018/5/7/534 https://github.com/ClangBuiltLinux/linux/issues/16 Discussion: https://bugs.llvm.org/show_bug.cgi?id=37512 https://lkml.org/lkml/2018/5/24/1371 Thanks to the many folks that participated in the discussion. Debugged-by: Alistair Strachan Debugged-by: Matthias Kaehlcke Suggested-by: Arnd Bergmann Suggested-by: H. 
Peter Anvin Suggested-by: Tom Stellar Reported-by: Sedat Dilek Tested-by: Sedat Dilek Signed-off-by: Nick Desaulniers Acked-by: Juergen Gross Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: a...@redhat.com Cc: akata...@vmware.com Cc: a...@linux-foundation.org Cc: andrea.pa...@amarulasolutions.com Cc: ard.biesheu...@linaro.org Cc: aryabi...@virtuozzo.com Cc: astrac...@google.com Cc: boris.ostrov...@oracle.com Cc: brijesh.si...@amd.com Cc: caoj.f...@cn.fujitsu.com Cc: ge...@linux-m68k.org Cc: ghackm...@google.com Cc: gre...@linuxfoundation.org Cc: jan.kis...@siemens.com Cc: jarkko.sakki...@linux.intel.com Cc: j...@perches.com Cc: jpoim...@redhat.com Cc: keesc...@google.com Cc: kirill.shute...@linux.intel.com Cc: kstew...@linuxfoundation.org Cc: linux-...@vger.kernel.org Cc: linux-kbu...@vger.kernel.org Cc: manojgu...@google.com Cc: mawil...@microsoft.com Cc: michal.l...@markovi.net Cc: mj...@google.com Cc: m...@chromium.org Cc: pombreda...@nexb.com Cc: rient...@google.com Cc: rost...@goodmis.org Cc: thomas.lenda...@amd.com Cc: tw...@google.com Cc: virtualization@lists.linux-foundation.org Cc: will.dea...@arm.com Cc: yamada.masah...@socionext.com Link: http://lkml.kernel.org/r/20180621162324.36656-4-ndesaulni...@google.com Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/irqflags.h |2 +- arch/x86/kernel/Makefile|1 + arch/x86/kernel/irqflags.S | 26 ++ 3 files changed, 28 insertions(+), 1 deletion(-) --- a/arch/x86/include/asm/irqflags.h +++ b/arch/x86/include/asm/irqflags.h @@ -13,7 +13,7 @@ * Interrupt control: */ -static inline unsigned long native_save_fl(void) +extern inline unsigned long native_save_fl(void) { unsigned long flags; --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -58,6 +58,7 @@ obj-y += alternative.o i8253.o pci-nom obj-y += tsc.o tsc_msr.o io_delay.o rtc.o obj-y += pci-iommu_table.o obj-y += resource.o +obj-y += irqflags.o obj-y += process.o obj-y += fpu/ --- /dev/null +++ 
b/arch/x86/kernel/irqflags.S @@ -0,0 +1,26 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#include <asm/asm.h> +#include <asm/export.h> +#include <linux/linkage.h> + +/* + * unsigned long native_save_fl(void) + */ +ENTRY(native_save_fl) + pushf + pop %_ASM_AX + ret +ENDPROC(native_save_fl) +EXPORT_SYMBOL(native_save_fl) + +/* + * void native_restore_fl(unsigned long flags) + * %eax/%rdi: flags + */ +ENTRY(native_restore_fl) + push %_ASM_ARG1 + popf + ret +ENDPROC(native_restore_fl) +EXPORT_SYMBOL(native_restore_fl) ___ Virtualization mailing list Virtualization@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/virtualization
[PATCH 4.14 02/92] x86/asm: Add _ASM_ARG* constants for argument registers to <asm/asm.h>
4.14-stable review patch. If anyone has any objections, please let me know. -- From: H. Peter Anvin commit 0e2e160033283e20f688d8bad5b89460cc5bfcc4 upstream. i386 and x86-64 uses different registers for arguments; make them available so we don't have to #ifdef in the actual code. Native size and specified size (q, l, w, b) versions are provided. Signed-off-by: H. Peter Anvin Signed-off-by: Nick Desaulniers Reviewed-by: Sedat Dilek Acked-by: Juergen Gross Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: a...@redhat.com Cc: akata...@vmware.com Cc: a...@linux-foundation.org Cc: andrea.pa...@amarulasolutions.com Cc: ard.biesheu...@linaro.org Cc: a...@arndb.de Cc: aryabi...@virtuozzo.com Cc: astrac...@google.com Cc: boris.ostrov...@oracle.com Cc: brijesh.si...@amd.com Cc: caoj.f...@cn.fujitsu.com Cc: ge...@linux-m68k.org Cc: ghackm...@google.com Cc: gre...@linuxfoundation.org Cc: jan.kis...@siemens.com Cc: jarkko.sakki...@linux.intel.com Cc: j...@perches.com Cc: jpoim...@redhat.com Cc: keesc...@google.com Cc: kirill.shute...@linux.intel.com Cc: kstew...@linuxfoundation.org Cc: linux-...@vger.kernel.org Cc: linux-kbu...@vger.kernel.org Cc: manojgu...@google.com Cc: mawil...@microsoft.com Cc: michal.l...@markovi.net Cc: mj...@google.com Cc: m...@chromium.org Cc: pombreda...@nexb.com Cc: rient...@google.com Cc: rost...@goodmis.org Cc: thomas.lenda...@amd.com Cc: tstel...@redhat.com Cc: tw...@google.com Cc: virtualization@lists.linux-foundation.org Cc: will.dea...@arm.com Cc: yamada.masah...@socionext.com Link: http://lkml.kernel.org/r/20180621162324.36656-3-ndesaulni...@google.com Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/asm.h | 59 + 1 file changed, 59 insertions(+) --- a/arch/x86/include/asm/asm.h +++ b/arch/x86/include/asm/asm.h @@ -46,6 +46,65 @@ #define _ASM_SI__ASM_REG(si) #define _ASM_DI__ASM_REG(di) +#ifndef __x86_64__ +/* 32 bit */ + +#define _ASM_ARG1 _ASM_AX +#define _ASM_ARG2 _ASM_DX +#define _ASM_ARG3 
_ASM_CX + +#define _ASM_ARG1L eax +#define _ASM_ARG2L edx +#define _ASM_ARG3L ecx + +#define _ASM_ARG1W ax +#define _ASM_ARG2W dx +#define _ASM_ARG3W cx + +#define _ASM_ARG1B al +#define _ASM_ARG2B dl +#define _ASM_ARG3B cl + +#else +/* 64 bit */ + +#define _ASM_ARG1 _ASM_DI +#define _ASM_ARG2 _ASM_SI +#define _ASM_ARG3 _ASM_DX +#define _ASM_ARG4 _ASM_CX +#define _ASM_ARG5 r8 +#define _ASM_ARG6 r9 + +#define _ASM_ARG1Q rdi +#define _ASM_ARG2Q rsi +#define _ASM_ARG3Q rdx +#define _ASM_ARG4Q rcx +#define _ASM_ARG5Q r8 +#define _ASM_ARG6Q r9 + +#define _ASM_ARG1L edi +#define _ASM_ARG2L esi +#define _ASM_ARG3L edx +#define _ASM_ARG4L ecx +#define _ASM_ARG5L r8d +#define _ASM_ARG6L r9d + +#define _ASM_ARG1W di +#define _ASM_ARG2W si +#define _ASM_ARG3W dx +#define _ASM_ARG4W cx +#define _ASM_ARG5W r8w +#define _ASM_ARG6W r9w + +#define _ASM_ARG1B dil +#define _ASM_ARG2B sil +#define _ASM_ARG3B dl +#define _ASM_ARG4B cl +#define _ASM_ARG5B r8b +#define _ASM_ARG6B r9b + +#endif + /* * Macros to generate condition code outputs from inline assembly, * The output operand must be type "bool". ___ Virtualization mailing list Virtualization@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/virtualization
[PATCH 4.14 01/92] compiler-gcc.h: Add __attribute__((gnu_inline)) to all inline declarations
4.14-stable review patch. If anyone has any objections, please let me know. -- From: Nick Desaulniers commit d03db2bc26f0e4a6849ad649a09c9c73fccdc656 upstream. Functions marked extern inline do not emit an externally visible function when the gnu89 C standard is used. Some KBUILD Makefiles overwrite KBUILD_CFLAGS. This is an issue for GCC 5.1+ users as without an explicit C standard specified, the default is gnu11. Since c99, the semantics of extern inline have changed such that an externally visible function is always emitted. This can lead to multiple definition errors of extern inline functions at link time of compilation units whose build files have removed an explicit C standard compiler flag for users of GCC 5.1+ or Clang. Suggested-by: Arnd Bergmann Suggested-by: H. Peter Anvin Suggested-by: Joe Perches Signed-off-by: Nick Desaulniers Acked-by: Juergen Gross Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: a...@redhat.com Cc: akata...@vmware.com Cc: a...@linux-foundation.org Cc: andrea.pa...@amarulasolutions.com Cc: ard.biesheu...@linaro.org Cc: aryabi...@virtuozzo.com Cc: astrac...@google.com Cc: boris.ostrov...@oracle.com Cc: brijesh.si...@amd.com Cc: caoj.f...@cn.fujitsu.com Cc: ge...@linux-m68k.org Cc: ghackm...@google.com Cc: gre...@linuxfoundation.org Cc: jan.kis...@siemens.com Cc: jarkko.sakki...@linux.intel.com Cc: jpoim...@redhat.com Cc: keesc...@google.com Cc: kirill.shute...@linux.intel.com Cc: kstew...@linuxfoundation.org Cc: linux-...@vger.kernel.org Cc: linux-kbu...@vger.kernel.org Cc: manojgu...@google.com Cc: mawil...@microsoft.com Cc: michal.l...@markovi.net Cc: mj...@google.com Cc: m...@chromium.org Cc: pombreda...@nexb.com Cc: rient...@google.com Cc: rost...@goodmis.org Cc: sedat.di...@gmail.com Cc: thomas.lenda...@amd.com Cc: tstel...@redhat.com Cc: tw...@google.com Cc: virtualization@lists.linux-foundation.org Cc: will.dea...@arm.com Cc: yamada.masah...@socionext.com Link: 
http://lkml.kernel.org/r/20180621162324.36656-2-ndesaulni...@google.com Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- include/linux/compiler-gcc.h | 29 ++--- 1 file changed, 22 insertions(+), 7 deletions(-) --- a/include/linux/compiler-gcc.h +++ b/include/linux/compiler-gcc.h @@ -66,25 +66,40 @@ #endif /* + * Feature detection for gnu_inline (gnu89 extern inline semantics). Either + * __GNUC_STDC_INLINE__ is defined (not using gnu89 extern inline semantics, + * and we opt in to the gnu89 semantics), or __GNUC_STDC_INLINE__ is not + * defined so the gnu89 semantics are the default. + */ +#ifdef __GNUC_STDC_INLINE__ +# define __gnu_inline __attribute__((gnu_inline)) +#else +# define __gnu_inline +#endif + +/* * Force always-inline if the user requests it so via the .config, * or if gcc is too old. * GCC does not warn about unused static inline functions for * -Wunused-function. This turns out to avoid the need for complex #ifdef * directives. Suppress the warning in clang as well by using "unused" * function attribute, which is redundant but not harmful for gcc. + * Prefer gnu_inline, so that extern inline functions do not emit an + * externally visible function. This makes extern inline behave as per gnu89 + * semantics rather than c99. This prevents multiple symbol definition errors + * of extern inline functions at link time. + * A lot of inline functions can cause havoc with function tracing. 
*/ #if !defined(CONFIG_ARCH_SUPPORTS_OPTIMIZED_INLINING) || \ !defined(CONFIG_OPTIMIZE_INLINING) || (__GNUC__ < 4) -#define inline inline __attribute__((always_inline,unused)) notrace -#define __inline__ __inline__ __attribute__((always_inline,unused)) notrace -#define __inline __inline __attribute__((always_inline,unused)) notrace +#define inline \ + inline __attribute__((always_inline, unused)) notrace __gnu_inline #else -/* A lot of inline functions can cause havoc with function tracing */ -#define inline inline __attribute__((unused)) notrace -#define __inline__ __inline__ __attribute__((unused)) notrace -#define __inline __inline __attribute__((unused)) notrace +#define inline inline __attribute__((unused)) notrace __gnu_inline #endif +#define __inline__ inline +#define __inline inline #define __always_inline inline __attribute__((always_inline)) #define noinline __attribute__((noinline)) ___ Virtualization mailing list Virtualization@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/virtualization
[PATCH 4.9 07/66] x86/paravirt: Make native_save_fl() extern inline
4.9-stable review patch. If anyone has any objections, please let me know. -- From: Nick Desaulniers commit d0a8d9378d16eb3c69bd8e6d23779fbdbee3a8c7 upstream. native_save_fl() is marked static inline, but by using it as a function pointer in arch/x86/kernel/paravirt.c, it MUST be outlined. paravirt's use of native_save_fl() also requires that no GPRs other than %rax are clobbered. Compilers have different heuristics which they use to emit stack guard code, the emittance of which can break paravirt's callee saved assumption by clobbering %rcx. Marking a function definition extern inline means that if this version cannot be inlined, then the out-of-line version will be preferred. By having the out-of-line version be implemented in assembly, it cannot be instrumented with a stack protector, which might violate custom calling conventions that code like paravirt rely on. The semantics of extern inline has changed since gnu89. This means that folks using GCC versions >= 5.1 may see symbol redefinition errors at link time for subdirs that override KBUILD_CFLAGS (making the C standard used implicit) regardless of this patch. This has been cleaned up earlier in the patch set, but is left as a note in the commit message for future travelers. Reports: https://lkml.org/lkml/2018/5/7/534 https://github.com/ClangBuiltLinux/linux/issues/16 Discussion: https://bugs.llvm.org/show_bug.cgi?id=37512 https://lkml.org/lkml/2018/5/24/1371 Thanks to the many folks that participated in the discussion. Debugged-by: Alistair Strachan Debugged-by: Matthias Kaehlcke Suggested-by: Arnd Bergmann Suggested-by: H. 
Peter Anvin Suggested-by: Tom Stellar Reported-by: Sedat Dilek Tested-by: Sedat Dilek Signed-off-by: Nick Desaulniers Acked-by: Juergen Gross Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: a...@redhat.com Cc: akata...@vmware.com Cc: a...@linux-foundation.org Cc: andrea.pa...@amarulasolutions.com Cc: ard.biesheu...@linaro.org Cc: aryabi...@virtuozzo.com Cc: astrac...@google.com Cc: boris.ostrov...@oracle.com Cc: brijesh.si...@amd.com Cc: caoj.f...@cn.fujitsu.com Cc: ge...@linux-m68k.org Cc: ghackm...@google.com Cc: gre...@linuxfoundation.org Cc: jan.kis...@siemens.com Cc: jarkko.sakki...@linux.intel.com Cc: j...@perches.com Cc: jpoim...@redhat.com Cc: keesc...@google.com Cc: kirill.shute...@linux.intel.com Cc: kstew...@linuxfoundation.org Cc: linux-...@vger.kernel.org Cc: linux-kbu...@vger.kernel.org Cc: manojgu...@google.com Cc: mawil...@microsoft.com Cc: michal.l...@markovi.net Cc: mj...@google.com Cc: m...@chromium.org Cc: pombreda...@nexb.com Cc: rient...@google.com Cc: rost...@goodmis.org Cc: thomas.lenda...@amd.com Cc: tw...@google.com Cc: virtualization@lists.linux-foundation.org Cc: will.dea...@arm.com Cc: yamada.masah...@socionext.com Link: http://lkml.kernel.org/r/20180621162324.36656-4-ndesaulni...@google.com Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/irqflags.h |2 +- arch/x86/kernel/Makefile|1 + arch/x86/kernel/irqflags.S | 26 ++ 3 files changed, 28 insertions(+), 1 deletion(-) --- a/arch/x86/include/asm/irqflags.h +++ b/arch/x86/include/asm/irqflags.h @@ -12,7 +12,7 @@ * Interrupt control: */ -static inline unsigned long native_save_fl(void) +extern inline unsigned long native_save_fl(void) { unsigned long flags; --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -56,6 +56,7 @@ obj-y += alternative.o i8253.o pci-nom obj-y += tsc.o tsc_msr.o io_delay.o rtc.o obj-y += pci-iommu_table.o obj-y += resource.o +obj-y += irqflags.o obj-y += process.o obj-y += fpu/ --- /dev/null +++ 
b/arch/x86/kernel/irqflags.S @@ -0,0 +1,26 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#include <asm/asm.h> +#include <asm/export.h> +#include <linux/linkage.h> + +/* + * unsigned long native_save_fl(void) + */ +ENTRY(native_save_fl) + pushf + pop %_ASM_AX + ret +ENDPROC(native_save_fl) +EXPORT_SYMBOL(native_save_fl) + +/* + * void native_restore_fl(unsigned long flags) + * %eax/%rdi: flags + */ +ENTRY(native_restore_fl) + push %_ASM_ARG1 + popf + ret +ENDPROC(native_restore_fl) +EXPORT_SYMBOL(native_restore_fl) ___ Virtualization mailing list Virtualization@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/virtualization
[PATCH 4.9 06/66] x86/asm: Add _ASM_ARG* constants for argument registers to <asm/asm.h>
4.9-stable review patch. If anyone has any objections, please let me know. -- From: H. Peter Anvin commit 0e2e160033283e20f688d8bad5b89460cc5bfcc4 upstream. i386 and x86-64 uses different registers for arguments; make them available so we don't have to #ifdef in the actual code. Native size and specified size (q, l, w, b) versions are provided. Signed-off-by: H. Peter Anvin Signed-off-by: Nick Desaulniers Reviewed-by: Sedat Dilek Acked-by: Juergen Gross Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: a...@redhat.com Cc: akata...@vmware.com Cc: a...@linux-foundation.org Cc: andrea.pa...@amarulasolutions.com Cc: ard.biesheu...@linaro.org Cc: a...@arndb.de Cc: aryabi...@virtuozzo.com Cc: astrac...@google.com Cc: boris.ostrov...@oracle.com Cc: brijesh.si...@amd.com Cc: caoj.f...@cn.fujitsu.com Cc: ge...@linux-m68k.org Cc: ghackm...@google.com Cc: gre...@linuxfoundation.org Cc: jan.kis...@siemens.com Cc: jarkko.sakki...@linux.intel.com Cc: j...@perches.com Cc: jpoim...@redhat.com Cc: keesc...@google.com Cc: kirill.shute...@linux.intel.com Cc: kstew...@linuxfoundation.org Cc: linux-...@vger.kernel.org Cc: linux-kbu...@vger.kernel.org Cc: manojgu...@google.com Cc: mawil...@microsoft.com Cc: michal.l...@markovi.net Cc: mj...@google.com Cc: m...@chromium.org Cc: pombreda...@nexb.com Cc: rient...@google.com Cc: rost...@goodmis.org Cc: thomas.lenda...@amd.com Cc: tstel...@redhat.com Cc: tw...@google.com Cc: virtualization@lists.linux-foundation.org Cc: will.dea...@arm.com Cc: yamada.masah...@socionext.com Link: http://lkml.kernel.org/r/20180621162324.36656-3-ndesaulni...@google.com Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/asm.h | 59 + 1 file changed, 59 insertions(+) --- a/arch/x86/include/asm/asm.h +++ b/arch/x86/include/asm/asm.h @@ -45,6 +45,65 @@ #define _ASM_SI__ASM_REG(si) #define _ASM_DI__ASM_REG(di) +#ifndef __x86_64__ +/* 32 bit */ + +#define _ASM_ARG1 _ASM_AX +#define _ASM_ARG2 _ASM_DX +#define _ASM_ARG3 
_ASM_CX + +#define _ASM_ARG1L eax +#define _ASM_ARG2L edx +#define _ASM_ARG3L ecx + +#define _ASM_ARG1W ax +#define _ASM_ARG2W dx +#define _ASM_ARG3W cx + +#define _ASM_ARG1B al +#define _ASM_ARG2B dl +#define _ASM_ARG3B cl + +#else +/* 64 bit */ + +#define _ASM_ARG1 _ASM_DI +#define _ASM_ARG2 _ASM_SI +#define _ASM_ARG3 _ASM_DX +#define _ASM_ARG4 _ASM_CX +#define _ASM_ARG5 r8 +#define _ASM_ARG6 r9 + +#define _ASM_ARG1Q rdi +#define _ASM_ARG2Q rsi +#define _ASM_ARG3Q rdx +#define _ASM_ARG4Q rcx +#define _ASM_ARG5Q r8 +#define _ASM_ARG6Q r9 + +#define _ASM_ARG1L edi +#define _ASM_ARG2L esi +#define _ASM_ARG3L edx +#define _ASM_ARG4L ecx +#define _ASM_ARG5L r8d +#define _ASM_ARG6L r9d + +#define _ASM_ARG1W di +#define _ASM_ARG2W si +#define _ASM_ARG3W dx +#define _ASM_ARG4W cx +#define _ASM_ARG5W r8w +#define _ASM_ARG6W r9w + +#define _ASM_ARG1B dil +#define _ASM_ARG2B sil +#define _ASM_ARG3B dl +#define _ASM_ARG4B cl +#define _ASM_ARG5B r8b +#define _ASM_ARG6B r9b + +#endif + /* * Macros to generate condition code outputs from inline assembly, * The output operand must be type "bool". ___ Virtualization mailing list Virtualization@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/virtualization
[PATCH 4.9 05/66] compiler-gcc.h: Add __attribute__((gnu_inline)) to all inline declarations
4.9-stable review patch. If anyone has any objections, please let me know. -- From: Nick Desaulniers commit d03db2bc26f0e4a6849ad649a09c9c73fccdc656 upstream. Functions marked extern inline do not emit an externally visible function when the gnu89 C standard is used. Some KBUILD Makefiles overwrite KBUILD_CFLAGS. This is an issue for GCC 5.1+ users as without an explicit C standard specified, the default is gnu11. Since c99, the semantics of extern inline have changed such that an externally visible function is always emitted. This can lead to multiple definition errors of extern inline functions at link time of compilation units whose build files have removed an explicit C standard compiler flag for users of GCC 5.1+ or Clang. Suggested-by: Arnd Bergmann Suggested-by: H. Peter Anvin Suggested-by: Joe Perches Signed-off-by: Nick Desaulniers Acked-by: Juergen Gross Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: a...@redhat.com Cc: akata...@vmware.com Cc: a...@linux-foundation.org Cc: andrea.pa...@amarulasolutions.com Cc: ard.biesheu...@linaro.org Cc: aryabi...@virtuozzo.com Cc: astrac...@google.com Cc: boris.ostrov...@oracle.com Cc: brijesh.si...@amd.com Cc: caoj.f...@cn.fujitsu.com Cc: ge...@linux-m68k.org Cc: ghackm...@google.com Cc: gre...@linuxfoundation.org Cc: jan.kis...@siemens.com Cc: jarkko.sakki...@linux.intel.com Cc: jpoim...@redhat.com Cc: keesc...@google.com Cc: kirill.shute...@linux.intel.com Cc: kstew...@linuxfoundation.org Cc: linux-...@vger.kernel.org Cc: linux-kbu...@vger.kernel.org Cc: manojgu...@google.com Cc: mawil...@microsoft.com Cc: michal.l...@markovi.net Cc: mj...@google.com Cc: m...@chromium.org Cc: pombreda...@nexb.com Cc: rient...@google.com Cc: rost...@goodmis.org Cc: sedat.di...@gmail.com Cc: thomas.lenda...@amd.com Cc: tstel...@redhat.com Cc: tw...@google.com Cc: virtualization@lists.linux-foundation.org Cc: will.dea...@arm.com Cc: yamada.masah...@socionext.com Link: 
http://lkml.kernel.org/r/20180621162324.36656-2-ndesaulni...@google.com Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- include/linux/compiler-gcc.h | 29 ++--- 1 file changed, 22 insertions(+), 7 deletions(-) --- a/include/linux/compiler-gcc.h +++ b/include/linux/compiler-gcc.h @@ -65,25 +65,40 @@ #endif /* + * Feature detection for gnu_inline (gnu89 extern inline semantics). Either + * __GNUC_STDC_INLINE__ is defined (not using gnu89 extern inline semantics, + * and we opt in to the gnu89 semantics), or __GNUC_STDC_INLINE__ is not + * defined so the gnu89 semantics are the default. + */ +#ifdef __GNUC_STDC_INLINE__ +# define __gnu_inline __attribute__((gnu_inline)) +#else +# define __gnu_inline +#endif + +/* * Force always-inline if the user requests it so via the .config, * or if gcc is too old. * GCC does not warn about unused static inline functions for * -Wunused-function. This turns out to avoid the need for complex #ifdef * directives. Suppress the warning in clang as well by using "unused" * function attribute, which is redundant but not harmful for gcc. + * Prefer gnu_inline, so that extern inline functions do not emit an + * externally visible function. This makes extern inline behave as per gnu89 + * semantics rather than c99. This prevents multiple symbol definition errors + * of extern inline functions at link time. + * A lot of inline functions can cause havoc with function tracing. 
*/ #if !defined(CONFIG_ARCH_SUPPORTS_OPTIMIZED_INLINING) || \ !defined(CONFIG_OPTIMIZE_INLINING) || (__GNUC__ < 4) -#define inline inline __attribute__((always_inline,unused)) notrace -#define __inline__ __inline__ __attribute__((always_inline,unused)) notrace -#define __inline __inline __attribute__((always_inline,unused)) notrace +#define inline \ + inline __attribute__((always_inline, unused)) notrace __gnu_inline #else -/* A lot of inline functions can cause havoc with function tracing */ -#define inline inline __attribute__((unused)) notrace -#define __inline__ __inline__ __attribute__((unused)) notrace -#define __inline __inline __attribute__((unused)) notrace +#define inline inline __attribute__((unused)) notrace __gnu_inline #endif +#define __inline__ inline +#define __inline inline #define __always_inline inline __attribute__((always_inline)) #define noinline __attribute__((noinline)) ___ Virtualization mailing list Virtualization@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/virtualization
[PATCH 4.4 05/31] x86/asm: Add _ASM_ARG* constants for argument registers to <asm/asm.h>
4.4-stable review patch. If anyone has any objections, please let me know. -- From: H. Peter Anvin commit 0e2e160033283e20f688d8bad5b89460cc5bfcc4 upstream. i386 and x86-64 uses different registers for arguments; make them available so we don't have to #ifdef in the actual code. Native size and specified size (q, l, w, b) versions are provided. Signed-off-by: H. Peter Anvin Signed-off-by: Nick Desaulniers Reviewed-by: Sedat Dilek Acked-by: Juergen Gross Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: a...@redhat.com Cc: akata...@vmware.com Cc: a...@linux-foundation.org Cc: andrea.pa...@amarulasolutions.com Cc: ard.biesheu...@linaro.org Cc: a...@arndb.de Cc: aryabi...@virtuozzo.com Cc: astrac...@google.com Cc: boris.ostrov...@oracle.com Cc: brijesh.si...@amd.com Cc: caoj.f...@cn.fujitsu.com Cc: ge...@linux-m68k.org Cc: ghackm...@google.com Cc: gre...@linuxfoundation.org Cc: jan.kis...@siemens.com Cc: jarkko.sakki...@linux.intel.com Cc: j...@perches.com Cc: jpoim...@redhat.com Cc: keesc...@google.com Cc: kirill.shute...@linux.intel.com Cc: kstew...@linuxfoundation.org Cc: linux-...@vger.kernel.org Cc: linux-kbu...@vger.kernel.org Cc: manojgu...@google.com Cc: mawil...@microsoft.com Cc: michal.l...@markovi.net Cc: mj...@google.com Cc: m...@chromium.org Cc: pombreda...@nexb.com Cc: rient...@google.com Cc: rost...@goodmis.org Cc: thomas.lenda...@amd.com Cc: tstel...@redhat.com Cc: tw...@google.com Cc: virtualization@lists.linux-foundation.org Cc: will.dea...@arm.com Cc: yamada.masah...@socionext.com Link: http://lkml.kernel.org/r/20180621162324.36656-3-ndesaulni...@google.com Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/asm.h | 59 + 1 file changed, 59 insertions(+) --- a/arch/x86/include/asm/asm.h +++ b/arch/x86/include/asm/asm.h @@ -44,6 +44,65 @@ #define _ASM_SI__ASM_REG(si) #define _ASM_DI__ASM_REG(di) +#ifndef __x86_64__ +/* 32 bit */ + +#define _ASM_ARG1 _ASM_AX +#define _ASM_ARG2 _ASM_DX +#define _ASM_ARG3 
_ASM_CX + +#define _ASM_ARG1L eax +#define _ASM_ARG2L edx +#define _ASM_ARG3L ecx + +#define _ASM_ARG1W ax +#define _ASM_ARG2W dx +#define _ASM_ARG3W cx + +#define _ASM_ARG1B al +#define _ASM_ARG2B dl +#define _ASM_ARG3B cl + +#else +/* 64 bit */ + +#define _ASM_ARG1 _ASM_DI +#define _ASM_ARG2 _ASM_SI +#define _ASM_ARG3 _ASM_DX +#define _ASM_ARG4 _ASM_CX +#define _ASM_ARG5 r8 +#define _ASM_ARG6 r9 + +#define _ASM_ARG1Q rdi +#define _ASM_ARG2Q rsi +#define _ASM_ARG3Q rdx +#define _ASM_ARG4Q rcx +#define _ASM_ARG5Q r8 +#define _ASM_ARG6Q r9 + +#define _ASM_ARG1L edi +#define _ASM_ARG2L esi +#define _ASM_ARG3L edx +#define _ASM_ARG4L ecx +#define _ASM_ARG5L r8d +#define _ASM_ARG6L r9d + +#define _ASM_ARG1W di +#define _ASM_ARG2W si +#define _ASM_ARG3W dx +#define _ASM_ARG4W cx +#define _ASM_ARG5W r8w +#define _ASM_ARG6W r9w + +#define _ASM_ARG1B dil +#define _ASM_ARG2B sil +#define _ASM_ARG3B dl +#define _ASM_ARG4B cl +#define _ASM_ARG5B r8b +#define _ASM_ARG6B r9b + +#endif + /* Exception table entry */ #ifdef __ASSEMBLY__ # define _ASM_EXTABLE(from,to) \ ___ Virtualization mailing list Virtualization@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/virtualization
[PATCH 4.4 04/31] compiler-gcc.h: Add __attribute__((gnu_inline)) to all inline declarations
4.4-stable review patch. If anyone has any objections, please let me know. -- From: Nick Desaulniers commit d03db2bc26f0e4a6849ad649a09c9c73fccdc656 upstream. Functions marked extern inline do not emit an externally visible function when the gnu89 C standard is used. Some KBUILD Makefiles overwrite KBUILD_CFLAGS. This is an issue for GCC 5.1+ users as without an explicit C standard specified, the default is gnu11. Since c99, the semantics of extern inline have changed such that an externally visible function is always emitted. This can lead to multiple definition errors of extern inline functions at link time of compilation units whose build files have removed an explicit C standard compiler flag for users of GCC 5.1+ or Clang. Suggested-by: Arnd Bergmann Suggested-by: H. Peter Anvin Suggested-by: Joe Perches Signed-off-by: Nick Desaulniers Acked-by: Juergen Gross Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: a...@redhat.com Cc: akata...@vmware.com Cc: a...@linux-foundation.org Cc: andrea.pa...@amarulasolutions.com Cc: ard.biesheu...@linaro.org Cc: aryabi...@virtuozzo.com Cc: astrac...@google.com Cc: boris.ostrov...@oracle.com Cc: brijesh.si...@amd.com Cc: caoj.f...@cn.fujitsu.com Cc: ge...@linux-m68k.org Cc: ghackm...@google.com Cc: gre...@linuxfoundation.org Cc: jan.kis...@siemens.com Cc: jarkko.sakki...@linux.intel.com Cc: jpoim...@redhat.com Cc: keesc...@google.com Cc: kirill.shute...@linux.intel.com Cc: kstew...@linuxfoundation.org Cc: linux-...@vger.kernel.org Cc: linux-kbu...@vger.kernel.org Cc: manojgu...@google.com Cc: mawil...@microsoft.com Cc: michal.l...@markovi.net Cc: mj...@google.com Cc: m...@chromium.org Cc: pombreda...@nexb.com Cc: rient...@google.com Cc: rost...@goodmis.org Cc: sedat.di...@gmail.com Cc: thomas.lenda...@amd.com Cc: tstel...@redhat.com Cc: tw...@google.com Cc: virtualization@lists.linux-foundation.org Cc: will.dea...@arm.com Cc: yamada.masah...@socionext.com Link: 
http://lkml.kernel.org/r/20180621162324.36656-2-ndesaulni...@google.com Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- include/linux/compiler-gcc.h | 29 ++--- 1 file changed, 22 insertions(+), 7 deletions(-) --- a/include/linux/compiler-gcc.h +++ b/include/linux/compiler-gcc.h @@ -65,25 +65,40 @@ #endif /* + * Feature detection for gnu_inline (gnu89 extern inline semantics). Either + * __GNUC_STDC_INLINE__ is defined (not using gnu89 extern inline semantics, + * and we opt in to the gnu89 semantics), or __GNUC_STDC_INLINE__ is not + * defined so the gnu89 semantics are the default. + */ +#ifdef __GNUC_STDC_INLINE__ +# define __gnu_inline __attribute__((gnu_inline)) +#else +# define __gnu_inline +#endif + +/* * Force always-inline if the user requests it so via the .config, * or if gcc is too old. * GCC does not warn about unused static inline functions for * -Wunused-function. This turns out to avoid the need for complex #ifdef * directives. Suppress the warning in clang as well by using "unused" * function attribute, which is redundant but not harmful for gcc. + * Prefer gnu_inline, so that extern inline functions do not emit an + * externally visible function. This makes extern inline behave as per gnu89 + * semantics rather than c99. This prevents multiple symbol definition errors + * of extern inline functions at link time. + * A lot of inline functions can cause havoc with function tracing. 
*/ #if !defined(CONFIG_ARCH_SUPPORTS_OPTIMIZED_INLINING) || \ !defined(CONFIG_OPTIMIZE_INLINING) || (__GNUC__ < 4) -#define inline inline __attribute__((always_inline,unused)) notrace -#define __inline__ __inline__ __attribute__((always_inline,unused)) notrace -#define __inline __inline __attribute__((always_inline,unused)) notrace +#define inline \ + inline __attribute__((always_inline, unused)) notrace __gnu_inline #else -/* A lot of inline functions can cause havoc with function tracing */ -#define inline inline __attribute__((unused)) notrace -#define __inline__ __inline__ __attribute__((unused)) notrace -#define __inline __inline __attribute__((unused)) notrace +#define inline inline __attribute__((unused)) notrace __gnu_inline #endif +#define __inline__ inline +#define __inline inline #define __always_inlineinline __attribute__((always_inline)) #define noinline __attribute__((noinline)) ___ Virtualization mailing list Virtualization@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/virtualization
[PATCH v36 3/5] virtio-balloon: VIRTIO_BALLOON_F_FREE_PAGE_HINT
Negotiation of the VIRTIO_BALLOON_F_FREE_PAGE_HINT feature indicates the support of reporting hints of guest free pages to host via virtio-balloon. Currently, only free page blocks of order MAX_ORDER - 1 are reported. They are obtained one by one from the mm free list via the regular allocation function. The allocated pages are given back to mm after they are put onto the vq. Host requests the guest to report free page hints by sending a new cmd id to the guest via the free_page_report_cmd_id configuration register. When the guest starts to report, it first sends a start cmd to host via the free page vq, which acks to host the cmd id received. When the guest finishes reporting free pages, a stop cmd is sent to host via the vq. TODO: - Add a batch page allocation API to amortize the allocation overhead. Signed-off-by: Wei Wang Signed-off-by: Liang Li Cc: Michael S. Tsirkin Cc: Michal Hocko Cc: Andrew Morton Cc: Linus Torvalds --- drivers/virtio/virtio_balloon.c | 331 +--- include/uapi/linux/virtio_balloon.h | 4 + 2 files changed, 307 insertions(+), 28 deletions(-) diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c index c6fd406..82cd497 100644 --- a/drivers/virtio/virtio_balloon.c +++ b/drivers/virtio/virtio_balloon.c @@ -42,6 +42,14 @@ #define DEFAULT_BALLOON_PAGES_TO_SHRINK 256 #define VIRTBALLOON_OOM_NOTIFY_PRIORITY 80 +#define VIRTIO_BALLOON_FREE_PAGE_ALLOC_FLAG (__GFP_NORETRY | __GFP_NOWARN | \ +__GFP_NOMEMALLOC) +/* The order of free page blocks to report to host */ +#define VIRTIO_BALLOON_FREE_PAGE_ORDER (MAX_ORDER - 1) +/* The size of a free page block in bytes */ +#define VIRTIO_BALLOON_FREE_PAGE_SIZE \ + (1 << (VIRTIO_BALLOON_FREE_PAGE_ORDER + PAGE_SHIFT)) + static unsigned long balloon_pages_to_shrink = DEFAULT_BALLOON_PAGES_TO_SHRINK; module_param(balloon_pages_to_shrink, ulong, 0600); MODULE_PARM_DESC(balloon_pages_to_shrink, "pages to free on memory presure"); @@ -50,9 +58,22 @@ MODULE_PARM_DESC(balloon_pages_to_shrink, "pages to 
free on memory presure"); static struct vfsmount *balloon_mnt; #endif +enum virtio_balloon_vq { + VIRTIO_BALLOON_VQ_INFLATE, + VIRTIO_BALLOON_VQ_DEFLATE, + VIRTIO_BALLOON_VQ_STATS, + VIRTIO_BALLOON_VQ_FREE_PAGE, + VIRTIO_BALLOON_VQ_MAX +}; + struct virtio_balloon { struct virtio_device *vdev; - struct virtqueue *inflate_vq, *deflate_vq, *stats_vq; + struct virtqueue *inflate_vq, *deflate_vq, *stats_vq, *free_page_vq; + + /* Balloon's own wq for cpu-intensive work items */ + struct workqueue_struct *balloon_wq; + /* The free page reporting work item submitted to the balloon wq */ + struct work_struct report_free_page_work; /* The balloon servicing is delegated to a freezable workqueue. */ struct work_struct update_balloon_stats_work; @@ -62,6 +83,16 @@ struct virtio_balloon { spinlock_t stop_update_lock; bool stop_update; + /* The list of allocated free pages, waiting to be given back to mm */ + struct list_head free_page_list; + spinlock_t free_page_list_lock; + /* The cmd id received from host */ + u32 cmd_id_received; + /* The cmd id that is actively in use */ + __virtio32 cmd_id_active; + /* Buffer to store the stop sign */ + __virtio32 cmd_id_stop; + /* Waiting for host to ack the pages we released. 
*/ wait_queue_head_t acked; @@ -325,17 +356,6 @@ static void stats_handle_request(struct virtio_balloon *vb) virtqueue_kick(vq); } -static void virtballoon_changed(struct virtio_device *vdev) -{ - struct virtio_balloon *vb = vdev->priv; - unsigned long flags; - - spin_lock_irqsave(>stop_update_lock, flags); - if (!vb->stop_update) - queue_work(system_freezable_wq, >update_balloon_size_work); - spin_unlock_irqrestore(>stop_update_lock, flags); -} - static inline s64 towards_target(struct virtio_balloon *vb) { s64 target; @@ -352,6 +372,52 @@ static inline s64 towards_target(struct virtio_balloon *vb) return target - vb->num_pages; } +static void virtballoon_changed(struct virtio_device *vdev) +{ + struct virtio_balloon *vb = vdev->priv; + unsigned long flags; + s64 diff = towards_target(vb); + + if (diff) { + spin_lock_irqsave(>stop_update_lock, flags); + if (!vb->stop_update) + queue_work(system_freezable_wq, + >update_balloon_size_work); + spin_unlock_irqrestore(>stop_update_lock, flags); + } + + if (virtio_has_feature(vdev, VIRTIO_BALLOON_F_FREE_PAGE_HINT)) { + virtio_cread(vdev, struct virtio_balloon_config, +free_page_report_cmd_id, >cmd_id_received); + if (vb->cmd_id_received !=
[PATCH v36 5/5] virtio-balloon: VIRTIO_BALLOON_F_PAGE_POISON
The VIRTIO_BALLOON_F_PAGE_POISON feature bit is used to indicate if the guest is using page poisoning. Guest writes to the poison_val config field to tell host about the page poisoning value that is in use. Suggested-by: Michael S. Tsirkin Signed-off-by: Wei Wang Cc: Michael S. Tsirkin Cc: Michal Hocko Cc: Andrew Morton --- drivers/virtio/virtio_balloon.c | 10 ++ include/uapi/linux/virtio_balloon.h | 3 +++ 2 files changed, 13 insertions(+) diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c index 82cd497..6340cc1 100644 --- a/drivers/virtio/virtio_balloon.c +++ b/drivers/virtio/virtio_balloon.c @@ -814,6 +814,7 @@ static int virtio_balloon_register_shrinker(struct virtio_balloon *vb) static int virtballoon_probe(struct virtio_device *vdev) { struct virtio_balloon *vb; + __u32 poison_val; int err; if (!vdev->config->get) { @@ -883,6 +884,11 @@ static int virtballoon_probe(struct virtio_device *vdev) VIRTIO_BALLOON_CMD_ID_STOP); spin_lock_init(>free_page_list_lock); INIT_LIST_HEAD(>free_page_list); + if (virtio_has_feature(vdev, VIRTIO_BALLOON_F_PAGE_POISON)) { + memset(_val, PAGE_POISON, sizeof(poison_val)); + virtio_cwrite(vb->vdev, struct virtio_balloon_config, + poison_val, _val); + } } err = virtio_balloon_register_shrinker(vb); @@ -979,6 +985,9 @@ static int virtballoon_restore(struct virtio_device *vdev) static int virtballoon_validate(struct virtio_device *vdev) { + if (!page_poisoning_enabled()) + __virtio_clear_bit(vdev, VIRTIO_BALLOON_F_PAGE_POISON); + __virtio_clear_bit(vdev, VIRTIO_F_IOMMU_PLATFORM); return 0; } @@ -988,6 +997,7 @@ static unsigned int features[] = { VIRTIO_BALLOON_F_STATS_VQ, VIRTIO_BALLOON_F_DEFLATE_ON_OOM, VIRTIO_BALLOON_F_FREE_PAGE_HINT, + VIRTIO_BALLOON_F_PAGE_POISON, }; static struct virtio_driver virtio_balloon_driver = { diff --git a/include/uapi/linux/virtio_balloon.h b/include/uapi/linux/virtio_balloon.h index 18ee430..80a7b7e 100644 --- a/include/uapi/linux/virtio_balloon.h +++ 
b/include/uapi/linux/virtio_balloon.h @@ -35,6 +35,7 @@ #define VIRTIO_BALLOON_F_STATS_VQ 1 /* Memory Stats virtqueue */ #define VIRTIO_BALLOON_F_DEFLATE_ON_OOM2 /* Deflate balloon on OOM */ #define VIRTIO_BALLOON_F_FREE_PAGE_HINT3 /* VQ to report free pages */ +#define VIRTIO_BALLOON_F_PAGE_POISON 4 /* Guest is using page poisoning */ /* Size of a PFN in the balloon interface. */ #define VIRTIO_BALLOON_PFN_SHIFT 12 @@ -47,6 +48,8 @@ struct virtio_balloon_config { __u32 actual; /* Free page report command id, readonly by guest */ __u32 free_page_report_cmd_id; + /* Stores PAGE_POISON if page poisoning is in use */ + __u32 poison_val; }; #define VIRTIO_BALLOON_S_SWAP_IN 0 /* Amount of memory swapped in */ -- 2.7.4 ___ Virtualization mailing list Virtualization@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/virtualization
[PATCH v36 4/5] mm/page_poison: expose page_poisoning_enabled to kernel modules
In some usages, e.g. virtio-balloon, a kernel module needs to know if page poisoning is in use. This patch exposes the page_poisoning_enabled function to kernel modules. Signed-off-by: Wei Wang Cc: Andrew Morton Cc: Michal Hocko Cc: Michael S. Tsirkin Acked-by: Andrew Morton --- mm/page_poison.c | 6 ++ 1 file changed, 6 insertions(+) diff --git a/mm/page_poison.c b/mm/page_poison.c index aa2b3d3..830f604 100644 --- a/mm/page_poison.c +++ b/mm/page_poison.c @@ -17,6 +17,11 @@ static int __init early_page_poison_param(char *buf) } early_param("page_poison", early_page_poison_param); +/** + * page_poisoning_enabled - check if page poisoning is enabled + * + * Return true if page poisoning is enabled, or false if not. + */ bool page_poisoning_enabled(void) { /* @@ -29,6 +34,7 @@ bool page_poisoning_enabled(void) (!IS_ENABLED(CONFIG_ARCH_SUPPORTS_DEBUG_PAGEALLOC) && debug_pagealloc_enabled())); } +EXPORT_SYMBOL_GPL(page_poisoning_enabled); static void poison_page(struct page *page) { -- 2.7.4 ___ Virtualization mailing list Virtualization@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/virtualization
[PATCH v36 1/5] virtio-balloon: remove BUG() in init_vqs
It's a bit overkill to use BUG when failing to add an entry to the stats_vq in init_vqs. So remove it and just return the error to the caller to bail out nicely. Signed-off-by: Wei Wang Cc: Michael S. Tsirkin --- drivers/virtio/virtio_balloon.c | 10 +++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c index 6b237e3..9356a1a 100644 --- a/drivers/virtio/virtio_balloon.c +++ b/drivers/virtio/virtio_balloon.c @@ -455,9 +455,13 @@ static int init_vqs(struct virtio_balloon *vb) num_stats = update_balloon_stats(vb); sg_init_one(, vb->stats, sizeof(vb->stats[0]) * num_stats); - if (virtqueue_add_outbuf(vb->stats_vq, , 1, vb, GFP_KERNEL) - < 0) - BUG(); + err = virtqueue_add_outbuf(vb->stats_vq, , 1, vb, + GFP_KERNEL); + if (err) { + dev_warn(>vdev->dev, "%s: add stat_vq failed\n", +__func__); + return err; + } virtqueue_kick(vb->stats_vq); } return 0; -- 2.7.4 ___ Virtualization mailing list Virtualization@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/virtualization
[PATCH v36 2/5] virtio_balloon: replace oom notifier with shrinker
The OOM notifier is being deprecated for the reasons given by Michal Hocko here: https://lkml.org/lkml/2018/7/12/314 This patch replaces the virtio-balloon oom notifier with a shrinker to release balloon pages on memory pressure. In addition, the shrinker_scan function fixes a bug in the replaced virtballoon_oom_notify: at most VIRTIO_BALLOON_ARRAY_PFNS_MAX (i.e. 256) balloon pages could be freed, even when the user had specified a larger number. Signed-off-by: Wei Wang Cc: Michael S. Tsirkin Cc: Michal Hocko Cc: Andrew Morton Cc: Linus Torvalds --- drivers/virtio/virtio_balloon.c | 113 +++- 1 file changed, 65 insertions(+), 48 deletions(-) diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c index 9356a1a..c6fd406 100644 --- a/drivers/virtio/virtio_balloon.c +++ b/drivers/virtio/virtio_balloon.c @@ -27,7 +27,6 @@ #include #include #include -#include #include #include #include @@ -40,12 +39,12 @@ */ #define VIRTIO_BALLOON_PAGES_PER_PAGE (unsigned)(PAGE_SIZE >> VIRTIO_BALLOON_PFN_SHIFT) #define VIRTIO_BALLOON_ARRAY_PFNS_MAX 256 -#define OOM_VBALLOON_DEFAULT_PAGES 256 +#define DEFAULT_BALLOON_PAGES_TO_SHRINK 256 #define VIRTBALLOON_OOM_NOTIFY_PRIORITY 80 -static int oom_pages = OOM_VBALLOON_DEFAULT_PAGES; -module_param(oom_pages, int, S_IRUSR | S_IWUSR); -MODULE_PARM_DESC(oom_pages, "pages to free on OOM"); +static unsigned long balloon_pages_to_shrink = DEFAULT_BALLOON_PAGES_TO_SHRINK; +module_param(balloon_pages_to_shrink, ulong, 0600); +MODULE_PARM_DESC(balloon_pages_to_shrink, "pages to free on memory presure"); #ifdef CONFIG_BALLOON_COMPACTION static struct vfsmount *balloon_mnt; @@ -86,8 +85,8 @@ struct virtio_balloon { /* Memory statistics */ struct virtio_balloon_stat stats[VIRTIO_BALLOON_S_NR]; - /* To register callback in oom notifier call chain */ - struct notifier_block nb; + /* To register a shrinker to shrink memory upon memory pressure */ + struct shrinker shrinker; }; static struct virtio_device_id 
id_table[] = { @@ -365,38 +364,6 @@ static void update_balloon_size(struct virtio_balloon *vb) ); } -/* - * virtballoon_oom_notify - release pages when system is under severe - * memory pressure (called from out_of_memory()) - * @self : notifier block struct - * @dummy: not used - * @parm : returned - number of freed pages - * - * The balancing of memory by use of the virtio balloon should not cause - * the termination of processes while there are pages in the balloon. - * If virtio balloon manages to release some memory, it will make the - * system return and retry the allocation that forced the OOM killer - * to run. - */ -static int virtballoon_oom_notify(struct notifier_block *self, - unsigned long dummy, void *parm) -{ - struct virtio_balloon *vb; - unsigned long *freed; - unsigned num_freed_pages; - - vb = container_of(self, struct virtio_balloon, nb); - if (!virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_DEFLATE_ON_OOM)) - return NOTIFY_OK; - - freed = parm; - num_freed_pages = leak_balloon(vb, oom_pages); - update_balloon_size(vb); - *freed += num_freed_pages; - - return NOTIFY_OK; -} - static void update_balloon_stats_func(struct work_struct *work) { struct virtio_balloon *vb; @@ -548,6 +515,61 @@ static struct file_system_type balloon_fs = { #endif /* CONFIG_BALLOON_COMPACTION */ +static unsigned long virtio_balloon_shrinker_scan(struct shrinker *shrinker, + struct shrink_control *sc) +{ + unsigned long pages_to_free = balloon_pages_to_shrink, + pages_freed = 0; + struct virtio_balloon *vb = container_of(shrinker, + struct virtio_balloon, shrinker); + + /* +* One invocation of leak_balloon can deflate at most +* VIRTIO_BALLOON_ARRAY_PFNS_MAX balloon pages, so we call it +* multiple times to deflate pages till reaching +* balloon_pages_to_shrink pages. 
+*/ + while (vb->num_pages && pages_to_free) { + pages_to_free = balloon_pages_to_shrink - pages_freed; + pages_freed += leak_balloon(vb, pages_to_free); + } + update_balloon_size(vb); + + return pages_freed / VIRTIO_BALLOON_PAGES_PER_PAGE; +} + +static unsigned long virtio_balloon_shrinker_count(struct shrinker *shrinker, + struct shrink_control *sc) +{ + struct virtio_balloon *vb = container_of(shrinker, + struct virtio_balloon, shrinker); + + /* +* We continue to use VIRTIO_BALLOON_F_DEFLATE_ON_OOM to handle the +* case when shrinker needs to be invoked to relieve memory pressure. +
[PATCH v36 0/5] Virtio-balloon: support free page reporting
This patch series is separated from the previous "Virtio-balloon Enhancement" series. The new feature, VIRTIO_BALLOON_F_FREE_PAGE_HINT, implemented by this series enables the virtio-balloon driver to report hints of guest free pages to the host. It can be used to accelerate live migration of VMs. Here is an introduction to this usage: Live migration needs to transfer the VM's memory from the source machine to the destination round by round. For the 1st round, all the VM's memory is transferred. From the 2nd round, only the pieces of memory that were written by the guest (after the 1st round) are transferred. One method commonly used by the hypervisor to track which parts of memory are written is to write-protect all the guest memory. This feature optimizes live migration by skipping the transfer of guest free pages. It is not a problem if the memory pages are used after they are given to the hypervisor as free page hints, because they will be tracked by the hypervisor and transferred in the subsequent round if they are used and written. * Tests - Test Environment Host: Intel(R) Xeon(R) CPU E5-2699 v4 @ 2.20GHz Guest: 8G RAM, 4 vCPU Migration setup: migrate_set_speed 100G, migrate_set_downtime 2 second - Test Results - Idle Guest Live Migration Time (results are averaged over 10 runs): - Optimization vs. Legacy = 409ms vs. 1757ms --> ~77% reduction (setting the page poisoning value to zero and enabling ksm don't affect the comparison result) - Guest with Linux Compilation Workload (make bzImage -j4): - Live Migration Time (average) Optimization vs. Legacy = 1407ms vs. 2528ms --> ~44% reduction - Linux Compilation Time Optimization vs. Legacy = 5min4s vs. 5min12s --> no obvious difference ChangeLog: v35->v36: - remove the mm patch, as Linus has a suggestion to get free page addresses via allocation, instead of reading from the free page list. 
- virtio-balloon: - replace oom notifier with shrinker; - the guest to host communication interface remains the same as v32. - allocate free page blocks and send to host one by one, and free them after sending all the pages. For ChangeLogs from v22 to v35, please refer to https://lwn.net/Articles/759413/ For ChangeLogs before v21, please refer to https://lwn.net/Articles/743660/ Wei Wang (5): virtio-balloon: remove BUG() in init_vqs virtio_balloon: replace oom notifier with shrinker virtio-balloon: VIRTIO_BALLOON_F_FREE_PAGE_HINT mm/page_poison: expose page_poisoning_enabled to kernel modules virtio-balloon: VIRTIO_BALLOON_F_PAGE_POISON drivers/virtio/virtio_balloon.c | 456 ++-- include/uapi/linux/virtio_balloon.h | 7 + mm/page_poison.c | 6 + 3 files changed, 394 insertions(+), 75 deletions(-) -- 2.7.4 ___ Virtualization mailing list Virtualization@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/virtualization