[PATCH RFC V4 2/3] KVM: X86: Adding arbitrary data pointer in kvm memslot iterator functions

2018-07-20 Thread Ahmed Abd El Mawgood
This will help share data with the slot_level_handler callback. In my
case I need to share a counter of the pages traversed so that it can be
used with a bitmap. Being able to pass an arbitrary data pointer into the
slot_level_handler callback makes that easy.

Signed-off-by: Ahmed Abd El Mawgood 
---
 arch/x86/kvm/mmu.c | 65 +++---
 1 file changed, 37 insertions(+), 28 deletions(-)

diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index d594690d8b95..77661530b2c4 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -1418,7 +1418,7 @@ static bool spte_write_protect(u64 *sptep, bool 
pt_protect)
 
 static bool __rmap_write_protect(struct kvm *kvm,
 struct kvm_rmap_head *rmap_head,
-bool pt_protect)
+bool pt_protect, void *data)
 {
u64 *sptep;
struct rmap_iterator iter;
@@ -1457,7 +1457,8 @@ static bool wrprot_ad_disabled_spte(u64 *sptep)
  * - W bit on ad-disabled SPTEs.
  * Returns true iff any D or W bits were cleared.
  */
-static bool __rmap_clear_dirty(struct kvm *kvm, struct kvm_rmap_head 
*rmap_head)
+static bool __rmap_clear_dirty(struct kvm *kvm, struct kvm_rmap_head 
*rmap_head,
+   void *data)
 {
u64 *sptep;
struct rmap_iterator iter;
@@ -1483,7 +1484,8 @@ static bool spte_set_dirty(u64 *sptep)
return mmu_spte_update(sptep, spte);
 }
 
-static bool __rmap_set_dirty(struct kvm *kvm, struct kvm_rmap_head *rmap_head)
+static bool __rmap_set_dirty(struct kvm *kvm, struct kvm_rmap_head *rmap_head,
+   void *data)
 {
u64 *sptep;
struct rmap_iterator iter;
@@ -1515,7 +1517,7 @@ static void kvm_mmu_write_protect_pt_masked(struct kvm 
*kvm,
while (mask) {
rmap_head = __gfn_to_rmap(slot->base_gfn + gfn_offset + 
__ffs(mask),
  PT_PAGE_TABLE_LEVEL, slot);
-   __rmap_write_protect(kvm, rmap_head, false);
+   __rmap_write_protect(kvm, rmap_head, false, NULL);
 
/* clear the first set bit */
mask &= mask - 1;
@@ -1541,7 +1543,7 @@ void kvm_mmu_clear_dirty_pt_masked(struct kvm *kvm,
while (mask) {
rmap_head = __gfn_to_rmap(slot->base_gfn + gfn_offset + 
__ffs(mask),
  PT_PAGE_TABLE_LEVEL, slot);
-   __rmap_clear_dirty(kvm, rmap_head);
+   __rmap_clear_dirty(kvm, rmap_head, NULL);
 
/* clear the first set bit */
mask &= mask - 1;
@@ -1594,7 +1596,8 @@ bool kvm_mmu_slot_gfn_write_protect(struct kvm *kvm,
 
for (i = PT_PAGE_TABLE_LEVEL; i <= PT_MAX_HUGEPAGE_LEVEL; ++i) {
rmap_head = __gfn_to_rmap(gfn, i, slot);
-   write_protected |= __rmap_write_protect(kvm, rmap_head, true);
+   write_protected |= __rmap_write_protect(kvm, rmap_head, true,
+   NULL);
}
 
return write_protected;
@@ -1608,7 +1611,8 @@ static bool rmap_write_protect(struct kvm_vcpu *vcpu, u64 
gfn)
return kvm_mmu_slot_gfn_write_protect(vcpu->kvm, slot, gfn);
 }
 
-static bool kvm_zap_rmapp(struct kvm *kvm, struct kvm_rmap_head *rmap_head)
+static bool kvm_zap_rmapp(struct kvm *kvm, struct kvm_rmap_head *rmap_head,
+   void *data)
 {
u64 *sptep;
struct rmap_iterator iter;
@@ -1628,7 +1632,7 @@ static int kvm_unmap_rmapp(struct kvm *kvm, struct 
kvm_rmap_head *rmap_head,
   struct kvm_memory_slot *slot, gfn_t gfn, int level,
   unsigned long data)
 {
-   return kvm_zap_rmapp(kvm, rmap_head);
+   return kvm_zap_rmapp(kvm, rmap_head, NULL);
 }
 
 static int kvm_set_pte_rmapp(struct kvm *kvm, struct kvm_rmap_head *rmap_head,
@@ -5086,13 +5090,15 @@ void kvm_mmu_uninit_vm(struct kvm *kvm)
 }
 
 /* The return value indicates if tlb flush on all vcpus is needed. */
-typedef bool (*slot_level_handler) (struct kvm *kvm, struct kvm_rmap_head 
*rmap_head);
+typedef bool (*slot_level_handler) (struct kvm *kvm,
+   struct kvm_rmap_head *rmap_head, void *data);
 
 /* The caller should hold mmu-lock before calling this function. */
 static __always_inline bool
 slot_handle_level_range(struct kvm *kvm, struct kvm_memory_slot *memslot,
slot_level_handler fn, int start_level, int end_level,
-   gfn_t start_gfn, gfn_t end_gfn, bool lock_flush_tlb)
+   gfn_t start_gfn, gfn_t end_gfn, bool lock_flush_tlb,
+   void *data)
 {
struct slot_rmap_walk_iterator iterator;
bool flush = false;
@@ -5100,7 +5106,7 @@ slot_handle_level_range(struct kvm *kvm, struct 
kvm_memory_slot *memslot,
for_each_slot_rmap_range(memslot, start_level, end_level, start_gfn,
end_gfn, ) 

[PATCH RFC V4 1/3] KVM: X86: Memory ROE documentation

2018-07-20 Thread Ahmed Abd El Mawgood
Following up on my previous threads on KVM-assisted anti-rootkit
protections.
The current version doesn't address attacks involving page
remapping. That part is still a design in progress; nevertheless, it
will be addressed in my later patch sets.

Signed-off-by: Ahmed Abd El Mawgood 
---
 Documentation/virtual/kvm/hypercalls.txt | 14 ++
 1 file changed, 14 insertions(+)

diff --git a/Documentation/virtual/kvm/hypercalls.txt 
b/Documentation/virtual/kvm/hypercalls.txt
index a890529c63ed..affd997eabfe 100644
--- a/Documentation/virtual/kvm/hypercalls.txt
+++ b/Documentation/virtual/kvm/hypercalls.txt
@@ -121,3 +121,17 @@ compute the CLOCK_REALTIME for its clock, at the same 
instant.
 
 Returns KVM_EOPNOTSUPP if the host does not use TSC clocksource,
 or if clock type is different than KVM_CLOCK_PAIRING_WALLCLOCK.
+
+7. KVM_HC_HMROE
+
+Architecture: x86
+Status: active
+Purpose: Hypercall used to apply Read-Only Enforcement to guest pages
+Usage:
+ a0: Start address aligned to page boundary.
+ a1: Number of pages to be protected.
+This hypercall lets a guest kernel have part of its read/write memory
+converted into read-only.  This action is irreversible. KVM_HC_HMROE can
+not be triggered from guest Ring 3 (user mode). The reason is that user
+mode malicious software can make use of it to enforce read only protection
+on an arbitrary memory page thus crashing the kernel.
-- 
2.16.4

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


Memory Read Only Enforcement: VMM assisted kernel rootkit mitigation for KVM V4

2018-07-20 Thread Ahmed Abd El Mawgood
Here is the change log from V3 to V4:
- Fixing spelling/grammar mistakes, as suggested by Randy Dunlap.
- Changing the hypercall interface to be able to process multiple pages
  per hypercall, also suggested by Randy Dunlap. It turns out that
  this will save lots of vmexits/memory slot flushes when protecting many
  pages.

[PATCH RFC V4 1/3] KVM: X86: Memory ROE documentation
[PATCH RFC V4 2/3] KVM: X86: Adding arbitrary data pointer in kvm memslot 
iterator functions
[PATCH RFC V4 3/3] KVM: X86: Adding skeleton for Memory ROE

Summary:

 Documentation/virtual/kvm/hypercalls.txt |  14 
 arch/x86/include/asm/kvm_host.h  |  11 ++-
 arch/x86/kvm/Kconfig |   7 ++
 arch/x86/kvm/mmu.c   | 127 ++-
 arch/x86/kvm/x86.c   | 104 -
 include/linux/kvm_host.h |   3 +
 include/uapi/linux/kvm_para.h|   1 +
 virt/kvm/kvm_main.c  |  29 ++-
 8 files changed, 254 insertions(+), 42 deletions(-)

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


Re: [PATCH 3/3] [RFC V3] KVM: X86: Adding skeleton for Memory ROE

2018-07-20 Thread Ahmed Soliman
On 20 July 2018 at 03:28, Jann Horn  wrote:
> On Fri, Jul 20, 2018 at 2:26 AM Ahmed Soliman
>  wrote:
>>
>> On 20 July 2018 at 00:59, Jann Horn  wrote:
>> > On Thu, Jul 19, 2018 at 11:40 PM Ahmed Abd El Mawgood
>>
>> > Why are you implementing this in the kernel, instead of doing it in
>> > host userspace?
>>
>> I thought about implementing it completely in QEMU, but it won't be
>> possible for a few reasons:
>>
>> - After talking to QEMU folks I came to the conclusion that, when it
>>  comes to managing memory allocated for the guest, it is always better to
>>  let KVM handle everything, unless there is a good reason to play with
>>  that memory chunk inside QEMU itself.
>
> Why? It seems to me like it'd be easier to add a way to mprotect()
> guest pages to readonly via virtio or whatever in QEMU than to add
> kernel code?

I did an early prototype with mprotect(), but mprotect() didn't do exactly
what I wanted. The goal here is to prevent the guest from writing to a
protected page while still allowing the host to do so if it ever needs to.
mprotect() will either allow both host and guest, or prevent both host and
guest. Even though I cannot come up with a use case where one might need to
allow the host to read/write to a page but prevent the guest from writing to
that page, I think that it is a limitation that will cost a complete redesign
if it proves that this kind of behavior is undesired.
Also, mprotect() is kind of inflexible. Writing to mprotect()ed pages would
immediately trigger SIGSEGV, and then the userspace process will have to
handle that fault in order to control the situation. That sounded to me more
like a little hack than a solid design.


> And if you ever want to support VM snapshotting/resumption, you'll
> need support for restoring the protection flags from QEMU anyway.

I never thought about that, but thanks for letting me know. I will keep that in
my TODO list.


>> - But actually there is a good reason for implementing ROE in kernel space,
>>  it is that ROE is architecture dependent to great extent.
>
> How so? The host component just has to make pages in guest memory
> readonly, right? As far as I can tell, from QEMU, it'd more or less be
> a matter of calling mprotect() a few times? (Plus potentially some
> hooks to prevent other virtio code from crashing by attempting to
> access protected pages - but you'd need that anyway, no matter where
> the protection for the guest is enforced.)

I don't think that virtio would crash that way, because the host should be
able to write to memory as it wants. Still, I see where this is going;
I can probably add hooks so that virtio respects the read-only flags.


>> I should have
>>  emphasized that the only currently supported architecture is X86. I am
>>  not sure how deep the dependency on architecture goes. But as for now
>>  the current set of patches does an SPTE enumeration as part of the process.
>>  To the best of my knowledge, this isn't exposed outside arch/x86/kvm, let
>>  alone having a host user space interface for it. Also, the way I am
>>  planning to protect the TLB from malicious gva -> gpa mappings relies on
>>  the fact that in x86 it is possible to VMEXIT on page faults; I am not sure
>>  if it will be safe to assume that all KVM-supported architectures will
>>  behave this way.
>
> You mean EPT faults, right? If so: I think all architectures have to
> support that - there are already other reasons why random guest memory
> accesses can fault. In particular, the host can page out guest memory.
> I think that's the case on all architectures?

Here my lack of full knowledge kicks in: I am not sure whether it is the EPT
fault or the guest page fault that I want to capture and validate. I think
x86 can VM exit on both. Due to the nature of ROE, guest user space code
cannot have ROE because it is irreversible, so it will be safe to assume that
only pages that are not swappable are the ones I would care about. Still,
lots of the details are blurry for me.
But what I was trying to say is that there are always differences based on
architecture; that is why it will be better to do things in a kernel module
if we decide not to use the mprotect() method.


>> For these reasons I thought it will be better if arch dependent stuff (the
>> mechanism implementation) is kept in arch/*/kvm folder and with minimal
>> modifications to virt/kvm/* after setting a kconfig variable to enable ROE.
>> But I left room for the user space app using kvm to decide the rightful 
>> policy
>> for handling ROE violations. The way it works by KVM_EXIT_MMIO error to user
>> space, keeping all the architectural details hidden away from user space.
>>
>> A last note is that I didn't create this from scratch, instead I extended
>> KVM_MEM_READONLY implementation to also allow R/O per page instead
>> R/O per whole slot which is already done in kernel space.
>
> But then you still have to also do something about virtio code in QEMU
> that might write to those pages, right?

Probably yes, still I haven't fully planned that yet. But I 

Re: [RFC 0/4] Virtio uses DMA API for all devices

2018-07-20 Thread Michael S. Tsirkin
On Fri, Jul 20, 2018 at 09:29:37AM +0530, Anshuman Khandual wrote:
> This patch series is the follow up on the discussions we had before about
> the RFC titled [RFC,V2] virtio: Add platform specific DMA API translation
> for virito devices (https://patchwork.kernel.org/patch/10417371/). There
> were suggestions about doing away with two different paths of transactions
> with the host/QEMU, first being the direct GPA and the other being the DMA
> API based translations.
> 
> First patch attempts to create a direct GPA mapping based DMA operations
> structure called 'virtio_direct_dma_ops' with exact same implementation
> of the direct GPA path which virtio core currently has but just wrapped in
> a DMA API format. Virtio core must use 'virtio_direct_dma_ops' instead of
> the arch default in absence of VIRTIO_F_IOMMU_PLATFORM flag to preserve the
> existing semantics. The second patch does exactly that inside the function
> virtio_finalize_features(). The third patch removes the default direct GPA
> path from virtio core forcing it to use DMA API callbacks for all devices.
> Now with that change, every device must have a DMA operations structure
> associated with it. The fourth patch adds an additional hook which gives
> the platform an opportunity to do yet another override if required. This
> platform hook can be used on POWER Ultravisor based protected guests to
> load up SWIOTLB DMA callbacks to do the required (as discussed previously
> in the above mentioned thread how host is allowed to access only parts of
> the guest GPA range) bounce buffering into the shared memory for all I/O
> scatter gather buffers to be consumed on the host side.
> 
> Please go through these patches and review whether this approach broadly
> makes sense. I will appreciate suggestions, inputs, comments regarding
> the patches or the approach in general. Thank you.

I like how patches 1-3 look. Could you test performance
with/without to see whether the extra indirection through
use of DMA ops causes a measurable slow-down?

> Anshuman Khandual (4):
>   virtio: Define virtio_direct_dma_ops structure
>   virtio: Override device's DMA OPS with virtio_direct_dma_ops selectively
>   virtio: Force virtio core to use DMA API callbacks for all virtio devices
>   virtio: Add platform specific DMA API translation for virito devices
> 
>  arch/powerpc/include/asm/dma-mapping.h |  6 +++
>  arch/powerpc/platforms/pseries/iommu.c |  6 +++
>  drivers/virtio/virtio.c| 72 
> ++
>  drivers/virtio/virtio_pci_common.h |  3 ++
>  drivers/virtio/virtio_ring.c   | 65 +-
>  5 files changed, 89 insertions(+), 63 deletions(-)
> 
> -- 
> 2.9.3
___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


Re: [RFC 4/4] virtio: Add platform specific DMA API translation for virito devices

2018-07-20 Thread Michael S. Tsirkin
On Fri, Jul 20, 2018 at 09:29:41AM +0530, Anshuman Khandual wrote:
>Subject: Re: [RFC 4/4] virtio: Add platform specific DMA API translation for
> virito devices

s/virito/virtio/

> This adds a hook which a platform can define in order to allow it to
> override virtio device's DMA OPS irrespective of whether it has the
> flag VIRTIO_F_IOMMU_PLATFORM set or not. We want to use this to do
> bounce-buffering of data on the new secure pSeries platform, currently
> under development, where a KVM host cannot access all of the memory
> space of a secure KVM guest.  The host can only access the pages which
> the guest has explicitly requested to be shared with the host, thus
> the virtio implementation in the guest has to copy data to and from
> shared pages.
> 
> With this hook, the platform code in the secure guest can force the
> use of swiotlb for virtio buffers, with a back-end for swiotlb which
> will use a pool of pre-allocated shared pages.  Thus all data being
> sent or received by virtio devices will be copied through pages which
> the host has access to.
> 
> Signed-off-by: Anshuman Khandual 
> ---
>  arch/powerpc/include/asm/dma-mapping.h | 6 ++
>  arch/powerpc/platforms/pseries/iommu.c | 6 ++
>  drivers/virtio/virtio.c| 7 +++
>  3 files changed, 19 insertions(+)
> 
> diff --git a/arch/powerpc/include/asm/dma-mapping.h 
> b/arch/powerpc/include/asm/dma-mapping.h
> index 8fa3945..bc5a9d3 100644
> --- a/arch/powerpc/include/asm/dma-mapping.h
> +++ b/arch/powerpc/include/asm/dma-mapping.h
> @@ -116,3 +116,9 @@ extern u64 __dma_get_required_mask(struct device *dev);
>  
>  #endif /* __KERNEL__ */
>  #endif   /* _ASM_DMA_MAPPING_H */
> +
> +#define platform_override_dma_ops platform_override_dma_ops
> +
> +struct virtio_device;
> +
> +extern void platform_override_dma_ops(struct virtio_device *vdev);
> diff --git a/arch/powerpc/platforms/pseries/iommu.c 
> b/arch/powerpc/platforms/pseries/iommu.c
> index 06f0296..5773bc7 100644
> --- a/arch/powerpc/platforms/pseries/iommu.c
> +++ b/arch/powerpc/platforms/pseries/iommu.c
> @@ -38,6 +38,7 @@
>  #include 
>  #include 
>  #include 
> +#include 
>  #include 
>  #include 
>  #include 
> @@ -1396,3 +1397,8 @@ static int __init disable_multitce(char *str)
>  __setup("multitce=", disable_multitce);
>  
>  machine_subsys_initcall_sync(pseries, tce_iommu_bus_notifier_init);
> +
> +void platform_override_dma_ops(struct virtio_device *vdev)
> +{
> + /* Override vdev->parent.dma_ops if required */
> +}
> diff --git a/drivers/virtio/virtio.c b/drivers/virtio/virtio.c
> index 6b13987..432c332 100644
> --- a/drivers/virtio/virtio.c
> +++ b/drivers/virtio/virtio.c
> @@ -168,6 +168,12 @@ EXPORT_SYMBOL_GPL(virtio_add_status);
>  
>  const struct dma_map_ops virtio_direct_dma_ops;
>  
> +#ifndef platform_override_dma_ops
> +static inline void platform_override_dma_ops(struct virtio_device *vdev)
> +{
> +}
> +#endif
> +
>  int virtio_finalize_features(struct virtio_device *dev)
>  {
>   int ret = dev->config->finalize_features(dev);
> @@ -179,6 +185,7 @@ int virtio_finalize_features(struct virtio_device *dev)
>   if (virtio_has_iommu_quirk(dev))
>   set_dma_ops(dev->dev.parent, _direct_dma_ops);
>  
> + platform_override_dma_ops(dev);

Is there a single place where virtio_has_iommu_quirk is called now?
If so, we could put this into virtio_has_iommu_quirk then.

>   if (!virtio_has_feature(dev, VIRTIO_F_VERSION_1))
>   return 0;
>  
> -- 
> 2.9.3
___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


Re: [PATCH v36 0/5] Virtio-balloon: support free page reporting

2018-07-20 Thread Michael S. Tsirkin
On Fri, Jul 20, 2018 at 04:33:00PM +0800, Wei Wang wrote:
> This patch series is separated from the previous "Virtio-balloon
> Enhancement" series. The new feature, VIRTIO_BALLOON_F_FREE_PAGE_HINT,  
> implemented by this series enables the virtio-balloon driver to report
> hints of guest free pages to the host. It can be used to accelerate live
> migration of VMs. Here is an introduction of this usage:
> 
> Live migration needs to transfer the VM's memory from the source machine
> to the destination round by round. For the 1st round, all the VM's memory
> is transferred. From the 2nd round, only the pieces of memory that were
> written by the guest (after the 1st round) are transferred. One method
> that is popularly used by the hypervisor to track which part of memory is
> written is to write-protect all the guest memory.
> 
> This feature enables the optimization by skipping the transfer of guest
> free pages during VM live migration. It is not concerned that the memory
> pages are used after they are given to the hypervisor as a hint of the
> free pages, because they will be tracked by the hypervisor and transferred
> in the subsequent round if they are used and written.
> 
> * Tests
> - Test Environment
> Host: Intel(R) Xeon(R) CPU E5-2699 v4 @ 2.20GHz
> Guest: 8G RAM, 4 vCPU
> Migration setup: migrate_set_speed 100G, migrate_set_downtime 2 second

Can we split out patches 1 and 2? They seem appropriate for this
release ...

> - Test Results
> - Idle Guest Live Migration Time (results are averaged over 10 runs):
> - Optimization v.s. Legacy = 409ms vs 1757ms --> ~77% reduction
>   (setting page poisoning zero and enabling ksm don't affect the
>  comparison result)
> - Guest with Linux Compilation Workload (make bzImage -j4):
> - Live Migration Time (average)
>   Optimization v.s. Legacy = 1407ms v.s. 2528ms --> ~44% reduction
> - Linux Compilation Time
>   Optimization v.s. Legacy = 5min4s v.s. 5min12s
>   --> no obvious difference
> 
> ChangeLog:
> v35->v36:
> - remove the mm patch, as Linus has a suggestion to get free page
>   addresses via allocation, instead of reading from the free page
>   list.
> - virtio-balloon:
> - replace oom notifier with shrinker;
> - the guest to host communication interface remains the same as
>   v32.
>   - allocate free page blocks and send to host one by one, and free
>   them after sending all the pages.
> 
> For ChangeLogs from v22 to v35, please reference
> https://lwn.net/Articles/759413/
> 
> For ChangeLogs before v21, please reference
> https://lwn.net/Articles/743660/
> 
> Wei Wang (5):
>   virtio-balloon: remove BUG() in init_vqs
>   virtio_balloon: replace oom notifier with shrinker
>   virtio-balloon: VIRTIO_BALLOON_F_FREE_PAGE_HINT
>   mm/page_poison: expose page_poisoning_enabled to kernel modules
>   virtio-balloon: VIRTIO_BALLOON_F_PAGE_POISON
> 
>  drivers/virtio/virtio_balloon.c | 456 
> ++--
>  include/uapi/linux/virtio_balloon.h |   7 +
>  mm/page_poison.c|   6 +
>  3 files changed, 394 insertions(+), 75 deletions(-)
> 
> -- 
> 2.7.4
___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


[PATCH 4.17 003/101] x86/paravirt: Make native_save_fl() extern inline

2018-07-20 Thread Greg Kroah-Hartman
4.17-stable review patch.  If anyone has any objections, please let me know.

--

From: Nick Desaulniers 

commit d0a8d9378d16eb3c69bd8e6d23779fbdbee3a8c7 upstream.

native_save_fl() is marked static inline, but by using it as
a function pointer in arch/x86/kernel/paravirt.c, it MUST be outlined.

paravirt's use of native_save_fl() also requires that no GPRs other than
%rax are clobbered.

Compilers have different heuristics which they use to emit stack guard
code, the emittance of which can break paravirt's callee saved assumption
by clobbering %rcx.

Marking a function definition extern inline means that if this version
cannot be inlined, then the out-of-line version will be preferred. By
having the out-of-line version be implemented in assembly, it cannot be
instrumented with a stack protector, which might violate custom calling
conventions that code like paravirt rely on.

The semantics of extern inline has changed since gnu89. This means that
folks using GCC versions >= 5.1 may see symbol redefinition errors at
link time for subdirs that override KBUILD_CFLAGS (making the C standard
used implicit) regardless of this patch. This has been cleaned up
earlier in the patch set, but is left as a note in the commit message
for future travelers.

Reports:
 https://lkml.org/lkml/2018/5/7/534
 https://github.com/ClangBuiltLinux/linux/issues/16

Discussion:
 https://bugs.llvm.org/show_bug.cgi?id=37512
 https://lkml.org/lkml/2018/5/24/1371

Thanks to the many folks that participated in the discussion.

Debugged-by: Alistair Strachan 
Debugged-by: Matthias Kaehlcke 
Suggested-by: Arnd Bergmann 
Suggested-by: H. Peter Anvin 
Suggested-by: Tom Stellar 
Reported-by: Sedat Dilek 
Tested-by: Sedat Dilek 
Signed-off-by: Nick Desaulniers 
Acked-by: Juergen Gross 
Cc: Linus Torvalds 
Cc: Peter Zijlstra 
Cc: Thomas Gleixner 
Cc: a...@redhat.com
Cc: akata...@vmware.com
Cc: a...@linux-foundation.org
Cc: andrea.pa...@amarulasolutions.com
Cc: ard.biesheu...@linaro.org
Cc: aryabi...@virtuozzo.com
Cc: astrac...@google.com
Cc: boris.ostrov...@oracle.com
Cc: brijesh.si...@amd.com
Cc: caoj.f...@cn.fujitsu.com
Cc: ge...@linux-m68k.org
Cc: ghackm...@google.com
Cc: gre...@linuxfoundation.org
Cc: jan.kis...@siemens.com
Cc: jarkko.sakki...@linux.intel.com
Cc: j...@perches.com
Cc: jpoim...@redhat.com
Cc: keesc...@google.com
Cc: kirill.shute...@linux.intel.com
Cc: kstew...@linuxfoundation.org
Cc: linux-...@vger.kernel.org
Cc: linux-kbu...@vger.kernel.org
Cc: manojgu...@google.com
Cc: mawil...@microsoft.com
Cc: michal.l...@markovi.net
Cc: mj...@google.com
Cc: m...@chromium.org
Cc: pombreda...@nexb.com
Cc: rient...@google.com
Cc: rost...@goodmis.org
Cc: thomas.lenda...@amd.com
Cc: tw...@google.com
Cc: virtualization@lists.linux-foundation.org
Cc: will.dea...@arm.com
Cc: yamada.masah...@socionext.com
Link: http://lkml.kernel.org/r/20180621162324.36656-4-ndesaulni...@google.com
Signed-off-by: Ingo Molnar 
Signed-off-by: Greg Kroah-Hartman 

---
 arch/x86/include/asm/irqflags.h |2 +-
 arch/x86/kernel/Makefile|1 +
 arch/x86/kernel/irqflags.S  |   26 ++
 3 files changed, 28 insertions(+), 1 deletion(-)

--- a/arch/x86/include/asm/irqflags.h
+++ b/arch/x86/include/asm/irqflags.h
@@ -13,7 +13,7 @@
  * Interrupt control:
  */
 
-static inline unsigned long native_save_fl(void)
+extern inline unsigned long native_save_fl(void)
 {
unsigned long flags;
 
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -61,6 +61,7 @@ obj-y += alternative.o i8253.o hw_brea
 obj-y  += tsc.o tsc_msr.o io_delay.o rtc.o
 obj-y  += pci-iommu_table.o
 obj-y  += resource.o
+obj-y  += irqflags.o
 
 obj-y  += process.o
 obj-y  += fpu/
--- /dev/null
+++ b/arch/x86/kernel/irqflags.S
@@ -0,0 +1,26 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#include 
+#include 
+#include 
+
+/*
+ * unsigned long native_save_fl(void)
+ */
+ENTRY(native_save_fl)
+   pushf
+   pop %_ASM_AX
+   ret
+ENDPROC(native_save_fl)
+EXPORT_SYMBOL(native_save_fl)
+
+/*
+ * void native_restore_fl(unsigned long flags)
+ * %eax/%rdi: flags
+ */
+ENTRY(native_restore_fl)
+   push %_ASM_ARG1
+   popf
+   ret
+ENDPROC(native_restore_fl)
+EXPORT_SYMBOL(native_restore_fl)


___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


[PATCH 4.17 002/101] x86/asm: Add _ASM_ARG* constants for argument registers to

2018-07-20 Thread Greg Kroah-Hartman
4.17-stable review patch.  If anyone has any objections, please let me know.

--

From: H. Peter Anvin 

commit 0e2e160033283e20f688d8bad5b89460cc5bfcc4 upstream.

i386 and x86-64 use different registers for arguments; make them
available so we don't have to #ifdef in the actual code.

Native size and specified size (q, l, w, b) versions are provided.

Signed-off-by: H. Peter Anvin 
Signed-off-by: Nick Desaulniers 
Reviewed-by: Sedat Dilek 
Acked-by: Juergen Gross 
Cc: Linus Torvalds 
Cc: Peter Zijlstra 
Cc: Thomas Gleixner 
Cc: a...@redhat.com
Cc: akata...@vmware.com
Cc: a...@linux-foundation.org
Cc: andrea.pa...@amarulasolutions.com
Cc: ard.biesheu...@linaro.org
Cc: a...@arndb.de
Cc: aryabi...@virtuozzo.com
Cc: astrac...@google.com
Cc: boris.ostrov...@oracle.com
Cc: brijesh.si...@amd.com
Cc: caoj.f...@cn.fujitsu.com
Cc: ge...@linux-m68k.org
Cc: ghackm...@google.com
Cc: gre...@linuxfoundation.org
Cc: jan.kis...@siemens.com
Cc: jarkko.sakki...@linux.intel.com
Cc: j...@perches.com
Cc: jpoim...@redhat.com
Cc: keesc...@google.com
Cc: kirill.shute...@linux.intel.com
Cc: kstew...@linuxfoundation.org
Cc: linux-...@vger.kernel.org
Cc: linux-kbu...@vger.kernel.org
Cc: manojgu...@google.com
Cc: mawil...@microsoft.com
Cc: michal.l...@markovi.net
Cc: mj...@google.com
Cc: m...@chromium.org
Cc: pombreda...@nexb.com
Cc: rient...@google.com
Cc: rost...@goodmis.org
Cc: thomas.lenda...@amd.com
Cc: tstel...@redhat.com
Cc: tw...@google.com
Cc: virtualization@lists.linux-foundation.org
Cc: will.dea...@arm.com
Cc: yamada.masah...@socionext.com
Link: http://lkml.kernel.org/r/20180621162324.36656-3-ndesaulni...@google.com
Signed-off-by: Ingo Molnar 
Signed-off-by: Greg Kroah-Hartman 

---
 arch/x86/include/asm/asm.h |   59 +
 1 file changed, 59 insertions(+)

--- a/arch/x86/include/asm/asm.h
+++ b/arch/x86/include/asm/asm.h
@@ -46,6 +46,65 @@
 #define _ASM_SI__ASM_REG(si)
 #define _ASM_DI__ASM_REG(di)
 
+#ifndef __x86_64__
+/* 32 bit */
+
+#define _ASM_ARG1  _ASM_AX
+#define _ASM_ARG2  _ASM_DX
+#define _ASM_ARG3  _ASM_CX
+
+#define _ASM_ARG1L eax
+#define _ASM_ARG2L edx
+#define _ASM_ARG3L ecx
+
+#define _ASM_ARG1W ax
+#define _ASM_ARG2W dx
+#define _ASM_ARG3W cx
+
+#define _ASM_ARG1B al
+#define _ASM_ARG2B dl
+#define _ASM_ARG3B cl
+
+#else
+/* 64 bit */
+
+#define _ASM_ARG1  _ASM_DI
+#define _ASM_ARG2  _ASM_SI
+#define _ASM_ARG3  _ASM_DX
+#define _ASM_ARG4  _ASM_CX
+#define _ASM_ARG5  r8
+#define _ASM_ARG6  r9
+
+#define _ASM_ARG1Q rdi
+#define _ASM_ARG2Q rsi
+#define _ASM_ARG3Q rdx
+#define _ASM_ARG4Q rcx
+#define _ASM_ARG5Q r8
+#define _ASM_ARG6Q r9
+
+#define _ASM_ARG1L edi
+#define _ASM_ARG2L esi
+#define _ASM_ARG3L edx
+#define _ASM_ARG4L ecx
+#define _ASM_ARG5L r8d
+#define _ASM_ARG6L r9d
+
+#define _ASM_ARG1W di
+#define _ASM_ARG2W si
+#define _ASM_ARG3W dx
+#define _ASM_ARG4W cx
+#define _ASM_ARG5W r8w
+#define _ASM_ARG6W r9w
+
+#define _ASM_ARG1B dil
+#define _ASM_ARG2B sil
+#define _ASM_ARG3B dl
+#define _ASM_ARG4B cl
+#define _ASM_ARG5B r8b
+#define _ASM_ARG6B r9b
+
+#endif
+
 /*
  * Macros to generate condition code outputs from inline assembly,
  * The output operand must be type "bool".


___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


[PATCH 4.17 001/101] compiler-gcc.h: Add __attribute__((gnu_inline)) to all inline declarations

2018-07-20 Thread Greg Kroah-Hartman
4.17-stable review patch.  If anyone has any objections, please let me know.

--

From: Nick Desaulniers 

commit d03db2bc26f0e4a6849ad649a09c9c73fccdc656 upstream.

Functions marked extern inline do not emit an externally visible
function when the gnu89 C standard is used. Some KBUILD Makefiles
overwrite KBUILD_CFLAGS. This is an issue for GCC 5.1+ users as without
an explicit C standard specified, the default is gnu11. Since c99, the
semantics of extern inline have changed such that an externally visible
function is always emitted. This can lead to multiple definition errors
of extern inline functions at link time of compilation units whose build
files have removed an explicit C standard compiler flag for users of GCC
5.1+ or Clang.

Suggested-by: Arnd Bergmann 
Suggested-by: H. Peter Anvin 
Suggested-by: Joe Perches 
Signed-off-by: Nick Desaulniers 
Acked-by: Juergen Gross 
Cc: Linus Torvalds 
Cc: Peter Zijlstra 
Cc: Thomas Gleixner 
Cc: a...@redhat.com
Cc: akata...@vmware.com
Cc: a...@linux-foundation.org
Cc: andrea.pa...@amarulasolutions.com
Cc: ard.biesheu...@linaro.org
Cc: aryabi...@virtuozzo.com
Cc: astrac...@google.com
Cc: boris.ostrov...@oracle.com
Cc: brijesh.si...@amd.com
Cc: caoj.f...@cn.fujitsu.com
Cc: ge...@linux-m68k.org
Cc: ghackm...@google.com
Cc: gre...@linuxfoundation.org
Cc: jan.kis...@siemens.com
Cc: jarkko.sakki...@linux.intel.com
Cc: jpoim...@redhat.com
Cc: keesc...@google.com
Cc: kirill.shute...@linux.intel.com
Cc: kstew...@linuxfoundation.org
Cc: linux-...@vger.kernel.org
Cc: linux-kbu...@vger.kernel.org
Cc: manojgu...@google.com
Cc: mawil...@microsoft.com
Cc: michal.l...@markovi.net
Cc: mj...@google.com
Cc: m...@chromium.org
Cc: pombreda...@nexb.com
Cc: rient...@google.com
Cc: rost...@goodmis.org
Cc: sedat.di...@gmail.com
Cc: thomas.lenda...@amd.com
Cc: tstel...@redhat.com
Cc: tw...@google.com
Cc: virtualization@lists.linux-foundation.org
Cc: will.dea...@arm.com
Cc: yamada.masah...@socionext.com
Link: http://lkml.kernel.org/r/20180621162324.36656-2-ndesaulni...@google.com
Signed-off-by: Ingo Molnar 
Signed-off-by: Greg Kroah-Hartman 

---
 include/linux/compiler-gcc.h |   29 ++---
 1 file changed, 22 insertions(+), 7 deletions(-)

--- a/include/linux/compiler-gcc.h
+++ b/include/linux/compiler-gcc.h
@@ -66,25 +66,40 @@
 #endif
 
 /*
+ * Feature detection for gnu_inline (gnu89 extern inline semantics). Either
+ * __GNUC_STDC_INLINE__ is defined (not using gnu89 extern inline semantics,
+ * and we opt in to the gnu89 semantics), or __GNUC_STDC_INLINE__ is not
+ * defined so the gnu89 semantics are the default.
+ */
+#ifdef __GNUC_STDC_INLINE__
+# define __gnu_inline  __attribute__((gnu_inline))
+#else
+# define __gnu_inline
+#endif
+
+/*
  * Force always-inline if the user requests it so via the .config,
  * or if gcc is too old.
  * GCC does not warn about unused static inline functions for
  * -Wunused-function.  This turns out to avoid the need for complex #ifdef
  * directives.  Suppress the warning in clang as well by using "unused"
  * function attribute, which is redundant but not harmful for gcc.
+ * Prefer gnu_inline, so that extern inline functions do not emit an
+ * externally visible function. This makes extern inline behave as per gnu89
+ * semantics rather than c99. This prevents multiple symbol definition errors
+ * of extern inline functions at link time.
+ * A lot of inline functions can cause havoc with function tracing.
  */
 #if !defined(CONFIG_ARCH_SUPPORTS_OPTIMIZED_INLINING) ||   \
 !defined(CONFIG_OPTIMIZE_INLINING) || (__GNUC__ < 4)
-#define inline inline  __attribute__((always_inline,unused)) notrace
-#define __inline__ __inline__  __attribute__((always_inline,unused)) notrace
-#define __inline __inline  __attribute__((always_inline,unused)) notrace
+#define inline \
+   inline __attribute__((always_inline, unused)) notrace __gnu_inline
 #else
-/* A lot of inline functions can cause havoc with function tracing */
-#define inline inline  __attribute__((unused)) notrace
-#define __inline__ __inline__  __attribute__((unused)) notrace
-#define __inline __inline  __attribute__((unused)) notrace
+#define inline inline  __attribute__((unused)) notrace __gnu_inline
 #endif
 
+#define __inline__ inline
+#define __inline inline
 #define __always_inline  inline __attribute__((always_inline))
 #define  noinline  __attribute__((noinline))
 


___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


[PATCH 4.14 03/92] x86/paravirt: Make native_save_fl() extern inline

2018-07-20 Thread Greg Kroah-Hartman
4.14-stable review patch.  If anyone has any objections, please let me know.

--

From: Nick Desaulniers 

commit d0a8d9378d16eb3c69bd8e6d23779fbdbee3a8c7 upstream.

native_save_fl() is marked static inline, but by using it as
a function pointer in arch/x86/kernel/paravirt.c, it MUST be outlined.

paravirt's use of native_save_fl() also requires that no GPRs other than
%rax are clobbered.

Compilers have different heuristics which they use to emit stack guard
code, the emittance of which can break paravirt's callee saved assumption
by clobbering %rcx.

Marking a function definition extern inline means that if this version
cannot be inlined, then the out-of-line version will be preferred. By
having the out-of-line version be implemented in assembly, it cannot be
instrumented with a stack protector, which might violate custom calling
conventions that code like paravirt relies on.

The semantics of extern inline has changed since gnu89. This means that
folks using GCC versions >= 5.1 may see symbol redefinition errors at
link time for subdirs that override KBUILD_CFLAGS (making the C standard
used implicit) regardless of this patch. This has been cleaned up
earlier in the patch set, but is left as a note in the commit message
for future travelers.

Reports:
 https://lkml.org/lkml/2018/5/7/534
 https://github.com/ClangBuiltLinux/linux/issues/16

Discussion:
 https://bugs.llvm.org/show_bug.cgi?id=37512
 https://lkml.org/lkml/2018/5/24/1371

Thanks to the many folks that participated in the discussion.

Debugged-by: Alistair Strachan 
Debugged-by: Matthias Kaehlcke 
Suggested-by: Arnd Bergmann 
Suggested-by: H. Peter Anvin 
Suggested-by: Tom Stellar 
Reported-by: Sedat Dilek 
Tested-by: Sedat Dilek 
Signed-off-by: Nick Desaulniers 
Acked-by: Juergen Gross 
Cc: Linus Torvalds 
Cc: Peter Zijlstra 
Cc: Thomas Gleixner 
Cc: a...@redhat.com
Cc: akata...@vmware.com
Cc: a...@linux-foundation.org
Cc: andrea.pa...@amarulasolutions.com
Cc: ard.biesheu...@linaro.org
Cc: aryabi...@virtuozzo.com
Cc: astrac...@google.com
Cc: boris.ostrov...@oracle.com
Cc: brijesh.si...@amd.com
Cc: caoj.f...@cn.fujitsu.com
Cc: ge...@linux-m68k.org
Cc: ghackm...@google.com
Cc: gre...@linuxfoundation.org
Cc: jan.kis...@siemens.com
Cc: jarkko.sakki...@linux.intel.com
Cc: j...@perches.com
Cc: jpoim...@redhat.com
Cc: keesc...@google.com
Cc: kirill.shute...@linux.intel.com
Cc: kstew...@linuxfoundation.org
Cc: linux-...@vger.kernel.org
Cc: linux-kbu...@vger.kernel.org
Cc: manojgu...@google.com
Cc: mawil...@microsoft.com
Cc: michal.l...@markovi.net
Cc: mj...@google.com
Cc: m...@chromium.org
Cc: pombreda...@nexb.com
Cc: rient...@google.com
Cc: rost...@goodmis.org
Cc: thomas.lenda...@amd.com
Cc: tw...@google.com
Cc: virtualization@lists.linux-foundation.org
Cc: will.dea...@arm.com
Cc: yamada.masah...@socionext.com
Link: http://lkml.kernel.org/r/20180621162324.36656-4-ndesaulni...@google.com
Signed-off-by: Ingo Molnar 
Signed-off-by: Greg Kroah-Hartman 

---
 arch/x86/include/asm/irqflags.h |    2 +-
 arch/x86/kernel/Makefile        |    1 +
 arch/x86/kernel/irqflags.S      |   26 ++++++++++++++++++++++++++
 3 files changed, 28 insertions(+), 1 deletion(-)

--- a/arch/x86/include/asm/irqflags.h
+++ b/arch/x86/include/asm/irqflags.h
@@ -13,7 +13,7 @@
  * Interrupt control:
  */
 
-static inline unsigned long native_save_fl(void)
+extern inline unsigned long native_save_fl(void)
 {
unsigned long flags;
 
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -58,6 +58,7 @@ obj-y += alternative.o i8253.o pci-nom
 obj-y  += tsc.o tsc_msr.o io_delay.o rtc.o
 obj-y  += pci-iommu_table.o
 obj-y  += resource.o
+obj-y  += irqflags.o
 
 obj-y  += process.o
 obj-y  += fpu/
--- /dev/null
+++ b/arch/x86/kernel/irqflags.S
@@ -0,0 +1,26 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#include <asm/asm.h>
+#include <asm/export.h>
+#include <linux/linkage.h>
+
+/*
+ * unsigned long native_save_fl(void)
+ */
+ENTRY(native_save_fl)
+   pushf
+   pop %_ASM_AX
+   ret
+ENDPROC(native_save_fl)
+EXPORT_SYMBOL(native_save_fl)
+
+/*
+ * void native_restore_fl(unsigned long flags)
+ * %eax/%rdi: flags
+ */
+ENTRY(native_restore_fl)
+   push %_ASM_ARG1
+   popf
+   ret
+ENDPROC(native_restore_fl)
+EXPORT_SYMBOL(native_restore_fl)


___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


[PATCH 4.14 02/92] x86/asm: Add _ASM_ARG* constants for argument registers to <asm/asm.h>

2018-07-20 Thread Greg Kroah-Hartman
4.14-stable review patch.  If anyone has any objections, please let me know.

--

From: H. Peter Anvin 

commit 0e2e160033283e20f688d8bad5b89460cc5bfcc4 upstream.

i386 and x86-64 use different registers for arguments; make them
available so we don't have to #ifdef in the actual code.

Native size and specified size (q, l, w, b) versions are provided.

Signed-off-by: H. Peter Anvin 
Signed-off-by: Nick Desaulniers 
Reviewed-by: Sedat Dilek 
Acked-by: Juergen Gross 
Cc: Linus Torvalds 
Cc: Peter Zijlstra 
Cc: Thomas Gleixner 
Cc: a...@redhat.com
Cc: akata...@vmware.com
Cc: a...@linux-foundation.org
Cc: andrea.pa...@amarulasolutions.com
Cc: ard.biesheu...@linaro.org
Cc: a...@arndb.de
Cc: aryabi...@virtuozzo.com
Cc: astrac...@google.com
Cc: boris.ostrov...@oracle.com
Cc: brijesh.si...@amd.com
Cc: caoj.f...@cn.fujitsu.com
Cc: ge...@linux-m68k.org
Cc: ghackm...@google.com
Cc: gre...@linuxfoundation.org
Cc: jan.kis...@siemens.com
Cc: jarkko.sakki...@linux.intel.com
Cc: j...@perches.com
Cc: jpoim...@redhat.com
Cc: keesc...@google.com
Cc: kirill.shute...@linux.intel.com
Cc: kstew...@linuxfoundation.org
Cc: linux-...@vger.kernel.org
Cc: linux-kbu...@vger.kernel.org
Cc: manojgu...@google.com
Cc: mawil...@microsoft.com
Cc: michal.l...@markovi.net
Cc: mj...@google.com
Cc: m...@chromium.org
Cc: pombreda...@nexb.com
Cc: rient...@google.com
Cc: rost...@goodmis.org
Cc: thomas.lenda...@amd.com
Cc: tstel...@redhat.com
Cc: tw...@google.com
Cc: virtualization@lists.linux-foundation.org
Cc: will.dea...@arm.com
Cc: yamada.masah...@socionext.com
Link: http://lkml.kernel.org/r/20180621162324.36656-3-ndesaulni...@google.com
Signed-off-by: Ingo Molnar 
Signed-off-by: Greg Kroah-Hartman 

---
 arch/x86/include/asm/asm.h |   59 +
 1 file changed, 59 insertions(+)

--- a/arch/x86/include/asm/asm.h
+++ b/arch/x86/include/asm/asm.h
@@ -46,6 +46,65 @@
 #define _ASM_SI  __ASM_REG(si)
 #define _ASM_DI  __ASM_REG(di)
 
+#ifndef __x86_64__
+/* 32 bit */
+
+#define _ASM_ARG1  _ASM_AX
+#define _ASM_ARG2  _ASM_DX
+#define _ASM_ARG3  _ASM_CX
+
+#define _ASM_ARG1L eax
+#define _ASM_ARG2L edx
+#define _ASM_ARG3L ecx
+
+#define _ASM_ARG1W ax
+#define _ASM_ARG2W dx
+#define _ASM_ARG3W cx
+
+#define _ASM_ARG1B al
+#define _ASM_ARG2B dl
+#define _ASM_ARG3B cl
+
+#else
+/* 64 bit */
+
+#define _ASM_ARG1  _ASM_DI
+#define _ASM_ARG2  _ASM_SI
+#define _ASM_ARG3  _ASM_DX
+#define _ASM_ARG4  _ASM_CX
+#define _ASM_ARG5  r8
+#define _ASM_ARG6  r9
+
+#define _ASM_ARG1Q rdi
+#define _ASM_ARG2Q rsi
+#define _ASM_ARG3Q rdx
+#define _ASM_ARG4Q rcx
+#define _ASM_ARG5Q r8
+#define _ASM_ARG6Q r9
+
+#define _ASM_ARG1L edi
+#define _ASM_ARG2L esi
+#define _ASM_ARG3L edx
+#define _ASM_ARG4L ecx
+#define _ASM_ARG5L r8d
+#define _ASM_ARG6L r9d
+
+#define _ASM_ARG1W di
+#define _ASM_ARG2W si
+#define _ASM_ARG3W dx
+#define _ASM_ARG4W cx
+#define _ASM_ARG5W r8w
+#define _ASM_ARG6W r9w
+
+#define _ASM_ARG1B dil
+#define _ASM_ARG2B sil
+#define _ASM_ARG3B dl
+#define _ASM_ARG4B cl
+#define _ASM_ARG5B r8b
+#define _ASM_ARG6B r9b
+
+#endif
+
 /*
  * Macros to generate condition code outputs from inline assembly,
  * The output operand must be type "bool".


___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


[PATCH 4.14 01/92] compiler-gcc.h: Add __attribute__((gnu_inline)) to all inline declarations

2018-07-20 Thread Greg Kroah-Hartman
4.14-stable review patch.  If anyone has any objections, please let me know.

--

From: Nick Desaulniers 

commit d03db2bc26f0e4a6849ad649a09c9c73fccdc656 upstream.

Functions marked extern inline do not emit an externally visible
function when the gnu89 C standard is used. Some KBUILD Makefiles
overwrite KBUILD_CFLAGS. This is an issue for GCC 5.1+ users as without
an explicit C standard specified, the default is gnu11. Since c99, the
semantics of extern inline have changed such that an externally visible
function is always emitted. This can lead to multiple definition errors
of extern inline functions at link time of compilation units whose build
files have removed an explicit C standard compiler flag for users of GCC
5.1+ or Clang.

Suggested-by: Arnd Bergmann 
Suggested-by: H. Peter Anvin 
Suggested-by: Joe Perches 
Signed-off-by: Nick Desaulniers 
Acked-by: Juergen Gross 
Cc: Linus Torvalds 
Cc: Peter Zijlstra 
Cc: Thomas Gleixner 
Cc: a...@redhat.com
Cc: akata...@vmware.com
Cc: a...@linux-foundation.org
Cc: andrea.pa...@amarulasolutions.com
Cc: ard.biesheu...@linaro.org
Cc: aryabi...@virtuozzo.com
Cc: astrac...@google.com
Cc: boris.ostrov...@oracle.com
Cc: brijesh.si...@amd.com
Cc: caoj.f...@cn.fujitsu.com
Cc: ge...@linux-m68k.org
Cc: ghackm...@google.com
Cc: gre...@linuxfoundation.org
Cc: jan.kis...@siemens.com
Cc: jarkko.sakki...@linux.intel.com
Cc: jpoim...@redhat.com
Cc: keesc...@google.com
Cc: kirill.shute...@linux.intel.com
Cc: kstew...@linuxfoundation.org
Cc: linux-...@vger.kernel.org
Cc: linux-kbu...@vger.kernel.org
Cc: manojgu...@google.com
Cc: mawil...@microsoft.com
Cc: michal.l...@markovi.net
Cc: mj...@google.com
Cc: m...@chromium.org
Cc: pombreda...@nexb.com
Cc: rient...@google.com
Cc: rost...@goodmis.org
Cc: sedat.di...@gmail.com
Cc: thomas.lenda...@amd.com
Cc: tstel...@redhat.com
Cc: tw...@google.com
Cc: virtualization@lists.linux-foundation.org
Cc: will.dea...@arm.com
Cc: yamada.masah...@socionext.com
Link: http://lkml.kernel.org/r/20180621162324.36656-2-ndesaulni...@google.com
Signed-off-by: Ingo Molnar 
Signed-off-by: Greg Kroah-Hartman 

---
 include/linux/compiler-gcc.h |   29 ++---
 1 file changed, 22 insertions(+), 7 deletions(-)

--- a/include/linux/compiler-gcc.h
+++ b/include/linux/compiler-gcc.h
@@ -66,25 +66,40 @@
 #endif
 
 /*
+ * Feature detection for gnu_inline (gnu89 extern inline semantics). Either
+ * __GNUC_STDC_INLINE__ is defined (not using gnu89 extern inline semantics,
+ * and we opt in to the gnu89 semantics), or __GNUC_STDC_INLINE__ is not
+ * defined so the gnu89 semantics are the default.
+ */
+#ifdef __GNUC_STDC_INLINE__
+# define __gnu_inline  __attribute__((gnu_inline))
+#else
+# define __gnu_inline
+#endif
+
+/*
  * Force always-inline if the user requests it so via the .config,
  * or if gcc is too old.
  * GCC does not warn about unused static inline functions for
  * -Wunused-function.  This turns out to avoid the need for complex #ifdef
  * directives.  Suppress the warning in clang as well by using "unused"
  * function attribute, which is redundant but not harmful for gcc.
+ * Prefer gnu_inline, so that extern inline functions do not emit an
+ * externally visible function. This makes extern inline behave as per gnu89
+ * semantics rather than c99. This prevents multiple symbol definition errors
+ * of extern inline functions at link time.
+ * A lot of inline functions can cause havoc with function tracing.
  */
 #if !defined(CONFIG_ARCH_SUPPORTS_OPTIMIZED_INLINING) ||   \
 !defined(CONFIG_OPTIMIZE_INLINING) || (__GNUC__ < 4)
-#define inline inline  __attribute__((always_inline,unused)) notrace
-#define __inline__ __inline__  __attribute__((always_inline,unused)) notrace
-#define __inline __inline  __attribute__((always_inline,unused)) notrace
+#define inline \
+   inline __attribute__((always_inline, unused)) notrace __gnu_inline
 #else
-/* A lot of inline functions can cause havoc with function tracing */
-#define inline inline  __attribute__((unused)) notrace
-#define __inline__ __inline__  __attribute__((unused)) notrace
-#define __inline __inline  __attribute__((unused)) notrace
+#define inline inline  __attribute__((unused)) notrace __gnu_inline
 #endif
 
+#define __inline__ inline
+#define __inline inline
 #define __always_inline  inline __attribute__((always_inline))
 #define  noinline  __attribute__((noinline))
 


___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


[PATCH 4.9 07/66] x86/paravirt: Make native_save_fl() extern inline

2018-07-20 Thread Greg Kroah-Hartman
4.9-stable review patch.  If anyone has any objections, please let me know.

--

From: Nick Desaulniers 

commit d0a8d9378d16eb3c69bd8e6d23779fbdbee3a8c7 upstream.

native_save_fl() is marked static inline, but by using it as
a function pointer in arch/x86/kernel/paravirt.c, it MUST be outlined.

paravirt's use of native_save_fl() also requires that no GPRs other than
%rax are clobbered.

Compilers have different heuristics which they use to emit stack guard
code, the emittance of which can break paravirt's callee saved assumption
by clobbering %rcx.

Marking a function definition extern inline means that if this version
cannot be inlined, then the out-of-line version will be preferred. By
having the out-of-line version be implemented in assembly, it cannot be
instrumented with a stack protector, which might violate custom calling
conventions that code like paravirt relies on.

The semantics of extern inline has changed since gnu89. This means that
folks using GCC versions >= 5.1 may see symbol redefinition errors at
link time for subdirs that override KBUILD_CFLAGS (making the C standard
used implicit) regardless of this patch. This has been cleaned up
earlier in the patch set, but is left as a note in the commit message
for future travelers.

Reports:
 https://lkml.org/lkml/2018/5/7/534
 https://github.com/ClangBuiltLinux/linux/issues/16

Discussion:
 https://bugs.llvm.org/show_bug.cgi?id=37512
 https://lkml.org/lkml/2018/5/24/1371

Thanks to the many folks that participated in the discussion.

Debugged-by: Alistair Strachan 
Debugged-by: Matthias Kaehlcke 
Suggested-by: Arnd Bergmann 
Suggested-by: H. Peter Anvin 
Suggested-by: Tom Stellar 
Reported-by: Sedat Dilek 
Tested-by: Sedat Dilek 
Signed-off-by: Nick Desaulniers 
Acked-by: Juergen Gross 
Cc: Linus Torvalds 
Cc: Peter Zijlstra 
Cc: Thomas Gleixner 
Cc: a...@redhat.com
Cc: akata...@vmware.com
Cc: a...@linux-foundation.org
Cc: andrea.pa...@amarulasolutions.com
Cc: ard.biesheu...@linaro.org
Cc: aryabi...@virtuozzo.com
Cc: astrac...@google.com
Cc: boris.ostrov...@oracle.com
Cc: brijesh.si...@amd.com
Cc: caoj.f...@cn.fujitsu.com
Cc: ge...@linux-m68k.org
Cc: ghackm...@google.com
Cc: gre...@linuxfoundation.org
Cc: jan.kis...@siemens.com
Cc: jarkko.sakki...@linux.intel.com
Cc: j...@perches.com
Cc: jpoim...@redhat.com
Cc: keesc...@google.com
Cc: kirill.shute...@linux.intel.com
Cc: kstew...@linuxfoundation.org
Cc: linux-...@vger.kernel.org
Cc: linux-kbu...@vger.kernel.org
Cc: manojgu...@google.com
Cc: mawil...@microsoft.com
Cc: michal.l...@markovi.net
Cc: mj...@google.com
Cc: m...@chromium.org
Cc: pombreda...@nexb.com
Cc: rient...@google.com
Cc: rost...@goodmis.org
Cc: thomas.lenda...@amd.com
Cc: tw...@google.com
Cc: virtualization@lists.linux-foundation.org
Cc: will.dea...@arm.com
Cc: yamada.masah...@socionext.com
Link: http://lkml.kernel.org/r/20180621162324.36656-4-ndesaulni...@google.com
Signed-off-by: Ingo Molnar 
Signed-off-by: Greg Kroah-Hartman 

---
 arch/x86/include/asm/irqflags.h |    2 +-
 arch/x86/kernel/Makefile        |    1 +
 arch/x86/kernel/irqflags.S      |   26 ++++++++++++++++++++++++++
 3 files changed, 28 insertions(+), 1 deletion(-)

--- a/arch/x86/include/asm/irqflags.h
+++ b/arch/x86/include/asm/irqflags.h
@@ -12,7 +12,7 @@
  * Interrupt control:
  */
 
-static inline unsigned long native_save_fl(void)
+extern inline unsigned long native_save_fl(void)
 {
unsigned long flags;
 
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -56,6 +56,7 @@ obj-y += alternative.o i8253.o pci-nom
 obj-y  += tsc.o tsc_msr.o io_delay.o rtc.o
 obj-y  += pci-iommu_table.o
 obj-y  += resource.o
+obj-y  += irqflags.o
 
 obj-y  += process.o
 obj-y  += fpu/
--- /dev/null
+++ b/arch/x86/kernel/irqflags.S
@@ -0,0 +1,26 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#include <asm/asm.h>
+#include <asm/export.h>
+#include <linux/linkage.h>
+
+/*
+ * unsigned long native_save_fl(void)
+ */
+ENTRY(native_save_fl)
+   pushf
+   pop %_ASM_AX
+   ret
+ENDPROC(native_save_fl)
+EXPORT_SYMBOL(native_save_fl)
+
+/*
+ * void native_restore_fl(unsigned long flags)
+ * %eax/%rdi: flags
+ */
+ENTRY(native_restore_fl)
+   push %_ASM_ARG1
+   popf
+   ret
+ENDPROC(native_restore_fl)
+EXPORT_SYMBOL(native_restore_fl)


___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


[PATCH 4.9 06/66] x86/asm: Add _ASM_ARG* constants for argument registers to <asm/asm.h>

2018-07-20 Thread Greg Kroah-Hartman
4.9-stable review patch.  If anyone has any objections, please let me know.

--

From: H. Peter Anvin 

commit 0e2e160033283e20f688d8bad5b89460cc5bfcc4 upstream.

i386 and x86-64 use different registers for arguments; make them
available so we don't have to #ifdef in the actual code.

Native size and specified size (q, l, w, b) versions are provided.

Signed-off-by: H. Peter Anvin 
Signed-off-by: Nick Desaulniers 
Reviewed-by: Sedat Dilek 
Acked-by: Juergen Gross 
Cc: Linus Torvalds 
Cc: Peter Zijlstra 
Cc: Thomas Gleixner 
Cc: a...@redhat.com
Cc: akata...@vmware.com
Cc: a...@linux-foundation.org
Cc: andrea.pa...@amarulasolutions.com
Cc: ard.biesheu...@linaro.org
Cc: a...@arndb.de
Cc: aryabi...@virtuozzo.com
Cc: astrac...@google.com
Cc: boris.ostrov...@oracle.com
Cc: brijesh.si...@amd.com
Cc: caoj.f...@cn.fujitsu.com
Cc: ge...@linux-m68k.org
Cc: ghackm...@google.com
Cc: gre...@linuxfoundation.org
Cc: jan.kis...@siemens.com
Cc: jarkko.sakki...@linux.intel.com
Cc: j...@perches.com
Cc: jpoim...@redhat.com
Cc: keesc...@google.com
Cc: kirill.shute...@linux.intel.com
Cc: kstew...@linuxfoundation.org
Cc: linux-...@vger.kernel.org
Cc: linux-kbu...@vger.kernel.org
Cc: manojgu...@google.com
Cc: mawil...@microsoft.com
Cc: michal.l...@markovi.net
Cc: mj...@google.com
Cc: m...@chromium.org
Cc: pombreda...@nexb.com
Cc: rient...@google.com
Cc: rost...@goodmis.org
Cc: thomas.lenda...@amd.com
Cc: tstel...@redhat.com
Cc: tw...@google.com
Cc: virtualization@lists.linux-foundation.org
Cc: will.dea...@arm.com
Cc: yamada.masah...@socionext.com
Link: http://lkml.kernel.org/r/20180621162324.36656-3-ndesaulni...@google.com
Signed-off-by: Ingo Molnar 
Signed-off-by: Greg Kroah-Hartman 

---
 arch/x86/include/asm/asm.h |   59 +
 1 file changed, 59 insertions(+)

--- a/arch/x86/include/asm/asm.h
+++ b/arch/x86/include/asm/asm.h
@@ -45,6 +45,65 @@
 #define _ASM_SI  __ASM_REG(si)
 #define _ASM_DI  __ASM_REG(di)
 
+#ifndef __x86_64__
+/* 32 bit */
+
+#define _ASM_ARG1  _ASM_AX
+#define _ASM_ARG2  _ASM_DX
+#define _ASM_ARG3  _ASM_CX
+
+#define _ASM_ARG1L eax
+#define _ASM_ARG2L edx
+#define _ASM_ARG3L ecx
+
+#define _ASM_ARG1W ax
+#define _ASM_ARG2W dx
+#define _ASM_ARG3W cx
+
+#define _ASM_ARG1B al
+#define _ASM_ARG2B dl
+#define _ASM_ARG3B cl
+
+#else
+/* 64 bit */
+
+#define _ASM_ARG1  _ASM_DI
+#define _ASM_ARG2  _ASM_SI
+#define _ASM_ARG3  _ASM_DX
+#define _ASM_ARG4  _ASM_CX
+#define _ASM_ARG5  r8
+#define _ASM_ARG6  r9
+
+#define _ASM_ARG1Q rdi
+#define _ASM_ARG2Q rsi
+#define _ASM_ARG3Q rdx
+#define _ASM_ARG4Q rcx
+#define _ASM_ARG5Q r8
+#define _ASM_ARG6Q r9
+
+#define _ASM_ARG1L edi
+#define _ASM_ARG2L esi
+#define _ASM_ARG3L edx
+#define _ASM_ARG4L ecx
+#define _ASM_ARG5L r8d
+#define _ASM_ARG6L r9d
+
+#define _ASM_ARG1W di
+#define _ASM_ARG2W si
+#define _ASM_ARG3W dx
+#define _ASM_ARG4W cx
+#define _ASM_ARG5W r8w
+#define _ASM_ARG6W r9w
+
+#define _ASM_ARG1B dil
+#define _ASM_ARG2B sil
+#define _ASM_ARG3B dl
+#define _ASM_ARG4B cl
+#define _ASM_ARG5B r8b
+#define _ASM_ARG6B r9b
+
+#endif
+
 /*
  * Macros to generate condition code outputs from inline assembly,
  * The output operand must be type "bool".


___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


[PATCH 4.9 05/66] compiler-gcc.h: Add __attribute__((gnu_inline)) to all inline declarations

2018-07-20 Thread Greg Kroah-Hartman
4.9-stable review patch.  If anyone has any objections, please let me know.

--

From: Nick Desaulniers 

commit d03db2bc26f0e4a6849ad649a09c9c73fccdc656 upstream.

Functions marked extern inline do not emit an externally visible
function when the gnu89 C standard is used. Some KBUILD Makefiles
overwrite KBUILD_CFLAGS. This is an issue for GCC 5.1+ users as without
an explicit C standard specified, the default is gnu11. Since c99, the
semantics of extern inline have changed such that an externally visible
function is always emitted. This can lead to multiple definition errors
of extern inline functions at link time of compilation units whose build
files have removed an explicit C standard compiler flag for users of GCC
5.1+ or Clang.

Suggested-by: Arnd Bergmann 
Suggested-by: H. Peter Anvin 
Suggested-by: Joe Perches 
Signed-off-by: Nick Desaulniers 
Acked-by: Juergen Gross 
Cc: Linus Torvalds 
Cc: Peter Zijlstra 
Cc: Thomas Gleixner 
Cc: a...@redhat.com
Cc: akata...@vmware.com
Cc: a...@linux-foundation.org
Cc: andrea.pa...@amarulasolutions.com
Cc: ard.biesheu...@linaro.org
Cc: aryabi...@virtuozzo.com
Cc: astrac...@google.com
Cc: boris.ostrov...@oracle.com
Cc: brijesh.si...@amd.com
Cc: caoj.f...@cn.fujitsu.com
Cc: ge...@linux-m68k.org
Cc: ghackm...@google.com
Cc: gre...@linuxfoundation.org
Cc: jan.kis...@siemens.com
Cc: jarkko.sakki...@linux.intel.com
Cc: jpoim...@redhat.com
Cc: keesc...@google.com
Cc: kirill.shute...@linux.intel.com
Cc: kstew...@linuxfoundation.org
Cc: linux-...@vger.kernel.org
Cc: linux-kbu...@vger.kernel.org
Cc: manojgu...@google.com
Cc: mawil...@microsoft.com
Cc: michal.l...@markovi.net
Cc: mj...@google.com
Cc: m...@chromium.org
Cc: pombreda...@nexb.com
Cc: rient...@google.com
Cc: rost...@goodmis.org
Cc: sedat.di...@gmail.com
Cc: thomas.lenda...@amd.com
Cc: tstel...@redhat.com
Cc: tw...@google.com
Cc: virtualization@lists.linux-foundation.org
Cc: will.dea...@arm.com
Cc: yamada.masah...@socionext.com
Link: http://lkml.kernel.org/r/20180621162324.36656-2-ndesaulni...@google.com
Signed-off-by: Ingo Molnar 
Signed-off-by: Greg Kroah-Hartman 

---
 include/linux/compiler-gcc.h |   29 ++---
 1 file changed, 22 insertions(+), 7 deletions(-)

--- a/include/linux/compiler-gcc.h
+++ b/include/linux/compiler-gcc.h
@@ -65,25 +65,40 @@
 #endif
 
 /*
+ * Feature detection for gnu_inline (gnu89 extern inline semantics). Either
+ * __GNUC_STDC_INLINE__ is defined (not using gnu89 extern inline semantics,
+ * and we opt in to the gnu89 semantics), or __GNUC_STDC_INLINE__ is not
+ * defined so the gnu89 semantics are the default.
+ */
+#ifdef __GNUC_STDC_INLINE__
+# define __gnu_inline  __attribute__((gnu_inline))
+#else
+# define __gnu_inline
+#endif
+
+/*
  * Force always-inline if the user requests it so via the .config,
  * or if gcc is too old.
  * GCC does not warn about unused static inline functions for
  * -Wunused-function.  This turns out to avoid the need for complex #ifdef
  * directives.  Suppress the warning in clang as well by using "unused"
  * function attribute, which is redundant but not harmful for gcc.
+ * Prefer gnu_inline, so that extern inline functions do not emit an
+ * externally visible function. This makes extern inline behave as per gnu89
+ * semantics rather than c99. This prevents multiple symbol definition errors
+ * of extern inline functions at link time.
+ * A lot of inline functions can cause havoc with function tracing.
  */
 #if !defined(CONFIG_ARCH_SUPPORTS_OPTIMIZED_INLINING) ||   \
 !defined(CONFIG_OPTIMIZE_INLINING) || (__GNUC__ < 4)
-#define inline inline  __attribute__((always_inline,unused)) notrace
-#define __inline__ __inline__  __attribute__((always_inline,unused)) notrace
-#define __inline __inline  __attribute__((always_inline,unused)) notrace
+#define inline \
+   inline __attribute__((always_inline, unused)) notrace __gnu_inline
 #else
-/* A lot of inline functions can cause havoc with function tracing */
-#define inline inline  __attribute__((unused)) notrace
-#define __inline__ __inline__  __attribute__((unused)) notrace
-#define __inline __inline  __attribute__((unused)) notrace
+#define inline inline  __attribute__((unused)) notrace __gnu_inline
 #endif
 
+#define __inline__ inline
+#define __inline inline
 #define __always_inline  inline __attribute__((always_inline))
 #define  noinline  __attribute__((noinline))
 


___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


[PATCH 4.4 05/31] x86/asm: Add _ASM_ARG* constants for argument registers to <asm/asm.h>

2018-07-20 Thread Greg Kroah-Hartman
4.4-stable review patch.  If anyone has any objections, please let me know.

--

From: H. Peter Anvin 

commit 0e2e160033283e20f688d8bad5b89460cc5bfcc4 upstream.

i386 and x86-64 use different registers for arguments; make them
available so we don't have to #ifdef in the actual code.

Native size and specified size (q, l, w, b) versions are provided.

Signed-off-by: H. Peter Anvin 
Signed-off-by: Nick Desaulniers 
Reviewed-by: Sedat Dilek 
Acked-by: Juergen Gross 
Cc: Linus Torvalds 
Cc: Peter Zijlstra 
Cc: Thomas Gleixner 
Cc: a...@redhat.com
Cc: akata...@vmware.com
Cc: a...@linux-foundation.org
Cc: andrea.pa...@amarulasolutions.com
Cc: ard.biesheu...@linaro.org
Cc: a...@arndb.de
Cc: aryabi...@virtuozzo.com
Cc: astrac...@google.com
Cc: boris.ostrov...@oracle.com
Cc: brijesh.si...@amd.com
Cc: caoj.f...@cn.fujitsu.com
Cc: ge...@linux-m68k.org
Cc: ghackm...@google.com
Cc: gre...@linuxfoundation.org
Cc: jan.kis...@siemens.com
Cc: jarkko.sakki...@linux.intel.com
Cc: j...@perches.com
Cc: jpoim...@redhat.com
Cc: keesc...@google.com
Cc: kirill.shute...@linux.intel.com
Cc: kstew...@linuxfoundation.org
Cc: linux-...@vger.kernel.org
Cc: linux-kbu...@vger.kernel.org
Cc: manojgu...@google.com
Cc: mawil...@microsoft.com
Cc: michal.l...@markovi.net
Cc: mj...@google.com
Cc: m...@chromium.org
Cc: pombreda...@nexb.com
Cc: rient...@google.com
Cc: rost...@goodmis.org
Cc: thomas.lenda...@amd.com
Cc: tstel...@redhat.com
Cc: tw...@google.com
Cc: virtualization@lists.linux-foundation.org
Cc: will.dea...@arm.com
Cc: yamada.masah...@socionext.com
Link: http://lkml.kernel.org/r/20180621162324.36656-3-ndesaulni...@google.com
Signed-off-by: Ingo Molnar 
Signed-off-by: Greg Kroah-Hartman 

---
 arch/x86/include/asm/asm.h |   59 +
 1 file changed, 59 insertions(+)

--- a/arch/x86/include/asm/asm.h
+++ b/arch/x86/include/asm/asm.h
@@ -44,6 +44,65 @@
 #define _ASM_SI  __ASM_REG(si)
 #define _ASM_DI  __ASM_REG(di)
 
+#ifndef __x86_64__
+/* 32 bit */
+
+#define _ASM_ARG1  _ASM_AX
+#define _ASM_ARG2  _ASM_DX
+#define _ASM_ARG3  _ASM_CX
+
+#define _ASM_ARG1L eax
+#define _ASM_ARG2L edx
+#define _ASM_ARG3L ecx
+
+#define _ASM_ARG1W ax
+#define _ASM_ARG2W dx
+#define _ASM_ARG3W cx
+
+#define _ASM_ARG1B al
+#define _ASM_ARG2B dl
+#define _ASM_ARG3B cl
+
+#else
+/* 64 bit */
+
+#define _ASM_ARG1  _ASM_DI
+#define _ASM_ARG2  _ASM_SI
+#define _ASM_ARG3  _ASM_DX
+#define _ASM_ARG4  _ASM_CX
+#define _ASM_ARG5  r8
+#define _ASM_ARG6  r9
+
+#define _ASM_ARG1Q rdi
+#define _ASM_ARG2Q rsi
+#define _ASM_ARG3Q rdx
+#define _ASM_ARG4Q rcx
+#define _ASM_ARG5Q r8
+#define _ASM_ARG6Q r9
+
+#define _ASM_ARG1L edi
+#define _ASM_ARG2L esi
+#define _ASM_ARG3L edx
+#define _ASM_ARG4L ecx
+#define _ASM_ARG5L r8d
+#define _ASM_ARG6L r9d
+
+#define _ASM_ARG1W di
+#define _ASM_ARG2W si
+#define _ASM_ARG3W dx
+#define _ASM_ARG4W cx
+#define _ASM_ARG5W r8w
+#define _ASM_ARG6W r9w
+
+#define _ASM_ARG1B dil
+#define _ASM_ARG2B sil
+#define _ASM_ARG3B dl
+#define _ASM_ARG4B cl
+#define _ASM_ARG5B r8b
+#define _ASM_ARG6B r9b
+
+#endif
+
 /* Exception table entry */
 #ifdef __ASSEMBLY__
 # define _ASM_EXTABLE(from,to) \


___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


[PATCH 4.4 04/31] compiler-gcc.h: Add __attribute__((gnu_inline)) to all inline declarations

2018-07-20 Thread Greg Kroah-Hartman
4.4-stable review patch.  If anyone has any objections, please let me know.

--

From: Nick Desaulniers 

commit d03db2bc26f0e4a6849ad649a09c9c73fccdc656 upstream.

Functions marked extern inline do not emit an externally visible
function when the gnu89 C standard is used. Some KBUILD Makefiles
overwrite KBUILD_CFLAGS. This is an issue for GCC 5.1+ users as without
an explicit C standard specified, the default is gnu11. Since c99, the
semantics of extern inline have changed such that an externally visible
function is always emitted. This can lead to multiple definition errors
of extern inline functions at link time of compilation units whose build
files have removed an explicit C standard compiler flag for users of GCC
5.1+ or Clang.

Suggested-by: Arnd Bergmann 
Suggested-by: H. Peter Anvin 
Suggested-by: Joe Perches 
Signed-off-by: Nick Desaulniers 
Acked-by: Juergen Gross 
Cc: Linus Torvalds 
Cc: Peter Zijlstra 
Cc: Thomas Gleixner 
Cc: a...@redhat.com
Cc: akata...@vmware.com
Cc: a...@linux-foundation.org
Cc: andrea.pa...@amarulasolutions.com
Cc: ard.biesheu...@linaro.org
Cc: aryabi...@virtuozzo.com
Cc: astrac...@google.com
Cc: boris.ostrov...@oracle.com
Cc: brijesh.si...@amd.com
Cc: caoj.f...@cn.fujitsu.com
Cc: ge...@linux-m68k.org
Cc: ghackm...@google.com
Cc: gre...@linuxfoundation.org
Cc: jan.kis...@siemens.com
Cc: jarkko.sakki...@linux.intel.com
Cc: jpoim...@redhat.com
Cc: keesc...@google.com
Cc: kirill.shute...@linux.intel.com
Cc: kstew...@linuxfoundation.org
Cc: linux-...@vger.kernel.org
Cc: linux-kbu...@vger.kernel.org
Cc: manojgu...@google.com
Cc: mawil...@microsoft.com
Cc: michal.l...@markovi.net
Cc: mj...@google.com
Cc: m...@chromium.org
Cc: pombreda...@nexb.com
Cc: rient...@google.com
Cc: rost...@goodmis.org
Cc: sedat.di...@gmail.com
Cc: thomas.lenda...@amd.com
Cc: tstel...@redhat.com
Cc: tw...@google.com
Cc: virtualization@lists.linux-foundation.org
Cc: will.dea...@arm.com
Cc: yamada.masah...@socionext.com
Link: http://lkml.kernel.org/r/20180621162324.36656-2-ndesaulni...@google.com
Signed-off-by: Ingo Molnar 
Signed-off-by: Greg Kroah-Hartman 

---
 include/linux/compiler-gcc.h |   29 ++---
 1 file changed, 22 insertions(+), 7 deletions(-)

--- a/include/linux/compiler-gcc.h
+++ b/include/linux/compiler-gcc.h
@@ -65,25 +65,40 @@
 #endif
 
 /*
+ * Feature detection for gnu_inline (gnu89 extern inline semantics). Either
+ * __GNUC_STDC_INLINE__ is defined (not using gnu89 extern inline semantics,
+ * and we opt in to the gnu89 semantics), or __GNUC_STDC_INLINE__ is not
+ * defined so the gnu89 semantics are the default.
+ */
+#ifdef __GNUC_STDC_INLINE__
+# define __gnu_inline  __attribute__((gnu_inline))
+#else
+# define __gnu_inline
+#endif
+
+/*
  * Force always-inline if the user requests it so via the .config,
  * or if gcc is too old.
  * GCC does not warn about unused static inline functions for
  * -Wunused-function.  This turns out to avoid the need for complex #ifdef
  * directives.  Suppress the warning in clang as well by using "unused"
  * function attribute, which is redundant but not harmful for gcc.
+ * Prefer gnu_inline, so that extern inline functions do not emit an
+ * externally visible function. This makes extern inline behave as per gnu89
+ * semantics rather than c99. This prevents multiple symbol definition errors
+ * of extern inline functions at link time.
+ * A lot of inline functions can cause havoc with function tracing.
  */
 #if !defined(CONFIG_ARCH_SUPPORTS_OPTIMIZED_INLINING) ||   \
 !defined(CONFIG_OPTIMIZE_INLINING) || (__GNUC__ < 4)
-#define inline inline  __attribute__((always_inline,unused)) notrace
-#define __inline__ __inline__  __attribute__((always_inline,unused)) notrace
-#define __inline __inline  __attribute__((always_inline,unused)) notrace
+#define inline \
+   inline __attribute__((always_inline, unused)) notrace __gnu_inline
 #else
-/* A lot of inline functions can cause havoc with function tracing */
-#define inline inline  __attribute__((unused)) notrace
-#define __inline__ __inline__  __attribute__((unused)) notrace
-#define __inline __inline  __attribute__((unused)) notrace
+#define inline inline  __attribute__((unused)) notrace __gnu_inline
 #endif
 
+#define __inline__ inline
+#define __inline inline
 #define __always_inlineinline __attribute__((always_inline))
 #define  noinline  __attribute__((noinline))
 


___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


[PATCH v36 3/5] virtio-balloon: VIRTIO_BALLOON_F_FREE_PAGE_HINT

2018-07-20 Thread Wei Wang
Negotiation of the VIRTIO_BALLOON_F_FREE_PAGE_HINT feature indicates the
support of reporting hints of guest free pages to host via virtio-balloon.
Currently, only free page blocks of order MAX_ORDER - 1 are reported. They are
obtained one by one from the mm free list via the regular allocation
function. The allocated pages are given back to mm after they are put onto
the vq.

Host requests the guest to report free page hints by sending a new cmd id
to the guest via the free_page_report_cmd_id configuration register. When
the guest starts to report, it first sends a start cmd to host via the
free page vq, which acks to host the cmd id received. When the guest
finishes reporting free pages, a stop cmd is sent to host via the vq.

TODO:
- Add a batch page allocation API to amortize the allocation overhead.

Signed-off-by: Wei Wang 
Signed-off-by: Liang Li 
Cc: Michael S. Tsirkin 
Cc: Michal Hocko 
Cc: Andrew Morton 
Cc: Linus Torvalds 
---
 drivers/virtio/virtio_balloon.c | 331 +---
 include/uapi/linux/virtio_balloon.h |   4 +
 2 files changed, 307 insertions(+), 28 deletions(-)

diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c
index c6fd406..82cd497 100644
--- a/drivers/virtio/virtio_balloon.c
+++ b/drivers/virtio/virtio_balloon.c
@@ -42,6 +42,14 @@
 #define DEFAULT_BALLOON_PAGES_TO_SHRINK 256
 #define VIRTBALLOON_OOM_NOTIFY_PRIORITY 80
 
+#define VIRTIO_BALLOON_FREE_PAGE_ALLOC_FLAG (__GFP_NORETRY | __GFP_NOWARN | \
+__GFP_NOMEMALLOC)
+/* The order of free page blocks to report to host */
+#define VIRTIO_BALLOON_FREE_PAGE_ORDER (MAX_ORDER - 1)
+/* The size of a free page block in bytes */
+#define VIRTIO_BALLOON_FREE_PAGE_SIZE \
+   (1 << (VIRTIO_BALLOON_FREE_PAGE_ORDER + PAGE_SHIFT))
+
 static unsigned long balloon_pages_to_shrink = DEFAULT_BALLOON_PAGES_TO_SHRINK;
 module_param(balloon_pages_to_shrink, ulong, 0600);
 MODULE_PARM_DESC(balloon_pages_to_shrink, "pages to free on memory presure");
@@ -50,9 +58,22 @@ MODULE_PARM_DESC(balloon_pages_to_shrink, "pages to free on 
memory presure");
 static struct vfsmount *balloon_mnt;
 #endif
 
+enum virtio_balloon_vq {
+   VIRTIO_BALLOON_VQ_INFLATE,
+   VIRTIO_BALLOON_VQ_DEFLATE,
+   VIRTIO_BALLOON_VQ_STATS,
+   VIRTIO_BALLOON_VQ_FREE_PAGE,
+   VIRTIO_BALLOON_VQ_MAX
+};
+
 struct virtio_balloon {
struct virtio_device *vdev;
-   struct virtqueue *inflate_vq, *deflate_vq, *stats_vq;
+   struct virtqueue *inflate_vq, *deflate_vq, *stats_vq, *free_page_vq;
+
+   /* Balloon's own wq for cpu-intensive work items */
+   struct workqueue_struct *balloon_wq;
+   /* The free page reporting work item submitted to the balloon wq */
+   struct work_struct report_free_page_work;
 
/* The balloon servicing is delegated to a freezable workqueue. */
struct work_struct update_balloon_stats_work;
@@ -62,6 +83,16 @@ struct virtio_balloon {
spinlock_t stop_update_lock;
bool stop_update;
 
+   /* The list of allocated free pages, waiting to be given back to mm */
+   struct list_head free_page_list;
+   spinlock_t free_page_list_lock;
+   /* The cmd id received from host */
+   u32 cmd_id_received;
+   /* The cmd id that is actively in use */
+   __virtio32 cmd_id_active;
+   /* Buffer to store the stop sign */
+   __virtio32 cmd_id_stop;
+
/* Waiting for host to ack the pages we released. */
wait_queue_head_t acked;
 
@@ -325,17 +356,6 @@ static void stats_handle_request(struct virtio_balloon *vb)
virtqueue_kick(vq);
 }
 
-static void virtballoon_changed(struct virtio_device *vdev)
-{
-   struct virtio_balloon *vb = vdev->priv;
-   unsigned long flags;
-
-   spin_lock_irqsave(>stop_update_lock, flags);
-   if (!vb->stop_update)
-   queue_work(system_freezable_wq, >update_balloon_size_work);
-   spin_unlock_irqrestore(>stop_update_lock, flags);
-}
-
 static inline s64 towards_target(struct virtio_balloon *vb)
 {
s64 target;
@@ -352,6 +372,52 @@ static inline s64 towards_target(struct virtio_balloon *vb)
return target - vb->num_pages;
 }
 
+static void virtballoon_changed(struct virtio_device *vdev)
+{
+   struct virtio_balloon *vb = vdev->priv;
+   unsigned long flags;
+   s64 diff = towards_target(vb);
+
+   if (diff) {
+   spin_lock_irqsave(>stop_update_lock, flags);
+   if (!vb->stop_update)
+   queue_work(system_freezable_wq,
+  >update_balloon_size_work);
+   spin_unlock_irqrestore(>stop_update_lock, flags);
+   }
+
+   if (virtio_has_feature(vdev, VIRTIO_BALLOON_F_FREE_PAGE_HINT)) {
+   virtio_cread(vdev, struct virtio_balloon_config,
+free_page_report_cmd_id, >cmd_id_received);
+   if (vb->cmd_id_received != 

[PATCH v36 5/5] virtio-balloon: VIRTIO_BALLOON_F_PAGE_POISON

2018-07-20 Thread Wei Wang
The VIRTIO_BALLOON_F_PAGE_POISON feature bit is used to indicate if the
guest is using page poisoning. Guest writes to the poison_val config
field to tell host about the page poisoning value that is in use.

Suggested-by: Michael S. Tsirkin 
Signed-off-by: Wei Wang 
Cc: Michael S. Tsirkin 
Cc: Michal Hocko 
Cc: Andrew Morton 
---
 drivers/virtio/virtio_balloon.c | 10 ++
 include/uapi/linux/virtio_balloon.h |  3 +++
 2 files changed, 13 insertions(+)

diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c
index 82cd497..6340cc1 100644
--- a/drivers/virtio/virtio_balloon.c
+++ b/drivers/virtio/virtio_balloon.c
@@ -814,6 +814,7 @@ static int virtio_balloon_register_shrinker(struct 
virtio_balloon *vb)
 static int virtballoon_probe(struct virtio_device *vdev)
 {
struct virtio_balloon *vb;
+   __u32 poison_val;
int err;
 
if (!vdev->config->get) {
@@ -883,6 +884,11 @@ static int virtballoon_probe(struct virtio_device *vdev)
  VIRTIO_BALLOON_CMD_ID_STOP);
spin_lock_init(>free_page_list_lock);
INIT_LIST_HEAD(>free_page_list);
+   if (virtio_has_feature(vdev, VIRTIO_BALLOON_F_PAGE_POISON)) {
+   memset(_val, PAGE_POISON, sizeof(poison_val));
+   virtio_cwrite(vb->vdev, struct virtio_balloon_config,
+ poison_val, _val);
+   }
}
 
err = virtio_balloon_register_shrinker(vb);
@@ -979,6 +985,9 @@ static int virtballoon_restore(struct virtio_device *vdev)
 
 static int virtballoon_validate(struct virtio_device *vdev)
 {
+   if (!page_poisoning_enabled())
+   __virtio_clear_bit(vdev, VIRTIO_BALLOON_F_PAGE_POISON);
+
__virtio_clear_bit(vdev, VIRTIO_F_IOMMU_PLATFORM);
return 0;
 }
@@ -988,6 +997,7 @@ static unsigned int features[] = {
VIRTIO_BALLOON_F_STATS_VQ,
VIRTIO_BALLOON_F_DEFLATE_ON_OOM,
VIRTIO_BALLOON_F_FREE_PAGE_HINT,
+   VIRTIO_BALLOON_F_PAGE_POISON,
 };
 
 static struct virtio_driver virtio_balloon_driver = {
diff --git a/include/uapi/linux/virtio_balloon.h 
b/include/uapi/linux/virtio_balloon.h
index 18ee430..80a7b7e 100644
--- a/include/uapi/linux/virtio_balloon.h
+++ b/include/uapi/linux/virtio_balloon.h
@@ -35,6 +35,7 @@
 #define VIRTIO_BALLOON_F_STATS_VQ  1 /* Memory Stats virtqueue */
 #define VIRTIO_BALLOON_F_DEFLATE_ON_OOM2 /* Deflate balloon on OOM */
 #define VIRTIO_BALLOON_F_FREE_PAGE_HINT3 /* VQ to report free pages */
+#define VIRTIO_BALLOON_F_PAGE_POISON   4 /* Guest is using page poisoning */
 
 /* Size of a PFN in the balloon interface. */
 #define VIRTIO_BALLOON_PFN_SHIFT 12
@@ -47,6 +48,8 @@ struct virtio_balloon_config {
__u32 actual;
/* Free page report command id, readonly by guest */
__u32 free_page_report_cmd_id;
+   /* Stores PAGE_POISON if page poisoning is in use */
+   __u32 poison_val;
 };
 
 #define VIRTIO_BALLOON_S_SWAP_IN  0   /* Amount of memory swapped in */
-- 
2.7.4

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


[PATCH v36 4/5] mm/page_poison: expose page_poisoning_enabled to kernel modules

2018-07-20 Thread Wei Wang
In some usages, e.g. virtio-balloon, a kernel module needs to know if
page poisoning is in use. This patch exposes the page_poisoning_enabled
function to kernel modules.

Signed-off-by: Wei Wang 
Cc: Andrew Morton 
Cc: Michal Hocko 
Cc: Michael S. Tsirkin 
Acked-by: Andrew Morton 
---
 mm/page_poison.c | 6 ++
 1 file changed, 6 insertions(+)

diff --git a/mm/page_poison.c b/mm/page_poison.c
index aa2b3d3..830f604 100644
--- a/mm/page_poison.c
+++ b/mm/page_poison.c
@@ -17,6 +17,11 @@ static int __init early_page_poison_param(char *buf)
 }
 early_param("page_poison", early_page_poison_param);
 
+/**
+ * page_poisoning_enabled - check if page poisoning is enabled
+ *
+ * Return true if page poisoning is enabled, or false if not.
+ */
 bool page_poisoning_enabled(void)
 {
/*
@@ -29,6 +34,7 @@ bool page_poisoning_enabled(void)
(!IS_ENABLED(CONFIG_ARCH_SUPPORTS_DEBUG_PAGEALLOC) &&
debug_pagealloc_enabled()));
 }
+EXPORT_SYMBOL_GPL(page_poisoning_enabled);
 
 static void poison_page(struct page *page)
 {
-- 
2.7.4

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


[PATCH v36 1/5] virtio-balloon: remove BUG() in init_vqs

2018-07-20 Thread Wei Wang
It's a bit overkill to use BUG when failing to add an entry to the
stats_vq in init_vqs. So remove it and just return the error to the
caller to bail out nicely.

Signed-off-by: Wei Wang 
Cc: Michael S. Tsirkin 
---
 drivers/virtio/virtio_balloon.c | 10 +++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c
index 6b237e3..9356a1a 100644
--- a/drivers/virtio/virtio_balloon.c
+++ b/drivers/virtio/virtio_balloon.c
@@ -455,9 +455,13 @@ static int init_vqs(struct virtio_balloon *vb)
num_stats = update_balloon_stats(vb);
 
sg_init_one(, vb->stats, sizeof(vb->stats[0]) * num_stats);
-   if (virtqueue_add_outbuf(vb->stats_vq, , 1, vb, GFP_KERNEL)
-   < 0)
-   BUG();
+   err = virtqueue_add_outbuf(vb->stats_vq, , 1, vb,
+  GFP_KERNEL);
+   if (err) {
+   dev_warn(>vdev->dev, "%s: add stat_vq failed\n",
+__func__);
+   return err;
+   }
virtqueue_kick(vb->stats_vq);
}
return 0;
-- 
2.7.4

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


[PATCH v36 2/5] virtio_balloon: replace oom notifier with shrinker

2018-07-20 Thread Wei Wang
The OOM notifier is being deprecated for the reasons mentioned
here by Michal Hocko: https://lkml.org/lkml/2018/7/12/314

This patch replaces the virtio-balloon oom notifier with a shrinker
to release balloon pages on memory pressure.

In addition, the replaced virtballoon_oom_notify had a bug: at most
VIRTIO_BALLOON_ARRAY_PFNS_MAX (i.e. 256) balloon pages could be freed,
even when the user had specified a larger number. This is fixed in the
shrinker_scan function.

Signed-off-by: Wei Wang 
Cc: Michael S. Tsirkin 
Cc: Michal Hocko 
Cc: Andrew Morton 
Cc: Linus Torvalds 
---
 drivers/virtio/virtio_balloon.c | 113 +++-
 1 file changed, 65 insertions(+), 48 deletions(-)

diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c
index 9356a1a..c6fd406 100644
--- a/drivers/virtio/virtio_balloon.c
+++ b/drivers/virtio/virtio_balloon.c
@@ -27,7 +27,6 @@
 #include 
 #include 
 #include 
-#include 
 #include 
 #include 
 #include 
@@ -40,12 +39,12 @@
  */
 #define VIRTIO_BALLOON_PAGES_PER_PAGE (unsigned)(PAGE_SIZE >> 
VIRTIO_BALLOON_PFN_SHIFT)
 #define VIRTIO_BALLOON_ARRAY_PFNS_MAX 256
-#define OOM_VBALLOON_DEFAULT_PAGES 256
+#define DEFAULT_BALLOON_PAGES_TO_SHRINK 256
 #define VIRTBALLOON_OOM_NOTIFY_PRIORITY 80
 
-static int oom_pages = OOM_VBALLOON_DEFAULT_PAGES;
-module_param(oom_pages, int, S_IRUSR | S_IWUSR);
-MODULE_PARM_DESC(oom_pages, "pages to free on OOM");
+static unsigned long balloon_pages_to_shrink = DEFAULT_BALLOON_PAGES_TO_SHRINK;
+module_param(balloon_pages_to_shrink, ulong, 0600);
+MODULE_PARM_DESC(balloon_pages_to_shrink, "pages to free on memory presure");
 
 #ifdef CONFIG_BALLOON_COMPACTION
 static struct vfsmount *balloon_mnt;
@@ -86,8 +85,8 @@ struct virtio_balloon {
/* Memory statistics */
struct virtio_balloon_stat stats[VIRTIO_BALLOON_S_NR];
 
-   /* To register callback in oom notifier call chain */
-   struct notifier_block nb;
+   /* To register a shrinker to shrink memory upon memory pressure */
+   struct shrinker shrinker;
 };
 
 static struct virtio_device_id id_table[] = {
@@ -365,38 +364,6 @@ static void update_balloon_size(struct virtio_balloon *vb)
  );
 }
 
-/*
- * virtballoon_oom_notify - release pages when system is under severe
- * memory pressure (called from out_of_memory())
- * @self : notifier block struct
- * @dummy: not used
- * @parm : returned - number of freed pages
- *
- * The balancing of memory by use of the virtio balloon should not cause
- * the termination of processes while there are pages in the balloon.
- * If virtio balloon manages to release some memory, it will make the
- * system return and retry the allocation that forced the OOM killer
- * to run.
- */
-static int virtballoon_oom_notify(struct notifier_block *self,
- unsigned long dummy, void *parm)
-{
-   struct virtio_balloon *vb;
-   unsigned long *freed;
-   unsigned num_freed_pages;
-
-   vb = container_of(self, struct virtio_balloon, nb);
-   if (!virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_DEFLATE_ON_OOM))
-   return NOTIFY_OK;
-
-   freed = parm;
-   num_freed_pages = leak_balloon(vb, oom_pages);
-   update_balloon_size(vb);
-   *freed += num_freed_pages;
-
-   return NOTIFY_OK;
-}
-
 static void update_balloon_stats_func(struct work_struct *work)
 {
struct virtio_balloon *vb;
@@ -548,6 +515,61 @@ static struct file_system_type balloon_fs = {
 
 #endif /* CONFIG_BALLOON_COMPACTION */
 
+static unsigned long virtio_balloon_shrinker_scan(struct shrinker *shrinker,
+ struct shrink_control *sc)
+{
+   unsigned long pages_to_free = balloon_pages_to_shrink,
+ pages_freed = 0;
+   struct virtio_balloon *vb = container_of(shrinker,
+   struct virtio_balloon, shrinker);
+
+   /*
+* One invocation of leak_balloon can deflate at most
+* VIRTIO_BALLOON_ARRAY_PFNS_MAX balloon pages, so we call it
+* multiple times to deflate pages till reaching
+* balloon_pages_to_shrink pages.
+*/
+   while (vb->num_pages && pages_to_free) {
+   pages_to_free = balloon_pages_to_shrink - pages_freed;
+   pages_freed += leak_balloon(vb, pages_to_free);
+   }
+   update_balloon_size(vb);
+
+   return pages_freed / VIRTIO_BALLOON_PAGES_PER_PAGE;
+}
+
+static unsigned long virtio_balloon_shrinker_count(struct shrinker *shrinker,
+  struct shrink_control *sc)
+{
+   struct virtio_balloon *vb = container_of(shrinker,
+   struct virtio_balloon, shrinker);
+
+   /*
+* We continue to use VIRTIO_BALLOON_F_DEFLATE_ON_OOM to handle the
+* case when shrinker needs to be invoked to relieve memory pressure.
+

[PATCH v36 0/5] Virtio-balloon: support free page reporting

2018-07-20 Thread Wei Wang
This patch series is separated from the previous "Virtio-balloon
Enhancement" series. The new feature, VIRTIO_BALLOON_F_FREE_PAGE_HINT,  
implemented by this series enables the virtio-balloon driver to report
hints of guest free pages to the host. It can be used to accelerate live
migration of VMs. Here is an introduction of this usage:

Live migration needs to transfer the VM's memory from the source machine
to the destination round by round. For the 1st round, all the VM's memory
is transferred. From the 2nd round, only the pieces of memory that were
written by the guest (after the 1st round) are transferred. One method
that is popularly used by the hypervisor to track which part of memory is
written is to write-protect all the guest memory.

This feature enables the optimization by skipping the transfer of guest
free pages during VM live migration. It is not a problem if the memory
pages are used after they have been given to the hypervisor as free page
hints, because they will be tracked by the hypervisor and transferred
in a subsequent round if they are used and written.

* Tests
- Test Environment
Host: Intel(R) Xeon(R) CPU E5-2699 v4 @ 2.20GHz
Guest: 8G RAM, 4 vCPU
Migration setup: migrate_set_speed 100G, migrate_set_downtime 2 second

- Test Results
- Idle Guest Live Migration Time (results are averaged over 10 runs):
- Optimization v.s. Legacy = 409ms vs 1757ms --> ~77% reduction
(setting page poisoning zero and enabling ksm don't affect the
 comparison result)
- Guest with Linux Compilation Workload (make bzImage -j4):
- Live Migration Time (average)
  Optimization v.s. Legacy = 1407ms v.s. 2528ms --> ~44% reduction
- Linux Compilation Time
  Optimization v.s. Legacy = 5min4s v.s. 5min12s
  --> no obvious difference

ChangeLog:
v35->v36:
- remove the mm patch, as Linus has a suggestion to get free page
  addresses via allocation, instead of reading from the free page
  list.
- virtio-balloon:
- replace oom notifier with shrinker;
- the guest to host communication interface remains the same as
  v32.
- allocate free page blocks and send to host one by one, and free
  them after sending all the pages.

For ChangeLogs from v22 to v35, please refer to
https://lwn.net/Articles/759413/

For ChangeLogs before v21, please refer to
https://lwn.net/Articles/743660/

Wei Wang (5):
  virtio-balloon: remove BUG() in init_vqs
  virtio_balloon: replace oom notifier with shrinker
  virtio-balloon: VIRTIO_BALLOON_F_FREE_PAGE_HINT
  mm/page_poison: expose page_poisoning_enabled to kernel modules
  virtio-balloon: VIRTIO_BALLOON_F_PAGE_POISON

 drivers/virtio/virtio_balloon.c | 456 ++--
 include/uapi/linux/virtio_balloon.h |   7 +
 mm/page_poison.c|   6 +
 3 files changed, 394 insertions(+), 75 deletions(-)

-- 
2.7.4

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization