Re: performance trouble

2012-02-23 Thread Gleb Natapov
On Wed, Feb 22, 2012 at 05:33:56PM +0100, David Cure wrote:
 On Sun, Feb 19, 2012 at 11:13:15AM +0200, Gleb Natapov wrote:
  
   http://www.roullier.net/report-no-hpet.txt.gz
   
  How have you acquired this trace? It does not trace all kvm events, only
  those that have set_irq in them.
 
   I run : trace-cmd record -b 2 -e kvm -P process_pid
 
Ah, I guess the reason is that it records events only for the I/O thread. You
need to trace all vcpu threads too. I am not sure trace-cmd allows more than
one -P option, though.

  Nothing particularly strange here. ~1745 IRQs are injected into the
  guest each second: the RTC is configured at 1kHz (1024 interrupts per
  second) and other devices contribute ~721 more IRQs (mostly MSI) per
  second. Hardly unusual.
 
   ok, so no ideas to decrease the response time ?
 
Without knowing the reason for the slowdown, no. You probably have the
same problem as the other performance thread (frequent access to the PM
timer), but we need a better trace to confirm that.

--
Gleb.


[PATCH v6 1/4] KVM: PPC: epapr: Factor out the epapr init

2012-02-23 Thread Liu Yu
from the kvm guest paravirt init code.

Signed-off-by: Liu Yu yu@freescale.com
---
v6:
1. rename epapr_para to epapr_paravirt
2. remove redundant warnings
3. remove unnecessary init

 arch/powerpc/include/asm/epapr_hcalls.h |2 +
 arch/powerpc/kernel/Makefile|1 +
 arch/powerpc/kernel/epapr_hcalls.S  |   25 ++
 arch/powerpc/kernel/epapr_paravirt.c|   54 +++
 arch/powerpc/kernel/kvm.c   |   28 ++--
 arch/powerpc/kernel/kvm_emul.S  |   10 --
 arch/powerpc/platforms/Kconfig  |9 +
 7 files changed, 94 insertions(+), 35 deletions(-)
 create mode 100644 arch/powerpc/kernel/epapr_hcalls.S
 create mode 100644 arch/powerpc/kernel/epapr_paravirt.c

diff --git a/arch/powerpc/include/asm/epapr_hcalls.h 
b/arch/powerpc/include/asm/epapr_hcalls.h
index f3b0c2c..2173d4c 100644
--- a/arch/powerpc/include/asm/epapr_hcalls.h
+++ b/arch/powerpc/include/asm/epapr_hcalls.h
@@ -148,6 +148,8 @@
 #define EV_HCALL_CLOBBERS2 EV_HCALL_CLOBBERS3, "r5"
 #define EV_HCALL_CLOBBERS1 EV_HCALL_CLOBBERS2, "r4"
 
+extern bool epapr_paravirt_enabled;
+extern u32 epapr_hypercall_start[];
 
 /*
  * We use uintptr_t to define a register because it's guaranteed to be a
diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile
index ee728e4..ba8fa43 100644
--- a/arch/powerpc/kernel/Makefile
+++ b/arch/powerpc/kernel/Makefile
@@ -136,6 +136,7 @@ ifneq ($(CONFIG_XMON)$(CONFIG_KEXEC),)
 obj-y  += ppc_save_regs.o
 endif
 
+obj-$(CONFIG_EPAPR_PARAVIRT)   += epapr_paravirt.o epapr_hcalls.o
 obj-$(CONFIG_KVM_GUEST)+= kvm.o kvm_emul.o
 
 # Disable GCOV in odd or sensitive code
diff --git a/arch/powerpc/kernel/epapr_hcalls.S 
b/arch/powerpc/kernel/epapr_hcalls.S
new file mode 100644
index 000..697b390
--- /dev/null
+++ b/arch/powerpc/kernel/epapr_hcalls.S
@@ -0,0 +1,25 @@
+/*
+ * Copyright (C) 2012 Freescale Semiconductor, Inc.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/threads.h>
+#include <asm/reg.h>
+#include <asm/page.h>
+#include <asm/cputable.h>
+#include <asm/thread_info.h>
+#include <asm/ppc_asm.h>
+#include <asm/asm-offsets.h>
+
+/* Hypercall entry point. Will be patched with device tree instructions. */
+.global epapr_hypercall_start
+epapr_hypercall_start:
+   li  r3, -1
+   nop
+   nop
+   nop
+   blr
diff --git a/arch/powerpc/kernel/epapr_paravirt.c 
b/arch/powerpc/kernel/epapr_paravirt.c
new file mode 100644
index 000..45eb439
--- /dev/null
+++ b/arch/powerpc/kernel/epapr_paravirt.c
@@ -0,0 +1,54 @@
+/*
+ * ePAPR para-virtualization support.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ *
+ * Copyright (C) 2012 Freescale Semiconductor, Inc.
+ */
+
+#include <linux/of.h>
+#include <asm/epapr_hcalls.h>
+#include <asm/cacheflush.h>
+#include <asm/code-patching.h>
+
+bool epapr_paravirt_enabled;
+
+static int __init epapr_paravirt_init(void)
+{
+   struct device_node *hyper_node;
+   const u32 *insts;
+   int len, i;
+
+   hyper_node = of_find_node_by_path("/hypervisor");
+   if (!hyper_node)
+   return -ENODEV;
+
+   insts = of_get_property(hyper_node, "hcall-instructions", &len);
+   if (!insts)
+   return 0;
+
+   if (!(len % 4) && len <= (4 * 4)) {
+   for (i = 0; i < (len / 4); i++)
+   patch_instruction(epapr_hypercall_start + i, insts[i]);
+
+   epapr_paravirt_enabled = true;
+   } else {
+   printk(KERN_WARNING
+  "ePAPR paravirt: hcall-instructions format error\n");
+   }
+
+   return 0;
+}
+
+early_initcall(epapr_paravirt_init);
diff --git a/arch/powerpc/kernel/kvm.c b/arch/powerpc/kernel/kvm.c
index 62bdf23..1c13307 100644
--- a/arch/powerpc/kernel/kvm.c
+++ b/arch/powerpc/kernel/kvm.c
@@ -31,6 +31,7 @@
 #include <asm/cacheflush.h>
 #include <asm/disassemble.h>
 #include <asm/ppc-opcode.h>
+#include <asm/epapr_hcalls.h>
 
 #define KVM_MAGIC_PAGE (-4096L)
 #define magic_var(x) KVM_MAGIC_PAGE + offsetof(struct kvm_vcpu_arch_shared, x)
@@ -726,7 +727,7 @@ unsigned long 

[PATCH v6 2/4] KVM: PPC: epapr: Add idle hcall support for host

2012-02-23 Thread Liu Yu
And add a new flag definition in kvm_ppc_pvinfo to indicate
whether the host supports the EV_IDLE hcall.
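
For illustration, here is a minimal userspace sketch (not part of the
patch; error handling trimmed) of how a guest-side tool could test the
new flag through the existing KVM_PPC_GET_PVINFO vm ioctl:

    /* Sketch: query pvinfo and test the new EV_IDLE flag.  Assumes the
     * updated <linux/kvm.h> from this series is installed. */
    #include <fcntl.h>
    #include <stdio.h>
    #include <sys/ioctl.h>
    #include <linux/kvm.h>

    int main(void)
    {
            struct kvm_ppc_pvinfo pvinfo;
            int kvm = open("/dev/kvm", O_RDWR);
            int vm = ioctl(kvm, KVM_CREATE_VM, 0);

            if (ioctl(vm, KVM_PPC_GET_PVINFO, &pvinfo) == 0 &&
                (pvinfo.flags & KVM_PPC_PVINFO_FLAGS_EV_IDLE))
                    printf("host handles the EV_IDLE hcall\n");
            return 0;
    }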

Signed-off-by: Liu Yu yu@freescale.com
---
v6: no change

 arch/powerpc/include/asm/Kbuild |1 +
 arch/powerpc/include/asm/kvm_para.h |   14 --
 arch/powerpc/kvm/powerpc.c  |6 ++
 include/linux/kvm.h |2 ++
 4 files changed, 21 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/include/asm/Kbuild b/arch/powerpc/include/asm/Kbuild
index 7e313f1..13d6b7b 100644
--- a/arch/powerpc/include/asm/Kbuild
+++ b/arch/powerpc/include/asm/Kbuild
@@ -34,5 +34,6 @@ header-y += termios.h
 header-y += types.h
 header-y += ucontext.h
 header-y += unistd.h
+header-y += epapr_hcalls.h
 
 generic-y += rwsem.h
diff --git a/arch/powerpc/include/asm/kvm_para.h 
b/arch/powerpc/include/asm/kvm_para.h
index 7b754e7..81a34c9 100644
--- a/arch/powerpc/include/asm/kvm_para.h
+++ b/arch/powerpc/include/asm/kvm_para.h
@@ -75,9 +75,19 @@ struct kvm_vcpu_arch_shared {
 };
 
 #define KVM_SC_MAGIC_R0		0x4b564d21 /* "KVM!" */
-#define HC_VENDOR_KVM		(42 << 16)
+
+#include <asm/epapr_hcalls.h>
+
+/* ePAPR Hypercall Vendor ID */
+#define HC_VENDOR_EPAPR		(EV_EPAPR_VENDOR_ID << 16)
+#define HC_VENDOR_KVM		(EV_KVM_VENDOR_ID << 16)
+
+/* ePAPR Hypercall Token */
+#define HC_EV_IDLE		EV_IDLE
+
+/* ePAPR Hypercall Return Codes */
 #define HC_EV_SUCCESS		0
-#define HC_EV_UNIMPLEMENTED	12
+#define HC_EV_UNIMPLEMENTED	EV_UNIMPLEMENTED
 
 #define KVM_FEATURE_MAGIC_PAGE 1
 
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index 0e21d15..7098840 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -81,6 +81,10 @@ int kvmppc_kvm_pv(struct kvm_vcpu *vcpu)
 
/* Second return value is in r4 */
break;
+   case HC_VENDOR_EPAPR | HC_EV_IDLE:
+   r = HC_EV_SUCCESS;
+   kvm_vcpu_block(vcpu);
+   break;
default:
r = HC_EV_UNIMPLEMENTED;
break;
@@ -746,6 +750,8 @@ static int kvm_vm_ioctl_get_pvinfo(struct kvm_ppc_pvinfo *pvinfo)
 	pvinfo->hcall[2] = inst_sc;
 	pvinfo->hcall[3] = inst_nop;
 
+	pvinfo->flags = KVM_PPC_PVINFO_FLAGS_EV_IDLE;
+
return 0;
 }
 
diff --git a/include/linux/kvm.h b/include/linux/kvm.h
index acbe429..6b2c70e 100644
--- a/include/linux/kvm.h
+++ b/include/linux/kvm.h
@@ -449,6 +449,8 @@ struct kvm_ppc_pvinfo {
__u8  pad[108];
 };
 
+#define KVM_PPC_PVINFO_FLAGS_EV_IDLE	(1<<0)
+
 #define KVMIO 0xAE
 
 /* machine type bits, to be used as argument to KVM_CREATE_VM */
-- 
1.7.0.4




[PATCH v6 3/4] KVM: PPC: epapr: install ev_idle hcall for e500 guest

2012-02-23 Thread Liu Yu
If the guest's hypervisor node contains the has-idle property.

Signed-off-by: Liu Yu yu@freescale.com
---
v6:
reuse the EV_IDLE definition

 arch/powerpc/include/asm/epapr_hcalls.h |   11 ++-
 arch/powerpc/kernel/epapr_hcalls.S  |   27 +++
 arch/powerpc/kernel/epapr_paravirt.c|   11 ++-
 3 files changed, 43 insertions(+), 6 deletions(-)

diff --git a/arch/powerpc/include/asm/epapr_hcalls.h 
b/arch/powerpc/include/asm/epapr_hcalls.h
index 2173d4c..78460ac 100644
--- a/arch/powerpc/include/asm/epapr_hcalls.h
+++ b/arch/powerpc/include/asm/epapr_hcalls.h
@@ -50,10 +50,6 @@
 #ifndef _EPAPR_HCALLS_H
 #define _EPAPR_HCALLS_H
 
-#include <linux/types.h>
-#include <linux/errno.h>
-#include <asm/byteorder.h>
-
 #define EV_BYTE_CHANNEL_SEND   1
 #define EV_BYTE_CHANNEL_RECEIVE2
 #define EV_BYTE_CHANNEL_POLL   3
@@ -108,6 +104,11 @@
 #define EV_UNIMPLEMENTED   12  /* Unimplemented hypercall */
 #define EV_BUFFER_OVERFLOW 13  /* Caller-supplied buffer too small */
 
+#ifndef __ASSEMBLY__
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <asm/byteorder.h>
+
 /*
  * Hypercall register clobber list
  *
@@ -500,5 +501,5 @@ static inline unsigned int ev_idle(void)
 
return r3;
 }
-
+#endif /* !__ASSEMBLY__ */
 #endif
diff --git a/arch/powerpc/kernel/epapr_hcalls.S 
b/arch/powerpc/kernel/epapr_hcalls.S
index 697b390..bf643ed 100644
--- a/arch/powerpc/kernel/epapr_hcalls.S
+++ b/arch/powerpc/kernel/epapr_hcalls.S
@@ -8,6 +8,7 @@
  */
 
 #include <linux/threads.h>
+#include <asm/epapr_hcalls.h>
 #include <asm/reg.h>
 #include <asm/page.h>
 #include <asm/cputable.h>
@@ -15,6 +16,32 @@
 #include <asm/ppc_asm.h>
 #include <asm/asm-offsets.h>
 
+_GLOBAL(epapr_ev_idle)
+epapr_ev_idle:
+   rlwinm  r3,r1,0,0,31-THREAD_SHIFT   /* current thread_info */
+   lwz r4,TI_LOCAL_FLAGS(r3)   /* set napping bit */
+   ori r4,r4,_TLF_NAPPING  /* so when we take an exception */
+   stw r4,TI_LOCAL_FLAGS(r3)   /* it will return to our caller */
+
+   wrteei  1
+
+idle_loop:
+   LOAD_REG_IMMEDIATE(r11, EV_HCALL_TOKEN(EV_IDLE))
+
+.global epapr_ev_idle_start
+epapr_ev_idle_start:
+   li  r3, -1
+   nop
+   nop
+   nop
+
+   /*
+* Guard against spurious wakeups from a hypervisor --
+* only interrupt will cause us to return to LR due to
+* _TLF_NAPPING.
+*/
+   b   idle_loop
+
 /* Hypercall entry point. Will be patched with device tree instructions. */
 .global epapr_hypercall_start
 epapr_hypercall_start:
diff --git a/arch/powerpc/kernel/epapr_paravirt.c 
b/arch/powerpc/kernel/epapr_paravirt.c
index 45eb439..c3e6e25 100644
--- a/arch/powerpc/kernel/epapr_paravirt.c
+++ b/arch/powerpc/kernel/epapr_paravirt.c
@@ -21,6 +21,10 @@
 #include <asm/epapr_hcalls.h>
 #include <asm/cacheflush.h>
 #include <asm/code-patching.h>
+#include <asm/machdep.h>
+
+extern void epapr_ev_idle(void);
+extern u32 epapr_ev_idle_start[];
 
 bool epapr_paravirt_enabled;
 
@@ -39,8 +43,13 @@ static int __init epapr_paravirt_init(void)
return 0;
 
 	if (!(len % 4) && len <= (4 * 4)) {
-		for (i = 0; i < (len / 4); i++)
+		for (i = 0; i < (len / 4); i++) {
 			patch_instruction(epapr_hypercall_start + i, insts[i]);
+			patch_instruction(epapr_ev_idle_start + i, insts[i]);
+		}
+
+		if (of_get_property(hyper_node, "has-idle", NULL))
+   ppc_md.power_save = epapr_ev_idle;
 
epapr_paravirt_enabled = true;
} else {
-- 
1.7.0.4




[PATCH v6 4/4] KVM: PPC: epapr: Update other hypercall invoking

2012-02-23 Thread Liu Yu
Discard the old way of invoking hypercalls;
instead, use the ePAPR paravirt infrastructure.

Signed-off-by: Liu Yu yu@freescale.com
---
v6:
select epapr_paravirt when enabling the fsl_hv driver

 arch/powerpc/include/asm/epapr_hcalls.h |   22 +-
 arch/powerpc/include/asm/fsl_hcalls.h   |   36 +++---
 drivers/virt/Kconfig|1 +
 3 files changed, 30 insertions(+), 29 deletions(-)

diff --git a/arch/powerpc/include/asm/epapr_hcalls.h 
b/arch/powerpc/include/asm/epapr_hcalls.h
index 78460ac..b95758d 100644
--- a/arch/powerpc/include/asm/epapr_hcalls.h
+++ b/arch/powerpc/include/asm/epapr_hcalls.h
@@ -189,7 +189,7 @@ static inline unsigned int ev_int_set_config(unsigned int interrupt,
 	r5  = priority;
 	r6  = destination;
 
-	__asm__ __volatile__ ("sc 1"
+	asm volatile("bl epapr_hypercall_start"
		: "+r" (r11), "+r" (r3), "+r" (r4), "+r" (r5), "+r" (r6)
: : EV_HCALL_CLOBBERS4
);
@@ -218,7 +218,7 @@ static inline unsigned int ev_int_get_config(unsigned int interrupt,
 	r11 = EV_HCALL_TOKEN(EV_INT_GET_CONFIG);
 	r3 = interrupt;
 
-	__asm__ __volatile__ ("sc 1"
+	asm volatile("bl epapr_hypercall_start"
		: "+r" (r11), "+r" (r3), "=r" (r4), "=r" (r5), "=r" (r6)
: : EV_HCALL_CLOBBERS4
);
@@ -248,7 +248,7 @@ static inline unsigned int ev_int_set_mask(unsigned int interrupt,
 	r3 = interrupt;
 	r4 = mask;
 
-	__asm__ __volatile__ ("sc 1"
+	asm volatile("bl epapr_hypercall_start"
		: "+r" (r11), "+r" (r3), "+r" (r4)
: : EV_HCALL_CLOBBERS2
);
@@ -273,7 +273,7 @@ static inline unsigned int ev_int_get_mask(unsigned int interrupt,
 	r11 = EV_HCALL_TOKEN(EV_INT_GET_MASK);
 	r3 = interrupt;
 
-	__asm__ __volatile__ ("sc 1"
+	asm volatile("bl epapr_hypercall_start"
		: "+r" (r11), "+r" (r3), "=r" (r4)
: : EV_HCALL_CLOBBERS2
);
@@ -301,7 +301,7 @@ static inline unsigned int ev_int_eoi(unsigned int interrupt)
 	r11 = EV_HCALL_TOKEN(EV_INT_EOI);
 	r3 = interrupt;
 
-	__asm__ __volatile__ ("sc 1"
+	asm volatile("bl epapr_hypercall_start"
		: "+r" (r11), "+r" (r3)
: : EV_HCALL_CLOBBERS1
);
@@ -340,7 +340,7 @@ static inline unsigned int ev_byte_channel_send(unsigned int handle,
 	r7 = be32_to_cpu(p[2]);
 	r8 = be32_to_cpu(p[3]);
 
-	__asm__ __volatile__ ("sc 1"
+	asm volatile("bl epapr_hypercall_start"
		: "+r" (r11), "+r" (r3),
		  "+r" (r4), "+r" (r5), "+r" (r6), "+r" (r7), "+r" (r8)
: : EV_HCALL_CLOBBERS6
@@ -379,7 +379,7 @@ static inline unsigned int ev_byte_channel_receive(unsigned int handle,
 	r3 = handle;
 	r4 = *count;
 
-	__asm__ __volatile__ ("sc 1"
+	asm volatile("bl epapr_hypercall_start"
		: "+r" (r11), "+r" (r3), "+r" (r4),
		  "=r" (r5), "=r" (r6), "=r" (r7), "=r" (r8)
: : EV_HCALL_CLOBBERS6
@@ -417,7 +417,7 @@ static inline unsigned int ev_byte_channel_poll(unsigned int handle,
 	r11 = EV_HCALL_TOKEN(EV_BYTE_CHANNEL_POLL);
 	r3 = handle;
 
-	__asm__ __volatile__ ("sc 1"
+	asm volatile("bl epapr_hypercall_start"
		: "+r" (r11), "+r" (r3), "=r" (r4), "=r" (r5)
: : EV_HCALL_CLOBBERS3
);
@@ -450,7 +450,7 @@ static inline unsigned int ev_int_iack(unsigned int handle,
r11 = EV_HCALL_TOKEN(EV_INT_IACK);
r3 = handle;
 
-	__asm__ __volatile__ ("sc 1"
+	asm volatile("bl epapr_hypercall_start"
		: "+r" (r11), "+r" (r3), "=r" (r4)
: : EV_HCALL_CLOBBERS2
);
@@ -474,7 +474,7 @@ static inline unsigned int ev_doorbell_send(unsigned int handle)
 	r11 = EV_HCALL_TOKEN(EV_DOORBELL_SEND);
 	r3 = handle;
 
-	__asm__ __volatile__ ("sc 1"
+	asm volatile("bl epapr_hypercall_start"
		: "+r" (r11), "+r" (r3)
: : EV_HCALL_CLOBBERS1
);
@@ -494,7 +494,7 @@ static inline unsigned int ev_idle(void)
 
r11 = EV_HCALL_TOKEN(EV_IDLE);
 
-	__asm__ __volatile__ ("sc 1"
+	asm volatile("bl epapr_hypercall_start"
		: "+r" (r11), "=r" (r3)
: : EV_HCALL_CLOBBERS1
);
diff --git a/arch/powerpc/include/asm/fsl_hcalls.h 
b/arch/powerpc/include/asm/fsl_hcalls.h
index 922d9b5..3abb583 100644
--- a/arch/powerpc/include/asm/fsl_hcalls.h
+++ b/arch/powerpc/include/asm/fsl_hcalls.h
@@ -96,7 +96,7 @@ static inline unsigned int fh_send_nmi(unsigned int vcpu_mask)
r11 = FH_HCALL_TOKEN(FH_SEND_NMI);
r3 = vcpu_mask;
 
-	__asm__ __volatile__ ("sc 1"
+	asm volatile("bl epapr_hypercall_start"
		: "+r" (r11), "+r" (r3)
: : EV_HCALL_CLOBBERS1
);
@@ -151,7 +151,7 @@ static inline unsigned int fh_partition_get_dtprop(int handle,
  

[PATCH 0/4] KVM: srcu-less dirty logging

2012-02-23 Thread Takuya Yoshikawa
This patch series is the result of the integration of my dirty logging
optimization work, including preparation for the new GET_DIRTY_LOG API,
and the attempt to get rid of the controversial synchronize_srcu_expedited().

1 - KVM: MMU: Split the main body of rmap_write_protect() off from others
2 - KVM: Avoid checking huge page mappings in get_dirty_log()
3 - KVM: Switch to srcu-less get_dirty_log()
4 - KVM: Remove unused dirty_bitmap_head and nr_dirty_pages

Although there are still some remaining tasks, the test results obtained
look very promising.


Remaining tasks:

- Implement set_bit_le() for mark_page_dirty()

  Some drivers are using their own implementation of it and a bit of
  work is needed to make it generic.  I want to do this separately
  later because it cannot be done within the kvm tree.

- Stop allocating extra dirty bitmap buffer area

  According to Peter, mmu_notifier has become preemptible.  If we can
  change mmu_lock from a spin_lock to a mutex_lock, as Avi said before, this
  would be straightforward because we can use __put_user() right after
  xchg() with the mmu_lock held.
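
  As an illustration only, a rough sketch of that idea, assuming
  mmu_lock has become a mutex; write_protect_dirty_range() is a
  hypothetical helper, not existing code:

	/* Copy the snapshot out right after each xchg(), with no extra
	 * buffer.  __put_user() may sleep, hence the mutex. */
	mutex_lock(&kvm->mmu_lock);
	for (i = 0; i < n / sizeof(long); i++) {
		unsigned long mask = xchg(&dirty_bitmap[i], 0);

		if (!mask)
			continue;
		write_protect_dirty_range(kvm, memslot, i, mask);
		if (__put_user(mask, &user_bitmap[i]))
			r = -EFAULT;
	}
	mutex_unlock(&kvm->mmu_lock);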


Test results:

1. dirty-log-perf unit test (on Sandy Bridge core-i3 32-bit host)

With some changes added since the previous post, the performance was
much improved: now, even when every page in the slot is dirty, the number
is reasonably close to the original one.  For the other cases, needless to
say, we have achieved a very nice improvement.

- kvm.git next
 average(ns)      stdev    ns/page   pages

    147018.6    77604.9   147018.6       1
    158080.2    82211.9    79040.1       2
    127555.6    80619.8    31888.9       4
    108865.6    78499.3    13608.2       8
    114707.8    43508.6     7169.2      16
     76679.0    37659.8     2396.2      32
     59159.8    20417.1      924.3      64
     60418.2    19405.7      472.0     128
     76267.0    21450.5      297.9     256
    113182.0    22684.9      221.0     512
    930344.2   153766.5      908.5      1K
    939098.2   163800.3      458.5      2K
    996813.4    77921.0      243.3      4K
   1113232.6   107782.6      135.8      8K
   1241206.4    82282.5       75.7     16K
   1529526.4   116388.2       46.6     32K
   2147538.4   227375.9       32.7     64K
   3309619.4    79356.8       25.2    128K
   6016951.8   549873.4       22.9    256K

- kvm.git next + srcu-less series
 average(ns)      stdev    ns/page   pages   improvement(%)

     14086.0     3532.3    14086.0       1      944
     13303.6     3317.7     6651.8       2     1088
     13455.6     3315.2     3363.9       4      848
     14125.8     3435.4     1765.7       8      671
     15322.4     3690.1      957.6      16      649
     17026.6     4037.2      532.0      32      350
     21258.6     4852.3      332.1      64      178
     33845.6    14115.8      264.4     128       79
     37893.0      681.8      148.0     256      101
     61707.4     1057.6      120.5     512       83
     88861.4     2131.0       86.7      1K      947
    151315.6     6490.5       73.8      2K      521
    290579.6     8523.0       70.9      4K      243
    518231.0    20412.6       63.2      8K      115
   2271171.4    12064.9      138.6     16K      -45
   3375866.2    14743.3      103.0     32K      -55
   4408395.6    10720.0       67.2     64K      -51
   5915336.2    26538.1       45.1    128K      -44
   8497356.4    16441.0       32.4    256K      -29

Note that when the number of dirty pages was large, we spent less than
100ns to get the information for one dirty page: see the ns/page column.

As Avi noted before, this is much faster than the time userspace needs
to send one page to the destination node.

Furthermore, with the already proposed new GET_DIRTY_LOG API, we will
be able to restrict the area from which we get the log, and will not need
to care about the ms-order latency observed for very large numbers of
dirty pages.

2. real workloads (on Xeon W3520 64-bit host)

I traced kvm_vm_ioctl_get_dirty_log() during heavy VGA updates and
during live migration.

2.1. VGA: guest was doing x11perf -rect1 -rect10 -rect100 -rect500

As can be guessed from the dirty-log-perf results, we observed a very
nice improvement.

- kvm.git next
For heavy updates: 100us to 300us.
Worst: 300us

- kvm.git next + srcu-less series
For heavy updates: 3us to 10us.
Worst: 50us.

2.2. live migration: guest was doing dd if=/path/to/a/file of=/dev/null

The improvement was significant again.

- kvm.git next
For heavy updates: 1ms to 3ms

- kvm.git next + srcu-less series
For heavy updates: 50us to 300us

We probably gained a lot from the locality of the WWS (writable working set).


Takuya


[PATCH 1/4] KVM: MMU: Split the main body of rmap_write_protect() off from others

2012-02-23 Thread Takuya Yoshikawa
We will use this in the following patch to implement another function
which needs to write protect pages using the rmap information.

Note that there is a small change in debug printing for large pages:
we do not differentiate them from others to avoid duplicating code.

Signed-off-by: Takuya Yoshikawa yoshikawa.tak...@oss.ntt.co.jp
---
 arch/x86/kvm/mmu.c |   53 ++-
 1 files changed, 27 insertions(+), 26 deletions(-)

diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index ff053ca..67857bd 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -1010,42 +1010,43 @@ static void drop_spte(struct kvm *kvm, u64 *sptep)
rmap_remove(kvm, sptep);
 }
 
-int kvm_mmu_rmap_write_protect(struct kvm *kvm, u64 gfn,
-  struct kvm_memory_slot *slot)
+static int __rmap_write_protect(struct kvm *kvm, unsigned long *rmapp, int level)
 {
-   unsigned long *rmapp;
-   u64 *spte;
-   int i, write_protected = 0;
+   u64 *spte = NULL;
+   int write_protected = 0;
 
-   rmapp = __gfn_to_rmap(gfn, PT_PAGE_TABLE_LEVEL, slot);
-   spte = rmap_next(rmapp, NULL);
-   while (spte) {
+   while ((spte = rmap_next(rmapp, spte))) {
 		BUG_ON(!(*spte & PT_PRESENT_MASK));
 		rmap_printk("rmap_write_protect: spte %p %llx\n", spte, *spte);
-   if (is_writable_pte(*spte)) {
+
+   if (!is_writable_pte(*spte))
+   continue;
+
+   if (level == PT_PAGE_TABLE_LEVEL) {
 			mmu_spte_update(spte, *spte & ~PT_WRITABLE_MASK);
-   write_protected = 1;
+   } else {
+   BUG_ON(!is_large_pte(*spte));
+   drop_spte(kvm, spte);
+			--kvm->stat.lpages;
+   spte = NULL;
}
-   spte = rmap_next(rmapp, spte);
+
+   write_protected = 1;
}
 
-   /* check for huge page mappings */
-   for (i = PT_DIRECTORY_LEVEL;
+   return write_protected;
+}
+
+int kvm_mmu_rmap_write_protect(struct kvm *kvm, u64 gfn,
+  struct kvm_memory_slot *slot)
+{
+   unsigned long *rmapp;
+   int i, write_protected = 0;
+
+   for (i = PT_PAGE_TABLE_LEVEL;
 	     i < PT_PAGE_TABLE_LEVEL + KVM_NR_PAGE_SIZES; ++i) {
rmapp = __gfn_to_rmap(gfn, i, slot);
-   spte = rmap_next(rmapp, NULL);
-   while (spte) {
-			BUG_ON(!(*spte & PT_PRESENT_MASK));
-			BUG_ON(!is_large_pte(*spte));
-			pgprintk("rmap_write_protect(large): spte %p %llx %lld\n", spte, *spte, gfn);
-			if (is_writable_pte(*spte)) {
-				drop_spte(kvm, spte);
-				--kvm->stat.lpages;
-				spte = NULL;
-				write_protected = 1;
-   }
-   spte = rmap_next(rmapp, spte);
-   }
+   write_protected |= __rmap_write_protect(kvm, rmapp, i);
}
 
return write_protected;
-- 
1.7.5.4



[PATCH 2/4] KVM: Avoid checking huge page mappings in get_dirty_log()

2012-02-23 Thread Takuya Yoshikawa
We dropped such mappings when we enabled dirty logging, and we will never
create new ones until we stop the logging.

For this we introduce a new function which can be used to write protect
a range of PT level pages: although we do not need to care about a range
of pages at this point, the following patch will need this feature to
optimize the write protection of many pages.

Signed-off-by: Takuya Yoshikawa yoshikawa.tak...@oss.ntt.co.jp
---
 arch/x86/include/asm/kvm_host.h |5 +++--
 arch/x86/kvm/mmu.c  |   38 --
 arch/x86/kvm/x86.c  |8 +++-
 3 files changed, 34 insertions(+), 17 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 74c9edf..bd0f78e 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -712,8 +712,9 @@ void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask,
 
 int kvm_mmu_reset_context(struct kvm_vcpu *vcpu);
 void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot);
-int kvm_mmu_rmap_write_protect(struct kvm *kvm, u64 gfn,
-  struct kvm_memory_slot *slot);
+void kvm_mmu_write_protect_pt_range(struct kvm *kvm,
+   struct kvm_memory_slot *slot,
+   gfn_t start_offset, gfn_t end_offset);
 void kvm_mmu_zap_all(struct kvm *kvm);
 unsigned int kvm_mmu_calculate_mmu_pages(struct kvm *kvm);
 void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned int kvm_nr_mmu_pages);
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 67857bd..c453ddd 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -1037,27 +1037,45 @@ static int __rmap_write_protect(struct kvm *kvm, unsigned long *rmapp, int level
return write_protected;
 }
 
-int kvm_mmu_rmap_write_protect(struct kvm *kvm, u64 gfn,
-  struct kvm_memory_slot *slot)
+/**
+ * kvm_mmu_write_protect_pt_range - write protect a range of PT level pages
+ * @kvm: kvm instance
+ * @slot: slot to protect
+ * @start_offset: offset of the first page to protect
+ * @end_offset: offset of the last page to protect
+ *
+ * Used when we do not need to care about huge page mappings: e.g. during dirty
+ * logging we do not have any such mappings.
+ */
+void kvm_mmu_write_protect_pt_range(struct kvm *kvm,
+   struct kvm_memory_slot *slot,
+   gfn_t start_offset, gfn_t end_offset)
 {
+   gfn_t i;
unsigned long *rmapp;
-   int i, write_protected = 0;
 
-	for (i = PT_PAGE_TABLE_LEVEL;
-	     i < PT_PAGE_TABLE_LEVEL + KVM_NR_PAGE_SIZES; ++i) {
-		rmapp = __gfn_to_rmap(gfn, i, slot);
-		write_protected |= __rmap_write_protect(kvm, rmapp, i);
+	for (i = start_offset; i <= end_offset; i++) {
+		rmapp = &slot->rmap[i];
+		__rmap_write_protect(kvm, rmapp, PT_PAGE_TABLE_LEVEL);
}
-
-   return write_protected;
 }
 
 static int rmap_write_protect(struct kvm *kvm, u64 gfn)
 {
struct kvm_memory_slot *slot;
+   unsigned long *rmapp;
+   int i;
+   int write_protected = 0;
 
slot = gfn_to_memslot(kvm, gfn);
-   return kvm_mmu_rmap_write_protect(kvm, gfn, slot);
+
+	for (i = PT_PAGE_TABLE_LEVEL;
+	     i < PT_PAGE_TABLE_LEVEL + KVM_NR_PAGE_SIZES; ++i) {
+   rmapp = __gfn_to_rmap(gfn, i, slot);
+   write_protected |= __rmap_write_protect(kvm, rmapp, i);
+   }
+
+   return write_protected;
 }
 
 static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp,
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index c9d99e5..3b3d1eb 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -3069,13 +3069,11 @@ static void write_protect_slot(struct kvm *kvm,
 
/* Not many dirty pages compared to # of shadow pages. */
 	if (nr_dirty_pages < kvm->arch.n_used_mmu_pages) {
-   unsigned long gfn_offset;
+   gfn_t offset;
 
-		for_each_set_bit(gfn_offset, dirty_bitmap, memslot->npages) {
-			unsigned long gfn = memslot->base_gfn + gfn_offset;
+		for_each_set_bit(offset, dirty_bitmap, memslot->npages)
+			kvm_mmu_write_protect_pt_range(kvm, memslot, offset, offset);
 
-   kvm_mmu_rmap_write_protect(kvm, gfn, memslot);
-   }
kvm_flush_remote_tlbs(kvm);
} else
 		kvm_mmu_slot_remove_write_access(kvm, memslot->id);
-- 
1.7.5.4



[PATCH 3/4] KVM: Switch to srcu-less get_dirty_log()

2012-02-23 Thread Takuya Yoshikawa
We have seen some problems with the current implementation of
get_dirty_log(), which uses synchronize_srcu_expedited() for updating
dirty bitmaps; e.g. it is noticeable that this sometimes gives us ms-order
latency when we use VGA displays.

Furthermore the recent discussion on the following thread
srcu: Implement call_srcu()
http://lkml.org/lkml/2012/1/31/211
also motivated us to implement get_dirty_log() without SRCU.

This patch achieves this goal without sacrificing the performance of
both VGA and live migration: in practice the new code is much faster
than the old one unless we have too many dirty pages.

Implementation:

The key part of the implementation is the use of xchg() operation for
clearing dirty bits atomically.  Since this allows us to update only
BITS_PER_LONG pages at once, we need to iterate over the dirty bitmap
until every dirty bit is cleared again for the next call.

Although some people may worry about the cost of issuing the atomic
memory instruction many times on the concurrently accessible bitmap,
it is usually accessed with mmu_lock held and we rarely see concurrent
accesses: so what we need to care about is the pure xchg() overhead.

Another point to note is that we do not use for_each_set_bit() to check
which pages in each BITS_PER_LONG-page block are actually dirty.  Instead
we simply use __ffs() and __fls() and pass the range between the two
positions found by them to kvm_mmu_write_protect_pt_range().

Even though the passed range may include clean pages, this is much faster
than repeatedly calling find_next_bit(), due to the locality of dirty pages.
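
For illustration, a condensed sketch of this scheme (simplified, not the
literal patch hunk; dirty_bitmap_buffer is the spare half of the doubled
bitmap allocation, which is later copied out to userspace):

	unsigned long i, mask, n = kvm_dirty_bitmap_bytes(memslot);
	gfn_t start, end;

	for (i = 0; i < n / sizeof(long); i++) {
		if (!dirty_bitmap[i])
			continue;
		is_dirty = true;
		mask = xchg(&dirty_bitmap[i], 0); /* snapshot + clear, atomically */
		dirty_bitmap_buffer[i] = mask;
		/* the range may include clean pages, but locality makes this
		 * cheaper than find_next_bit() over every bit */
		start = i * BITS_PER_LONG + __ffs(mask);
		end   = i * BITS_PER_LONG + __fls(mask);
		kvm_mmu_write_protect_pt_range(kvm, memslot, start, end);
	}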

Performance:

The dirty-log-perf unit test showed a nice improvement, sometimes several
times faster than before, when the number of dirty pages was below 8K.
For the other cases we saw a bit of regression, but it is still fast
enough compared to the processing of these dirty pages in userspace.

For real workloads, both VGA and live migration, we have observed pure
improvement: when the guest was reading a file, we originally saw a few
ms of latency, but with the new method the latency was 50us to 300us.

Signed-off-by: Takuya Yoshikawa yoshikawa.tak...@oss.ntt.co.jp
---
 arch/x86/kvm/x86.c |  117 +++-
 1 files changed, 43 insertions(+), 74 deletions(-)

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 3b3d1eb..be4c52b 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -3041,55 +3041,32 @@ static int kvm_vm_ioctl_reinject(struct kvm *kvm,
 }
 
 /**
- * write_protect_slot - write protect a slot for dirty logging
- * @kvm: the kvm instance
- * @memslot: the slot we protect
- * @dirty_bitmap: the bitmap indicating which pages are dirty
- * @nr_dirty_pages: the number of dirty pages
+ * kvm_vm_ioctl_get_dirty_log - get and clear the log of dirty pages in a slot
+ * @kvm: kvm instance
+ * @log: slot id and address to which we copy the log
  *
- * We have two ways to find all sptes to protect:
- * 1. Use kvm_mmu_slot_remove_write_access() which walks all shadow pages and
- *checks ones that have a spte mapping a page in the slot.
- * 2. Use kvm_mmu_rmap_write_protect() for each gfn found in the bitmap.
+ * We need to keep it in mind that VCPU threads can write to the bitmap
+ * concurrently.  So, to avoid losing data, we keep the following order for
+ * each bit:
  *
- * Generally speaking, if there are not so many dirty pages compared to the
- * number of shadow pages, we should use the latter.
+ *   1. Take a snapshot of the bit and clear it if needed.
+ *   2. Write protect the corresponding page.
+ *   3. Flush TLB's if needed.
+ *   4. Copy the snapshot to the userspace.
  *
- * Note that letting others write into a page marked dirty in the old bitmap
- * by using the remaining tlb entry is not a problem.  That page will become
- * write protected again when we flush the tlb and then be reported dirty to
- * the user space by copying the old bitmap.
+ * Between 2 and 3, the guest may write to the page using the remaining TLB
+ * entry.  This is not a problem because the page will be reported dirty at
+ * step 4 using the snapshot taken before and step 3 ensures that successive
+ * writes will be logged for the next call.
  */
-static void write_protect_slot(struct kvm *kvm,
-  struct kvm_memory_slot *memslot,
-  unsigned long *dirty_bitmap,
-  unsigned long nr_dirty_pages)
-{
-	spin_lock(&kvm->mmu_lock);
-
-   /* Not many dirty pages compared to # of shadow pages. */
-	if (nr_dirty_pages < kvm->arch.n_used_mmu_pages) {
-   gfn_t offset;
-
-		for_each_set_bit(offset, dirty_bitmap, memslot->npages)
-			kvm_mmu_write_protect_pt_range(kvm, memslot, offset, offset);
-
-   kvm_flush_remote_tlbs(kvm);
-   } else
-		kvm_mmu_slot_remove_write_access(kvm, memslot->id);
-
-	spin_unlock(&kvm->mmu_lock);
-}
-
-/*
- * 

[PATCH 4/4] KVM: Remove unused dirty_bitmap_head and nr_dirty_pages

2012-02-23 Thread Takuya Yoshikawa
Now that we do neither double buffering nor heuristic selection of the
write protection method, these are not needed anymore.

Note: some drivers have their own implementation of set_bit_le() and
making it generic needs a bit of work; so we use test_and_set_bit_le()
and will later replace it with generic set_bit_le().
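
For reference, one plausible generic form, built the same way
test_and_set_bit_le() is built in include/asm-generic/bitops/le.h (the
final form is still to be decided):

	/* BITOP_LE_SWIZZLE is 0 on little-endian and flips the low bits
	 * of the bit number on big-endian */
	static inline void set_bit_le(int nr, void *addr)
	{
		set_bit(nr ^ BITOP_LE_SWIZZLE, addr);
	}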

Signed-off-by: Takuya Yoshikawa yoshikawa.tak...@oss.ntt.co.jp
---
 include/linux/kvm_host.h |2 --
 virt/kvm/kvm_main.c  |   14 +-
 2 files changed, 5 insertions(+), 11 deletions(-)

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 355e445..73c7d76 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -177,8 +177,6 @@ struct kvm_memory_slot {
unsigned long flags;
unsigned long *rmap;
unsigned long *dirty_bitmap;
-   unsigned long *dirty_bitmap_head;
-   unsigned long nr_dirty_pages;
struct kvm_arch_memory_slot arch;
unsigned long userspace_addr;
int user_alloc;
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index e4431ad..27a1083 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -522,12 +522,11 @@ static void kvm_destroy_dirty_bitmap(struct kvm_memory_slot *memslot)
return;
 
 	if (2 * kvm_dirty_bitmap_bytes(memslot) > PAGE_SIZE)
-		vfree(memslot->dirty_bitmap_head);
+		vfree(memslot->dirty_bitmap);
 	else
-		kfree(memslot->dirty_bitmap_head);
+		kfree(memslot->dirty_bitmap);
 
 	memslot->dirty_bitmap = NULL;
-	memslot->dirty_bitmap_head = NULL;
 }
 
 /*
@@ -611,8 +610,7 @@ static int kvm_vm_release(struct inode *inode, struct file *filp)
 
 /*
  * Allocation size is twice as large as the actual dirty bitmap size.
- * This makes it possible to do double buffering: see x86's
- * kvm_vm_ioctl_get_dirty_log().
+ * See x86's kvm_vm_ioctl_get_dirty_log() why this is needed.
  */
 static int kvm_create_dirty_bitmap(struct kvm_memory_slot *memslot)
 {
@@ -627,8 +625,6 @@ static int kvm_create_dirty_bitmap(struct kvm_memory_slot *memslot)
 	if (!memslot->dirty_bitmap)
 		return -ENOMEM;
 
-	memslot->dirty_bitmap_head = memslot->dirty_bitmap;
-	memslot->nr_dirty_pages = 0;
 #endif /* !CONFIG_S390 */
return 0;
 }
@@ -1476,8 +1472,8 @@ void mark_page_dirty_in_slot(struct kvm *kvm, struct kvm_memory_slot *memslot,
 	if (memslot && memslot->dirty_bitmap) {
 		unsigned long rel_gfn = gfn - memslot->base_gfn;
 
-		if (!test_and_set_bit_le(rel_gfn, memslot->dirty_bitmap))
-			memslot->nr_dirty_pages++;
+		/* TODO: introduce set_bit_le() and use it */
+		test_and_set_bit_le(rel_gfn, memslot->dirty_bitmap);
}
 }
 
-- 
1.7.5.4



Re: [PATCH 0/4] KVM: srcu-less dirty logging

2012-02-23 Thread Peter Zijlstra
On Thu, 2012-02-23 at 20:33 +0900, Takuya Yoshikawa wrote:
 - Stop allocating extra dirty bitmap buffer area
 
   According to Peter, mmu_notifier has become preemptible.  If we can
   change mmu_lock from spin_lock to mutex_lock, as Avi said before, this
   would be straightforward because we can use __put_user() right after
   xchg() with the mmu_lock held 

So the 'only' thing to consider is running the end result with lockdep
enabled, since the mmu locks are rather deep in the nesting tree and it's
very easy to accidentally cause inversions.




linux guests and ksm performance

2012-02-23 Thread Peter Lieven
Hi,

I have recently been playing with an old idea (originally in grsecurity
for security reasons): changing the policy of the Linux page allocator
from zero-on-allocate to zero-after-free. My concern is that Linux
leaves a lot of waste in physical memory, unlike Windows, which by
default zeros pages after they are freed.

I have run some tests and I can confirm some old results: a hardware
Linux machine is approximately 2-3% slower with zero-after-free on big
compilation jobs. This might be due either to the fact that pages are
only zeroed on allocation if GFP_ZERO is set, or to caching benefits.

However, in a virtual machine I have not observed the above slowdown to
that extent, while the benefit of zero-after-free in a virtualisation
environment is obvious:

1) zero pages can easily be merged by ksm or another technique.
2) zero (dup) pages are a lot faster to transfer in case of migration.

Therefore I would like to hear your thoughts on whether it would be a
good idea to change the strategy in the Linux kernel from
zero-on-allocate to zero-after-free automatically if the 'hypervisor'
CPU feature is set, or to have some other way to tell a Linux guest
that ksm is running on the host.

If this is not feasible, can someone think of a kernel module /
userspace program that zeroes out unused pages periodically? (A naive
sketch follows below.)
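
As a naive illustration (an untested sketch; sizes and intervals are
arbitrary), such a userspace scrubber could map a large anonymous
region, write zeros to force page allocation, and unmap it so the freed
pages go back to the kernel already zeroed:

    #include <string.h>
    #include <unistd.h>
    #include <sys/mman.h>
    #include <sys/sysinfo.h>

    int main(void)
    {
            for (;;) {
                    struct sysinfo si;
                    size_t len;
                    void *p;

                    if (sysinfo(&si))
                            return 1;
                    /* leave headroom so we do not push the guest into swap */
                    len = (size_t)si.freeram * si.mem_unit / 2;

                    p = mmap(NULL, len, PROT_READ | PROT_WRITE,
                             MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
                    if (p != MAP_FAILED) {
                            memset(p, 0, len);  /* force allocation of zeroed pages */
                            munmap(p, len);     /* freed pages now hold zeros */
                    }
                    sleep(60);
            }
    }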

Peter




Re: [Qemu-devel] FLR capability hidden in VF config space

2012-02-23 Thread Alex Williamson
On Thu, 2012-02-23 at 09:25 +0530, rukhsana ansari wrote:
 Hello,
 
 Was wondering whether someone could shed some light on the issue below.
 Without FLR exposed in the VF, VF reset via FLR cannot be initiated from
 the guest.
 Appreciate any pointers.

The device state needs to be restored after an FLR.  A guest is not able
to do this by itself, as much of the config space is virtualized.
That means qemu needs to be involved in the FLR.  It's possible we could
trap FLR and call reset_assign_device().  Patches welcome.  Why do you
want to reset the device?

 On Wed, Feb 15, 2012 at 2:54 PM, rukhsana ansari ruk.ans...@gmail.comwrote:
 
  Hi,
 
  The following code snippet (line 1457,
  function:assigned_device_pci_cap_init()  file: hw/device-assignment.c)
  from the latest qemu-kvm git (qemu-kvm-devel: 1.0.50)  implies that FLR
  capability is unset for VF that is assigned to a guest:
 
  /* device capabilities: hide FLR */
  devcap = pci_get_long(pci_dev->config + pos + PCI_EXP_DEVCAP);
  devcap &= ~PCI_EXP_DEVCAP_FLR;
  pci_set_long(pci_dev->config + pos + PCI_EXP_DEVCAP, devcap);
 
 
  However the SR-IOV spec mandates VF FLR.

The SR-IOV spec mandates that the VF support FLR.  That doesn't mean it
has to be exposed to a guest, though.  Thanks,

Alex



Re: linux guests and ksm performance

2012-02-23 Thread Stefan Hajnoczi
On Thu, Feb 23, 2012 at 3:40 PM, Peter Lieven p...@dlh.net wrote:
 However, in a virtual machine I have not observed the above slow down to
 that extend
 while the benefit of zero after free in a virtualisation environment is
 obvious:

 1) zero pages can easily be merged by ksm or other technique.
 2) zero (dup) pages are a lot faster to transfer in case of migration.

The other approach is a memory page discard mechanism - which
obviously requires more code changes than zeroing freed pages.

The advantage is that we don't take the brute-force and CPU intensive
approach of zeroing pages.  It would be like a fine-grained ballooning
feature.

I hope someone will follow up saying this has already been done or
prototyped :).

Stefan


[PATCH-WIP 01/13] xen/arm: use r12 to pass the hypercall number to the hypervisor

2012-02-23 Thread Stefano Stabellini
We need a register to pass the hypercall number because we might not
know it at compile time and HVC only takes an immediate argument.

Among the available registers r12 seems to be the best choice because it
is defined as intra-procedure call scratch register.

Use the ISS to pass a hypervisor-specific tag.

Signed-off-by: Stefano Stabellini stefano.stabell...@eu.citrix.com
CC: kvm@vger.kernel.org
---
 arch/arm/include/asm/xen/hypercall.h |   87 +++---
 1 files changed, 48 insertions(+), 39 deletions(-)

diff --git a/arch/arm/include/asm/xen/hypercall.h 
b/arch/arm/include/asm/xen/hypercall.h
index 404e63f0..04eba1c 100644
--- a/arch/arm/include/asm/xen/hypercall.h
+++ b/arch/arm/include/asm/xen/hypercall.h
@@ -33,13 +33,17 @@
 #ifndef _ASM_ARM_XEN_HYPERCALL_H
 #define _ASM_ARM_XEN_HYPERCALL_H
 
-#define __HVC_IMM(name)	( #name & 0xf) +\
-			(( #name << 4) & 0xfff00)
+#include <xen/interface/xen.h>
+#include <asm/errno.h>
 
-#define HYPERCALL(name) .word 0xe1400070 +  __HVC_IMM(name)
-#define __HYPERCALL(name) HYPERCALL(__HYPERVISOR_##name)
+#define XEN_HYPERCALL_TAG  0XEA1
+
+#define __HVC_IMM(tag)	( tag & 0xf) +  \
+			(( tag << 4) & 0xfff00)
+#define __HYPERCALL .word 0xe1400070 +  __HVC_IMM(XEN_HYPERCALL_TAG)
 
 #define __HYPERCALL_RETREG	"r0"
+#define __HYPERCALL_NUMBER	"r12"
 #define __HYPERCALL_ARG1REG	"r0"
 #define __HYPERCALL_ARG2REG	"r1"
 #define __HYPERCALL_ARG3REG	"r2"
@@ -48,30 +52,32 @@
 
 #define __HYPERCALL_DECLS  \
register unsigned long __res  asm(__HYPERCALL_RETREG);  \
+   register unsigned long __num  asm(__HYPERCALL_NUMBER) = __num; \
register unsigned long __arg1 asm(__HYPERCALL_ARG1REG) = __arg1; \
register unsigned long __arg2 asm(__HYPERCALL_ARG2REG) = __arg2; \
register unsigned long __arg3 asm(__HYPERCALL_ARG3REG) = __arg3; \
register unsigned long __arg4 asm(__HYPERCALL_ARG4REG) = __arg4; \
register unsigned long __arg5 asm(__HYPERCALL_ARG5REG) = __arg5;
 
-#define __HYPERCALL_0PARAM	"=r" (__res)
+#define __HYPERCALL_0PARAM	"=r" (__res), "+r" (__num)
 #define __HYPERCALL_1PARAM	__HYPERCALL_0PARAM, "+r" (__arg1)
 #define __HYPERCALL_2PARAM	__HYPERCALL_1PARAM, "+r" (__arg2)
 #define __HYPERCALL_3PARAM	__HYPERCALL_2PARAM, "+r" (__arg3)
 #define __HYPERCALL_4PARAM	__HYPERCALL_3PARAM, "+r" (__arg4)
 #define __HYPERCALL_5PARAM	__HYPERCALL_4PARAM, "+r" (__arg5)
 
-#define __HYPERCALL_0ARG()
-#define __HYPERCALL_1ARG(a1)					\
-	__HYPERCALL_0ARG()		__arg1 = (unsigned long)(a1);
-#define __HYPERCALL_2ARG(a1,a2)					\
-	__HYPERCALL_1ARG(a1)		__arg2 = (unsigned long)(a2);
-#define __HYPERCALL_3ARG(a1,a2,a3)				\
-	__HYPERCALL_2ARG(a1,a2)		__arg3 = (unsigned long)(a3);
-#define __HYPERCALL_4ARG(a1,a2,a3,a4)				\
-	__HYPERCALL_3ARG(a1,a2,a3)	__arg4 = (unsigned long)(a4);
-#define __HYPERCALL_5ARG(a1,a2,a3,a4,a5)			\
-	__HYPERCALL_4ARG(a1,a2,a3,a4)	__arg5 = (unsigned long)(a5);
+#define __HYPERCALL_0ARG(hypercall)				\
+	__num = (unsigned long)hypercall;
+#define __HYPERCALL_1ARG(hypercall,a1)				\
+	__HYPERCALL_0ARG(hypercall)	__arg1 = (unsigned long)(a1);
+#define __HYPERCALL_2ARG(hypercall,a1,a2)			\
+	__HYPERCALL_1ARG(hypercall,a1)	__arg2 = (unsigned long)(a2);
+#define __HYPERCALL_3ARG(hypercall,a1,a2,a3)			\
+	__HYPERCALL_2ARG(hypercall,a1,a2)	__arg3 = (unsigned long)(a3);
+#define __HYPERCALL_4ARG(hypercall,a1,a2,a3,a4)			\
+	__HYPERCALL_3ARG(hypercall,a1,a2,a3)	__arg4 = (unsigned long)(a4);
+#define __HYPERCALL_5ARG(hypercall,a1,a2,a3,a4,a5)		\
+	__HYPERCALL_4ARG(hypercall,a1,a2,a3,a4)	__arg5 = (unsigned long)(a5);
 
 #define __HYPERCALL_CLOBBER5	"memory"
 #define __HYPERCALL_CLOBBER4   __HYPERCALL_CLOBBER5, __HYPERCALL_ARG5REG
@@ -80,102 +86,105 @@
 #define __HYPERCALL_CLOBBER1   __HYPERCALL_CLOBBER2, __HYPERCALL_ARG2REG
 #define __HYPERCALL_CLOBBER0   __HYPERCALL_CLOBBER1, __HYPERCALL_ARG1REG
 
-#define _hypercall0(type, name)					\
+#define _hypercall0(type, hypercall)				\
 ({ \
__HYPERCALL_DECLS;  \
-   __HYPERCALL_0ARG(); \
-   asm volatile (__HYPERCALL(name) 

Re: linux guests and ksm performance

2012-02-23 Thread Javier Guerra Giraldez
On Thu, Feb 23, 2012 at 11:42 AM, Stefan Hajnoczi stefa...@gmail.com wrote:
 The other approach is a memory page discard mechanism - which
 obviously requires more code changes than zeroing freed pages.

 The advantage is that we don't take the brute-force and CPU intensive
 approach of zeroing pages.  It would be like a fine-grained ballooning
 feature.

(disclaimer: i don't know the code, i'm just guessing)

does KVM emulate the MMU? If so, is there any 'unmap page' primitive?

-- 
Javier


Re: [PATCH] Quirk for IVB graphics FLR errata

2012-02-23 Thread Jesse Barnes
On Mon, 20 Feb 2012 02:27:25 +
Hao, Xudong xudong@intel.com wrote:

 For the IvyBridge Mobile platform, a system hang may occur if an FLR
 (Function Level Reset) is asserted to internal graphics.
 
 This quirk patch is a workaround for the IVB FLR errata issue.
 

Can you name the magic constants and offsets and document what the FLR
quirk is actually trying to do?  IIRC it had something to do with
waiting for the PCH acknowledge for the display portion of the reset...

Thanks,
-- 
Jesse Barnes, Intel Open Source Technology Center




Re: linux guests and ksm performance

2012-02-23 Thread peter.lie...@gmail.com




Stefan Hajnoczi stefa...@gmail.com wrote:

On Thu, Feb 23, 2012 at 3:40 PM, Peter Lieven p...@dlh.net wrote:
 However, in a virtual machine I have not observed the above slow down
to
 that extend
 while the benefit of zero after free in a virtualisation environment
is
 obvious:

 1) zero pages can easily be merged by ksm or other technique.
 2) zero (dup) pages are a lot faster to transfer in case of
migration.

The other approach is a memory page discard mechanism - which
obviously requires more code changes than zeroing freed pages.

The advantage is that we don't take the brute-force and CPU intensive
approach of zeroing pages.  It would be like a fine-grained ballooning
feature.


I don't think that it is CPU intensive. All user pages are zeroed anyway,
and at allocation time it shouldn't make a big difference in terms of CPU power.

I hope someone will follow up saying this has already been done or
prototyped :).

Stefan

-- 
This message was sent from my Android mobile phone with K-9 Mail.


RE: [PATCH v2 0/4] RTC: New logic to emulate RTC

2012-02-23 Thread Zhang, Yang Z
 -Original Message-
 From: Paolo Bonzini [mailto:pbonz...@redhat.com]
 Sent: Wednesday, February 22, 2012 7:19 PM
 0) My alarm tests failed quite badly. :(  I attach a patch for kvm-unit-tests
 (repository at git://git.kernel.org/pub/scm/virt/kvm/kvm-unit-tests.git).
 The tests can be compiled simply with make and run with qemu-kvm -kernel
 /path/to/x86/rtc.flat -serial stdio -display none.  Upstream QEMU fails some 
 of
 the tests.  My branch rtc-cleanup at git://github.com/bonzini/qemu.git passes
 them.  The tests should take 30-40 seconds to run.
 
Hi Paolo,
The DM and 24/12 test cases assume that changing the DM bit is reflected
in the RTC's internal clock. But the datasheet says nothing is affected
if you change it. Also, the current logic in qemu makes the same
assumption. Is this a bug, or is it by design?


best regards
yang


Re: [PATCH v6 03/12] ARM: KVM: Hypervisor identity mapping

2012-02-23 Thread Rusty Russell
On Thu, 23 Feb 2012 02:32:33 -0500, Christoffer Dall 
c.d...@virtualopensystems.com wrote:
 diff --git a/arch/arm/kvm/guest.c b/arch/arm/kvm/guest.c
 index 9c75ec4..c0adab0 100644
 --- a/arch/arm/kvm/guest.c
 +++ b/arch/arm/kvm/guest.c
 @@ -24,7 +24,6 @@
  #include asm/kvm_asm.h
  #include asm/kvm_emulate.h
  
 -
  struct kvm_stats_debugfs_item debugfs_entries[] = {
   { NULL }
  };

Cheers,
Rusty.


Re: [PATCH v6 04/12] ARM: KVM: Hypervisor inititalization

2012-02-23 Thread Rusty Russell
On Thu, 23 Feb 2012 02:32:39 -0500, Christoffer Dall 
c.d...@virtualopensystems.com wrote:
 + /*
 +  * Allocate stack pages for Hypervisor-mode
 +  */
 + for_each_possible_cpu(cpu)
 + per_cpu(kvm_arm_hyp_stack_page, cpu) = NULL;

This is weird; we can't call this init function multiple times without
reloading the module.

 + for_each_possible_cpu(cpu) {
 + void *stack_page;
 +
 + stack_page = (void *)__get_free_page(GFP_KERNEL);

Actually, if you change kvm_arm_hyp_stack_page to an unsigned long, and
your mapping functions to take unsigned long too, you can avoid many
casts.

Cheers,
Rusty.


Re: [PATCH v6 02/12] ARM: KVM: Initial skeleton to compile KVM support

2012-02-23 Thread Rusty Russell
On Thu, 23 Feb 2012 02:32:26 -0500, Christoffer Dall 
c.d...@virtualopensystems.com wrote:
 From: Christoffer Dall c.d...@virtualopensystems.com
 
 Targets KVM support for Cortex A-15 processors.
 
 Contains no real functionality but all the framework components,
 make files, header files and some tracing functionality.
 
 "Nothing to see here. Move along, move along..."
 
 Most functionality is in arch/arm/kvm/* or arch/arm/include/asm/kvm_*.h.
 
 Signed-off-by: Christoffer Dall c.d...@virtualopensystems.com
 ---
  arch/arm/Kconfig   |2 
  arch/arm/Makefile  |1 
  arch/arm/include/asm/kvm.h |   72 ++
  arch/arm/include/asm/kvm_asm.h |   28 
  arch/arm/include/asm/kvm_emulate.h |   92 +
  arch/arm/include/asm/kvm_host.h|  116 
  arch/arm/include/asm/kvm_para.h|9 +
  arch/arm/include/asm/unified.h |   12 ++
  arch/arm/kvm/Kconfig   |   45 ++
  arch/arm/kvm/Makefile  |   17 ++
  arch/arm/kvm/arm.c |  256 
 
  arch/arm/kvm/emulate.c |  121 +
  arch/arm/kvm/exports.c |   16 ++
  arch/arm/kvm/guest.c   |  148 +
  arch/arm/kvm/init.S|   17 ++
  arch/arm/kvm/interrupts.S  |   17 ++
  arch/arm/kvm/mmu.c |   15 ++
  arch/arm/kvm/trace.h   |   52 +++
  arch/arm/mm/Kconfig|8 +
  19 files changed, 1044 insertions(+), 0 deletions(-)
  create mode 100644 arch/arm/include/asm/kvm.h
  create mode 100644 arch/arm/include/asm/kvm_asm.h
  create mode 100644 arch/arm/include/asm/kvm_emulate.h
  create mode 100644 arch/arm/include/asm/kvm_host.h
  create mode 100644 arch/arm/include/asm/kvm_para.h
  create mode 100644 arch/arm/kvm/Kconfig
  create mode 100644 arch/arm/kvm/Makefile
  create mode 100644 arch/arm/kvm/arm.c
  create mode 100644 arch/arm/kvm/emulate.c
  create mode 100644 arch/arm/kvm/exports.c
  create mode 100644 arch/arm/kvm/guest.c
  create mode 100644 arch/arm/kvm/init.S
  create mode 100644 arch/arm/kvm/interrupts.S
  create mode 100644 arch/arm/kvm/mmu.c
  create mode 100644 arch/arm/kvm/trace.h
 
 diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
 index e12bc34..81aa08f 100644
 --- a/arch/arm/Kconfig
 +++ b/arch/arm/Kconfig
 @@ -2263,3 +2263,5 @@ source security/Kconfig
  source crypto/Kconfig
  
  source lib/Kconfig
 +
 +source arch/arm/kvm/Kconfig
 diff --git a/arch/arm/Makefile b/arch/arm/Makefile
 index 40319d9..eca44e0 100644
 --- a/arch/arm/Makefile
 +++ b/arch/arm/Makefile
 @@ -253,6 +253,7 @@ core-$(CONFIG_VFP)+= arch/arm/vfp/
  
  # If we have a machine-specific directory, then include it in the build.
  core-y   += arch/arm/kernel/ arch/arm/mm/ 
 arch/arm/common/
 +core-y   += arch/arm/kvm/
  core-y   += $(machdirs) $(platdirs)
  
  drivers-$(CONFIG_OPROFILE)  += arch/arm/oprofile/
 diff --git a/arch/arm/include/asm/kvm.h b/arch/arm/include/asm/kvm.h
 new file mode 100644
 index 000..544cb2a
 --- /dev/null
 +++ b/arch/arm/include/asm/kvm.h
 @@ -0,0 +1,72 @@
 +/*
 + * This program is free software; you can redistribute it and/or modify
 + * it under the terms of the GNU General Public License, version 2, as
 + * published by the Free Software Foundation.
 + *
 + * This program is distributed in the hope that it will be useful,
 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 + * GNU General Public License for more details.
 + *
 + * You should have received a copy of the GNU General Public License
 + * along with this program; if not, write to the Free Software
 + * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
 + *
 + */
 +
 +#ifndef __ARM_KVM_H__
 +#define __ARM_KVM_H__
 +
 +#include asm/types.h
 +
 +#define __KVM_HAVE_GUEST_DEBUG
 +
 +/*
 + * Modes used for short-hand mode determination in the world-switch code and
 + * in emulation code.
 + *
 + * Note: These indices do NOT correspond to the value of the CPSR mode bits!
 + */
 +enum vcpu_modes {

Nitpick: s/vcpu_modes/vcpu_mode/ ?

...
 +static inline unsigned char vcpu_mode(struct kvm_vcpu *vcpu)

static inline enum vcpu_mode vcpu_mode(struct kvm_vcpu *vcpu)...

 +{
 + u8 modes_table[16] = {
 + MODE_USR,   /* 0x0 */
 + MODE_FIQ,   /* 0x1 */
 + MODE_IRQ,   /* 0x2 */
 + MODE_SVC,   /* 0x3 */
 + 0xf, 0xf, 0xf,
 + MODE_ABT,   /* 0x7 */
 + 0xf, 0xf, 0xf,
 + MODE_UND,   /* 0xb */
 + 0xf, 0xf, 0xf,
 + MODE_SYS};  /* 0xf */
 +
 +	BUG_ON(modes_table[vcpu->arch.regs.cpsr & 0xf] == 0xf);
 +	return modes_table[vcpu->arch.regs.cpsr & 0xf];
 +}

Like much 

Re: [KVM paravirt issue?] Re: vsyscall=emulate regression

2012-02-23 Thread H. Peter Anvin
On 02/16/2012 09:39 AM, Avi Kivity wrote:

 Yes, this is on purpose

Why?

-hpa


-- 
H. Peter Anvin, Intel Open Source Technology Center
I work for Intel.  I don't speak on their behalf.



Re: [PATCH v6 02/12] ARM: KVM: Initial skeleton to compile KVM support

2012-02-23 Thread Christoffer Dall
On Thu, Feb 23, 2012 at 10:32 PM, Rusty Russell ru...@rustcorp.com.au wrote:
 On Thu, 23 Feb 2012 02:32:26 -0500, Christoffer Dall 
 c.d...@virtualopensystems.com wrote:
 From: Christoffer Dall c.d...@virtualopensystems.com

 Targets KVM support for Cortex A-15 processors.

 Contains no real functionality but all the framework components,
 make files, header files and some tracing functionality.

  "Nothing to see here. Move along, move along..."

 Most functionality is in arch/arm/kvm/* or arch/arm/include/asm/kvm_*.h.

 Signed-off-by: Christoffer Dall c.d...@virtualopensystems.com
 ---
  arch/arm/Kconfig                   |    2
  arch/arm/Makefile                  |    1
  arch/arm/include/asm/kvm.h         |   72 ++
  arch/arm/include/asm/kvm_asm.h     |   28 
  arch/arm/include/asm/kvm_emulate.h |   92 +
  arch/arm/include/asm/kvm_host.h    |  116 
  arch/arm/include/asm/kvm_para.h    |    9 +
  arch/arm/include/asm/unified.h     |   12 ++
  arch/arm/kvm/Kconfig               |   45 ++
  arch/arm/kvm/Makefile              |   17 ++
  arch/arm/kvm/arm.c                 |  256 
 
  arch/arm/kvm/emulate.c             |  121 +
  arch/arm/kvm/exports.c             |   16 ++
  arch/arm/kvm/guest.c               |  148 +
  arch/arm/kvm/init.S                |   17 ++
  arch/arm/kvm/interrupts.S          |   17 ++
  arch/arm/kvm/mmu.c                 |   15 ++
  arch/arm/kvm/trace.h               |   52 +++
  arch/arm/mm/Kconfig                |    8 +
  19 files changed, 1044 insertions(+), 0 deletions(-)
  create mode 100644 arch/arm/include/asm/kvm.h
  create mode 100644 arch/arm/include/asm/kvm_asm.h
  create mode 100644 arch/arm/include/asm/kvm_emulate.h
  create mode 100644 arch/arm/include/asm/kvm_host.h
  create mode 100644 arch/arm/include/asm/kvm_para.h
  create mode 100644 arch/arm/kvm/Kconfig
  create mode 100644 arch/arm/kvm/Makefile
  create mode 100644 arch/arm/kvm/arm.c
  create mode 100644 arch/arm/kvm/emulate.c
  create mode 100644 arch/arm/kvm/exports.c
  create mode 100644 arch/arm/kvm/guest.c
  create mode 100644 arch/arm/kvm/init.S
  create mode 100644 arch/arm/kvm/interrupts.S
  create mode 100644 arch/arm/kvm/mmu.c
  create mode 100644 arch/arm/kvm/trace.h

 diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
 index e12bc34..81aa08f 100644
 --- a/arch/arm/Kconfig
 +++ b/arch/arm/Kconfig
 @@ -2263,3 +2263,5 @@ source "security/Kconfig"
  source "crypto/Kconfig"

  source "lib/Kconfig"
 +
 +source "arch/arm/kvm/Kconfig"
 diff --git a/arch/arm/Makefile b/arch/arm/Makefile
 index 40319d9..eca44e0 100644
 --- a/arch/arm/Makefile
 +++ b/arch/arm/Makefile
 @@ -253,6 +253,7 @@ core-$(CONFIG_VFP)                += arch/arm/vfp/

  # If we have a machine-specific directory, then include it in the build.
  core-y                               += arch/arm/kernel/ arch/arm/mm/ 
 arch/arm/common/
 +core-y                               += arch/arm/kvm/
  core-y                               += $(machdirs) $(platdirs)

  drivers-$(CONFIG_OPROFILE)      += arch/arm/oprofile/
 diff --git a/arch/arm/include/asm/kvm.h b/arch/arm/include/asm/kvm.h
 new file mode 100644
 index 000..544cb2a
 --- /dev/null
 +++ b/arch/arm/include/asm/kvm.h
 @@ -0,0 +1,72 @@
 +/*
 + * This program is free software; you can redistribute it and/or modify
 + * it under the terms of the GNU General Public License, version 2, as
 + * published by the Free Software Foundation.
 + *
 + * This program is distributed in the hope that it will be useful,
 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 + * GNU General Public License for more details.
 + *
 + * You should have received a copy of the GNU General Public License
 + * along with this program; if not, write to the Free Software
 + * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
 + *
 + */
 +
 +#ifndef __ARM_KVM_H__
 +#define __ARM_KVM_H__
 +
 +#include <asm/types.h>
 +
 +#define __KVM_HAVE_GUEST_DEBUG
 +
 +/*
 + * Modes used for short-hand mode determination in the world-switch code and
 + * in emulation code.
 + *
 + * Note: These indices do NOT correspond to the value of the CPSR mode bits!
 + */
 +enum vcpu_modes {

 Nitpick: s/vcpu_modes/vcpu_mode/ ?

 ...
 +static inline unsigned char vcpu_mode(struct kvm_vcpu *vcpu)

 static inline enum vcpu_mode vcpu_mode(struct kvm_vcpu *vcpu)...

 +{
 +     u8 modes_table[16] = {
 +             MODE_USR,       /* 0x0 */
 +             MODE_FIQ,       /* 0x1 */
 +             MODE_IRQ,       /* 0x2 */
 +             MODE_SVC,       /* 0x3 */
 +             0xf, 0xf, 0xf,
 +             MODE_ABT,       /* 0x7 */
 +             0xf, 0xf, 0xf,
 +             MODE_UND,       /* 0xb */
 +             0xf, 0xf, 0xf,
 +             MODE_SYS};      /* 0xf */
 +
 +     BUG_ON(modes_table[vcpu->arch.regs.cpsr & 0xf] == 0xf);
 

Re: linux guests and ksm performance

2012-02-23 Thread Stefan Hajnoczi
On Thu, Feb 23, 2012 at 7:08 PM, peter.lie...@gmail.com p...@dlh.net wrote:




 Stefan Hajnoczi stefa...@gmail.com wrote:

On Thu, Feb 23, 2012 at 3:40 PM, Peter Lieven p...@dlh.net wrote:
 However, in a virtual machine I have not observed the above slowdown to
 that extent, while the benefit of zero after free in a virtualisation
 environment is obvious:

 1) zero pages can easily be merged by ksm or other technique.
 2) zero (dup) pages are a lot faster to transfer in case of migration.

The other approach is a memory page discard mechanism - which
obviously requires more code changes than zeroing freed pages.

The advantage is that we don't take the brute-force and CPU intensive
approach of zeroing pages.  It would be like a fine-grained ballooning
feature.
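
The closest existing analogy is madvise(2) in userspace; a guest discard
interface would express the same idea across the guest/host boundary
(illustrative sketch of the analogy only, not a proposed interface):

#include <sys/mman.h>

/* Tell the kernel that the contents of [addr, addr + len) may be
 * dropped; the next touch yields fresh zero-filled pages. */
static void discard_range(void *addr, size_t len)
{
	madvise(addr, len, MADV_DONTNEED);
}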


 I don't think that it is CPU intense. All user pages are zeroed anyway, but at
 allocation time it shouldn't be a big difference in terms of CPU power.

It's easy to find a scenario where eagerly zeroing pages is wasteful.
Imagine a process that uses all of physical memory.  Once it
terminates the system is going to run processes that only use a small
set of pages.  It's pointless zeroing all those pages if we're not
going to use them anymore.

Stefan
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: linux guests and ksm performance

2012-02-23 Thread Stefan Hajnoczi
On Fri, Feb 24, 2012 at 6:41 AM, Stefan Hajnoczi stefa...@gmail.com wrote:
 On Thu, Feb 23, 2012 at 7:08 PM, peter.lie...@gmail.com p...@dlh.net wrote:




 Stefan Hajnoczi stefa...@gmail.com wrote:

On Thu, Feb 23, 2012 at 3:40 PM, Peter Lieven p...@dlh.net wrote:
 However, in a virtual machine I have not observed the above slowdown to
 that extent, while the benefit of zero after free in a virtualisation
 environment is obvious:

 1) zero pages can easily be merged by ksm or other technique.
 2) zero (dup) pages are a lot faster to transfer in case of migration.

The other approach is a memory page discard mechanism - which
obviously requires more code changes than zeroing freed pages.

The advantage is that we don't take the brute-force and CPU intensive
approach of zeroing pages.  It would be like a fine-grained ballooning
feature.


 I don't think that it is CPU intense. All user pages are zeroed anyway, but
 at allocation time it shouldn't be a big difference in terms of CPU power.

 It's easy to find a scenario where eagerly zeroing pages is wasteful.
 Imagine a process that uses all of physical memory.  Once it
 terminates the system is going to run processes that only use a small
 set of pages.  It's pointless zeroing all those pages if we're not
 going to use them anymore.

Perhaps the middle path is to zero pages but do it after a grace
timeout.  I wonder if this helps eliminate the 2-3% slowdown you
noticed when compiling.

This requires no special host-guest interfaces for discarding pages.

Stefan
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v2 0/4] RTC: New logic to emulate RTC

2012-02-23 Thread Paolo Bonzini
On 02/24/2012 01:55 AM, Zhang, Yang Z wrote:
 Hi Paolo, the DM and 24/12 test case assumes that changing the DM bit
 is reflected in the RTC's internal clock. But the datasheet says nothing
 is affected if you change it. Also, the current logic in QEMU makes the
 same assumption. Is this a bug, or is it by design?

Don't care about that part, it was more to test the BCD logic.
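
For reference, the BCD logic being tested boils down to the standard
conversions (essentially the kernel's bcd2bin/bin2bcd helpers; sketch):

static inline unsigned int bcd2bin(unsigned int val)
{
	return (val & 0x0f) + (val >> 4) * 10;
}

static inline unsigned int bin2bcd(unsigned int val)
{
	return ((val / 10) << 4) + (val % 10);
}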

Paolo
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: linux guests and ksm performance

2012-02-23 Thread Gleb Natapov
On Thu, Feb 23, 2012 at 04:42:54PM +, Stefan Hajnoczi wrote:
 On Thu, Feb 23, 2012 at 3:40 PM, Peter Lieven p...@dlh.net wrote:
  However, in a virtual machine I have not observed the above slowdown to
  that extent, while the benefit of zero after free in a virtualisation
  environment is obvious:
 
  1) zero pages can easily be merged by ksm or other technique.
  2) zero (dup) pages are a lot faster to transfer in case of migration.
 
 The other approach is a memory page discard mechanism - which
 obviously requires more code changes than zeroing freed pages.
 
 The advantage is that we don't take the brute-force and CPU intensive
 approach of zeroing pages.  It would be like a fine-grained ballooning
 feature.
 
 I hope someone will follow up saying this has already been done or
 prototyped :).
 
That was attempted. It is called page hinting, but AFAIK the attempt was
abandoned due to complex locking issues.

--
Gleb.
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: linux guests and ksm performance

2012-02-23 Thread Stefan Hajnoczi
On Fri, Feb 24, 2012 at 6:53 AM, Stefan Hajnoczi stefa...@gmail.com wrote:
 On Fri, Feb 24, 2012 at 6:41 AM, Stefan Hajnoczi stefa...@gmail.com wrote:
 On Thu, Feb 23, 2012 at 7:08 PM, peter.lie...@gmail.com p...@dlh.net wrote:
 Stefan Hajnoczi stefa...@gmail.com wrote:

On Thu, Feb 23, 2012 at 3:40 PM, Peter Lieven p...@dlh.net wrote:
 However, in a virtual machine I have not observed the above slowdown to
 that extent, while the benefit of zero after free in a virtualisation
 environment is obvious:

 1) zero pages can easily be merged by ksm or other technique.
 2) zero (dup) pages are a lot faster to transfer in case of migration.

The other approach is a memory page discard mechanism - which
obviously requires more code changes than zeroing freed pages.

The advantage is that we don't take the brute-force and CPU intensive
approach of zeroing pages.  It would be like a fine-grained ballooning
feature.


 I don't think that it is CPU intense. All user pages are zeroed anyway, but
 at allocation time it shouldn't be a big difference in terms of CPU power.

 It's easy to find a scenario where eagerly zeroing pages is wasteful.
 Imagine a process that uses all of physical memory.  Once it
 terminates the system is going to run processes that only use a small
 set of pages.  It's pointless zeroing all those pages if we're not
 going to use them anymore.

 Perhaps the middle path is to zero pages but do it after a grace
 timeout.  I wonder if this helps eliminate the 2-3% slowdown you
 noticed when compiling.

Gah, it's too early in the morning.  I don't think this timer actually
makes sense.

Stefan
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: linux guests and ksm performance

2012-02-23 Thread Peter Lieven

On 24.02.2012 at 08:23, Stefan Hajnoczi wrote:

 On Fri, Feb 24, 2012 at 6:53 AM, Stefan Hajnoczi stefa...@gmail.com wrote:
 On Fri, Feb 24, 2012 at 6:41 AM, Stefan Hajnoczi stefa...@gmail.com wrote:
 On Thu, Feb 23, 2012 at 7:08 PM, peter.lie...@gmail.com p...@dlh.net 
 wrote:
 Stefan Hajnoczi stefa...@gmail.com wrote:
 
 On Thu, Feb 23, 2012 at 3:40 PM, Peter Lieven p...@dlh.net wrote:
 However, in a virtual machine I have not observed the above slowdown to
 that extent, while the benefit of zero after free in a virtualisation
 environment is obvious:
 
 1) zero pages can easily be merged by ksm or other technique.
 2) zero (dup) pages are a lot faster to transfer in case of migration.
 
 The other approach is a memory page discard mechanism - which
 obviously requires more code changes than zeroing freed pages.
 
 The advantage is that we don't take the brute-force and CPU intensive
 approach of zeroing pages.  It would be like a fine-grained ballooning
 feature.
 
 
  I don't think that it is CPU intense. All user pages are zeroed anyway, but
  at allocation time it shouldn't be a big difference in terms of CPU power.
 
 It's easy to find a scenario where eagerly zeroing pages is wasteful.
 Imagine a process that uses all of physical memory.  Once it
 terminates the system is going to run processes that only use a small
 set of pages.  It's pointless zeroing all those pages if we're not
 going to use them anymore.
 
 Perhaps the middle path is to zero pages but do it after a grace
 timeout.  I wonder if this helps eliminate the 2-3% slowdown you
 noticed when compiling.
 
 Gah, it's too early in the morning.  I don't think this timer actually
 makes sense.

ok, that would be the idea of asynchronous page zeroing in the guest. i also
think this is too complicated.

maybe the other idea is too simple:
is it possible to give the guest a hint that ksm is enabled on the host (let's
say in a way like it's done with kvmclock)? if ksm is enabled on the host, the
administrator has already made the decision that performance is not so
important and he/she is eager to save physical memory. what if, if and only if
this flag is set, we switch from zero on allocate to zero after free? i think
the whole thing is less than 10-20 lines of code, and it's code that has been
proven to work well in grsecurity for ages.

this might introduce a little (2-3%) overhead, but only if a lot of non
GFP_FREE memory is allocated, and it's definitely faster than swapping.
of course, it has to be guaranteed that this code does not slow down normal
systems due to additional branches (would it be enough to mark the if
statements as unlikely?)
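
a minimal sketch of the guest-side switch, with the hint plumbing left out
(ksm_hint_enabled() is a hypothetical probe for the flag described above,
not an existing interface):

#include <linux/mm.h>
#include <linux/highmem.h>

/* Hypothetical: true when the host advertised "KSM enabled" to the
 * guest, e.g. via a feature bit in the style of kvmclock. */
extern bool ksm_hint_enabled(void);

/* On the page-free path: zero pages eagerly so KSM on the host can
 * merge them and migration can dedup them, at the cost of some CPU. */
static inline void zero_on_free(struct page *page, unsigned int order)
{
	unsigned int i;

	if (likely(!ksm_hint_enabled()))
		return;	/* normal systems see one predicted branch */

	for (i = 0; i < (1U << order); i++)
		clear_highpage(page + i);
}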

peter







 
 Stefan

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v6 1/4] KVM: PPC: epapr: Factor out the epapr init

2012-02-23 Thread Liu Yu
from the kvm guest paravirt init code.

Signed-off-by: Liu Yu yu@freescale.com
---
v6:
1. rename epapr_para to epapr_paravirt
2. remove redundant warnings
3. remove unnecessary init

 arch/powerpc/include/asm/epapr_hcalls.h |    2 +
 arch/powerpc/kernel/Makefile            |    1 +
 arch/powerpc/kernel/epapr_hcalls.S      |   25 ++
 arch/powerpc/kernel/epapr_paravirt.c    |   54 +++
 arch/powerpc/kernel/kvm.c               |   28 ++--
 arch/powerpc/kernel/kvm_emul.S          |   10 --
 arch/powerpc/platforms/Kconfig          |    9 +
 7 files changed, 94 insertions(+), 35 deletions(-)
 create mode 100644 arch/powerpc/kernel/epapr_hcalls.S
 create mode 100644 arch/powerpc/kernel/epapr_paravirt.c

diff --git a/arch/powerpc/include/asm/epapr_hcalls.h 
b/arch/powerpc/include/asm/epapr_hcalls.h
index f3b0c2c..2173d4c 100644
--- a/arch/powerpc/include/asm/epapr_hcalls.h
+++ b/arch/powerpc/include/asm/epapr_hcalls.h
@@ -148,6 +148,8 @@
 #define EV_HCALL_CLOBBERS2 EV_HCALL_CLOBBERS3, r5
 #define EV_HCALL_CLOBBERS1 EV_HCALL_CLOBBERS2, r4
 
+extern bool epapr_paravirt_enabled;
+extern u32 epapr_hypercall_start[];
 
 /*
  * We use uintptr_t to define a register because it's guaranteed to be a
diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile
index ee728e4..ba8fa43 100644
--- a/arch/powerpc/kernel/Makefile
+++ b/arch/powerpc/kernel/Makefile
@@ -136,6 +136,7 @@ ifneq ($(CONFIG_XMON)$(CONFIG_KEXEC),)
 obj-y  += ppc_save_regs.o
 endif
 
+obj-$(CONFIG_EPAPR_PARAVIRT)   += epapr_paravirt.o epapr_hcalls.o
 obj-$(CONFIG_KVM_GUEST)	+= kvm.o kvm_emul.o
 
 # Disable GCOV in odd or sensitive code
diff --git a/arch/powerpc/kernel/epapr_hcalls.S 
b/arch/powerpc/kernel/epapr_hcalls.S
new file mode 100644
index 000..697b390
--- /dev/null
+++ b/arch/powerpc/kernel/epapr_hcalls.S
@@ -0,0 +1,25 @@
+/*
+ * Copyright (C) 2012 Freescale Semiconductor, Inc.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/threads.h>
+#include <asm/reg.h>
+#include <asm/page.h>
+#include <asm/cputable.h>
+#include <asm/thread_info.h>
+#include <asm/ppc_asm.h>
+#include <asm/asm-offsets.h>
+
+/* Hypercall entry point. Will be patched with device tree instructions. */
+.global epapr_hypercall_start
+epapr_hypercall_start:
+   li  r3, -1
+   nop
+   nop
+   nop
+   blr
diff --git a/arch/powerpc/kernel/epapr_paravirt.c 
b/arch/powerpc/kernel/epapr_paravirt.c
new file mode 100644
index 000..45eb439
--- /dev/null
+++ b/arch/powerpc/kernel/epapr_paravirt.c
@@ -0,0 +1,54 @@
+/*
+ * ePAPR para-virtualization support.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ *
+ * Copyright (C) 2012 Freescale Semiconductor, Inc.
+ */
+
+#include <linux/of.h>
+#include <asm/epapr_hcalls.h>
+#include <asm/cacheflush.h>
+#include <asm/code-patching.h>
+
+bool epapr_paravirt_enabled;
+
+static int __init epapr_paravirt_init(void)
+{
+   struct device_node *hyper_node;
+   const u32 *insts;
+   int len, i;
+
+   hyper_node = of_find_node_by_path("/hypervisor");
+   if (!hyper_node)
+   return -ENODEV;
+
+   insts = of_get_property(hyper_node, "hcall-instructions", &len);
+   if (!insts)
+   return 0;
+
+   if (!(len % 4) && len >= (4 * 4)) {
+   for (i = 0; i < (len / 4); i++)
+   patch_instruction(epapr_hypercall_start + i, insts[i]);
+
+   epapr_paravirt_enabled = true;
+   } else {
+   printk(KERN_WARNING
+  "ePAPR paravirt: hcall-instructions format error\n");
+   }
+
+   return 0;
+}
+
+early_initcall(epapr_paravirt_init);
diff --git a/arch/powerpc/kernel/kvm.c b/arch/powerpc/kernel/kvm.c
index 62bdf23..1c13307 100644
--- a/arch/powerpc/kernel/kvm.c
+++ b/arch/powerpc/kernel/kvm.c
@@ -31,6 +31,7 @@
 #include <asm/cacheflush.h>
 #include <asm/disassemble.h>
 #include <asm/ppc-opcode.h>
+#include <asm/epapr_hcalls.h>
 
 #define KVM_MAGIC_PAGE (-4096L)
 #define magic_var(x) KVM_MAGIC_PAGE + offsetof(struct kvm_vcpu_arch_shared, x)
@@ -726,7 +727,7 @@ unsigned long 
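
A consumer of this init then only needs the exported flag; roughly (sketch:
the caller is hypothetical, ev_idle() is the ePAPR wrapper touched later in
this series):

#include <asm/epapr_hcalls.h>

/* Only branch into the epapr_hypercall_start trampoline once
 * epapr_paravirt_init() has actually patched it from the
 * hcall-instructions property of the /hypervisor node. */
static void maybe_idle_via_hcall(void)
{
	if (!epapr_paravirt_enabled)
		return;	/* bare metal, or no /hypervisor node */
	ev_idle();
}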

[PATCH v6 2/4] KVM: PPC: epapr: Add idle hcall support for host

2012-02-23 Thread Liu Yu
And add a new flag definition in kvm_ppc_pvinfo to indicate
whether the host supports the EV_IDLE hcall.

Signed-off-by: Liu Yu yu@freescale.com
---
v6: no change

 arch/powerpc/include/asm/Kbuild     |    1 +
 arch/powerpc/include/asm/kvm_para.h |   14 --
 arch/powerpc/kvm/powerpc.c          |    6 ++
 include/linux/kvm.h                 |    2 ++
 4 files changed, 21 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/include/asm/Kbuild b/arch/powerpc/include/asm/Kbuild
index 7e313f1..13d6b7b 100644
--- a/arch/powerpc/include/asm/Kbuild
+++ b/arch/powerpc/include/asm/Kbuild
@@ -34,5 +34,6 @@ header-y += termios.h
 header-y += types.h
 header-y += ucontext.h
 header-y += unistd.h
+header-y += epapr_hcalls.h
 
 generic-y += rwsem.h
diff --git a/arch/powerpc/include/asm/kvm_para.h 
b/arch/powerpc/include/asm/kvm_para.h
index 7b754e7..81a34c9 100644
--- a/arch/powerpc/include/asm/kvm_para.h
+++ b/arch/powerpc/include/asm/kvm_para.h
@@ -75,9 +75,19 @@ struct kvm_vcpu_arch_shared {
 };
 
 #define KVM_SC_MAGIC_R0        0x4b564d21 /* KVM! */
-#define HC_VENDOR_KVM  (42 << 16)
+
+#include <asm/epapr_hcalls.h>
+
+/* ePAPR Hypercall Vendor ID */
+#define HC_VENDOR_EPAPR        (EV_EPAPR_VENDOR_ID << 16)
+#define HC_VENDOR_KVM  (EV_KVM_VENDOR_ID << 16)
+
+/* ePAPR Hypercall Token */
+#define HC_EV_IDLE EV_IDLE
+
+/* ePAPR Hypercall Return Codes */
 #define HC_EV_SUCCESS  0
-#define HC_EV_UNIMPLEMENTED    12
+#define HC_EV_UNIMPLEMENTED    EV_UNIMPLEMENTED
 
 #define KVM_FEATURE_MAGIC_PAGE 1
 
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index 0e21d15..7098840 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -81,6 +81,10 @@ int kvmppc_kvm_pv(struct kvm_vcpu *vcpu)
 
/* Second return value is in r4 */
break;
+   case HC_VENDOR_EPAPR | HC_EV_IDLE:
+   r = HC_EV_SUCCESS;
+   kvm_vcpu_block(vcpu);
+   break;
default:
r = HC_EV_UNIMPLEMENTED;
break;
@@ -746,6 +750,8 @@ static int kvm_vm_ioctl_get_pvinfo(struct kvm_ppc_pvinfo 
*pvinfo)
pvinfo->hcall[2] = inst_sc;
pvinfo->hcall[3] = inst_nop;
 
+   pvinfo->flags = KVM_PPC_PVINFO_FLAGS_EV_IDLE;
+
return 0;
 }
 
diff --git a/include/linux/kvm.h b/include/linux/kvm.h
index acbe429..6b2c70e 100644
--- a/include/linux/kvm.h
+++ b/include/linux/kvm.h
@@ -449,6 +449,8 @@ struct kvm_ppc_pvinfo {
__u8  pad[108];
 };
 
+#define KVM_PPC_PVINFO_FLAGS_EV_IDLE   (1 << 0)
+
 #define KVMIO 0xAE
 
 /* machine type bits, to be used as argument to KVM_CREATE_VM */
-- 
1.7.0.4
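
On the userspace side, the new flag can be probed through the existing
KVM_PPC_GET_PVINFO ioctl (sketch with minimal error handling; vm_fd is
assumed to be an open VM descriptor):

#include <linux/kvm.h>
#include <sys/ioctl.h>

/* Returns nonzero if the host advertises the EV_IDLE hcall. */
static int host_has_ev_idle(int vm_fd)
{
	struct kvm_ppc_pvinfo pvinfo;

	if (ioctl(vm_fd, KVM_PPC_GET_PVINFO, &pvinfo) < 0)
		return 0;
	return !!(pvinfo.flags & KVM_PPC_PVINFO_FLAGS_EV_IDLE);
}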


--
To unsubscribe from this list: send the line unsubscribe kvm-ppc in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v6 4/4] KVM: PPC: epapr: Update other hypercall invoking

2012-02-23 Thread Liu Yu
Discard the old way of invoking hypercalls;
instead, use the epapr paravirt infrastructure.

Signed-off-by: Liu Yu yu@freescale.com
---
v6:
select epapr_paravirt when enable fsl_hv driver

 arch/powerpc/include/asm/epapr_hcalls.h |   22 +-
 arch/powerpc/include/asm/fsl_hcalls.h   |   36 +++---
 drivers/virt/Kconfig                    |    1 +
 3 files changed, 30 insertions(+), 29 deletions(-)
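
The mechanical change in every wrapper below is the same; schematically
(register setup elided):

-	__asm__ __volatile__ ("sc 1"
+	asm volatile("bl epapr_hypercall_start"
 		: "+r" (r11), "+r" (r3)
 		: : EV_HCALL_CLOBBERS1
 		);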

diff --git a/arch/powerpc/include/asm/epapr_hcalls.h 
b/arch/powerpc/include/asm/epapr_hcalls.h
index 78460ac..b95758d 100644
--- a/arch/powerpc/include/asm/epapr_hcalls.h
+++ b/arch/powerpc/include/asm/epapr_hcalls.h
@@ -189,7 +189,7 @@ static inline unsigned int ev_int_set_config(unsigned int 
interrupt,
r5  = priority;
r6  = destination;
 
-   __asm__ __volatile__ ("sc 1"
+   asm volatile("bl epapr_hypercall_start"
: "+r" (r11), "+r" (r3), "+r" (r4), "+r" (r5), "+r" (r6)
: : EV_HCALL_CLOBBERS4
);
@@ -218,7 +218,7 @@ static inline unsigned int ev_int_get_config(unsigned int 
interrupt,
r11 = EV_HCALL_TOKEN(EV_INT_GET_CONFIG);
r3 = interrupt;
 
-   __asm__ __volatile__ ("sc 1"
+   asm volatile("bl epapr_hypercall_start"
: "+r" (r11), "+r" (r3), "=r" (r4), "=r" (r5), "=r" (r6)
: : EV_HCALL_CLOBBERS4
);
@@ -248,7 +248,7 @@ static inline unsigned int ev_int_set_mask(unsigned int 
interrupt,
r3 = interrupt;
r4 = mask;
 
-   __asm__ __volatile__ ("sc 1"
+   asm volatile("bl epapr_hypercall_start"
: "+r" (r11), "+r" (r3), "+r" (r4)
: : EV_HCALL_CLOBBERS2
);
@@ -273,7 +273,7 @@ static inline unsigned int ev_int_get_mask(unsigned int 
interrupt,
r11 = EV_HCALL_TOKEN(EV_INT_GET_MASK);
r3 = interrupt;
 
-   __asm__ __volatile__ ("sc 1"
+   asm volatile("bl epapr_hypercall_start"
: "+r" (r11), "+r" (r3), "=r" (r4)
: : EV_HCALL_CLOBBERS2
);
@@ -301,7 +301,7 @@ static inline unsigned int ev_int_eoi(unsigned int 
interrupt)
r11 = EV_HCALL_TOKEN(EV_INT_EOI);
r3 = interrupt;
 
-   __asm__ __volatile__ ("sc 1"
+   asm volatile("bl epapr_hypercall_start"
: "+r" (r11), "+r" (r3)
: : EV_HCALL_CLOBBERS1
);
@@ -340,7 +340,7 @@ static inline unsigned int ev_byte_channel_send(unsigned 
int handle,
r7 = be32_to_cpu(p[2]);
r8 = be32_to_cpu(p[3]);
 
-   __asm__ __volatile__ ("sc 1"
+   asm volatile("bl epapr_hypercall_start"
: "+r" (r11), "+r" (r3),
  "+r" (r4), "+r" (r5), "+r" (r6), "+r" (r7), "+r" (r8)
: : EV_HCALL_CLOBBERS6
@@ -379,7 +379,7 @@ static inline unsigned int ev_byte_channel_receive(unsigned 
int handle,
r3 = handle;
r4 = *count;
 
-   __asm__ __volatile__ ("sc 1"
+   asm volatile("bl epapr_hypercall_start"
: "+r" (r11), "+r" (r3), "+r" (r4),
  "=r" (r5), "=r" (r6), "=r" (r7), "=r" (r8)
: : EV_HCALL_CLOBBERS6
@@ -417,7 +417,7 @@ static inline unsigned int ev_byte_channel_poll(unsigned 
int handle,
r11 = EV_HCALL_TOKEN(EV_BYTE_CHANNEL_POLL);
r3 = handle;
 
-   __asm__ __volatile__ ("sc 1"
+   asm volatile("bl epapr_hypercall_start"
: "+r" (r11), "+r" (r3), "=r" (r4), "=r" (r5)
: : EV_HCALL_CLOBBERS3
);
@@ -450,7 +450,7 @@ static inline unsigned int ev_int_iack(unsigned int handle,
r11 = EV_HCALL_TOKEN(EV_INT_IACK);
r3 = handle;
 
-   __asm__ __volatile__ ("sc 1"
+   asm volatile("bl epapr_hypercall_start"
: "+r" (r11), "+r" (r3), "=r" (r4)
: : EV_HCALL_CLOBBERS2
);
@@ -474,7 +474,7 @@ static inline unsigned int ev_doorbell_send(unsigned int 
handle)
r11 = EV_HCALL_TOKEN(EV_DOORBELL_SEND);
r3 = handle;
 
-   __asm__ __volatile__ ("sc 1"
+   asm volatile("bl epapr_hypercall_start"
: "+r" (r11), "+r" (r3)
: : EV_HCALL_CLOBBERS1
);
@@ -494,7 +494,7 @@ static inline unsigned int ev_idle(void)
 
r11 = EV_HCALL_TOKEN(EV_IDLE);
 
-   __asm__ __volatile__ ("sc 1"
+   asm volatile("bl epapr_hypercall_start"
: "+r" (r11), "=r" (r3)
: : EV_HCALL_CLOBBERS1
);
diff --git a/arch/powerpc/include/asm/fsl_hcalls.h 
b/arch/powerpc/include/asm/fsl_hcalls.h
index 922d9b5..3abb583 100644
--- a/arch/powerpc/include/asm/fsl_hcalls.h
+++ b/arch/powerpc/include/asm/fsl_hcalls.h
@@ -96,7 +96,7 @@ static inline unsigned int fh_send_nmi(unsigned int vcpu_mask)
r11 = FH_HCALL_TOKEN(FH_SEND_NMI);
r3 = vcpu_mask;
 
-   __asm__ __volatile__ ("sc 1"
+   asm volatile("bl epapr_hypercall_start"
: "+r" (r11), "+r" (r3)
: : EV_HCALL_CLOBBERS1
);
@@ -151,7 +151,7 @@ static inline unsigned int fh_partition_get_dtprop(int 
handle,