[COMMIT] [WIN-GUEST-DRIVERS] Initial support for MSI interrupts and DPC\Interrupt behaviour

2009-10-22 Thread Yan Vugenfirer
repository: C:/dev/kvm-guest-drivers-windows
branch: master
commit a1d1275e735a02450bc70c262e343c356da1bb34
Author: Yan Vugenfirer yvuge...@redhat.com
Date:   Thu Oct 22 13:44:42 2009 +0200

[WIN-GUEST-DRIVERS] Initial support for MSI interrupts and DPC\Interrupt 
behaviour

diff --git a/NetKVM/Common/ParaNdis-Common.c b/NetKVM/Common/ParaNdis-Common.c
index 3f9faa1..896e6dc 100644
--- a/NetKVM/Common/ParaNdis-Common.c
+++ b/NetKVM/Common/ParaNdis-Common.c
@@ -406,7 +406,7 @@ static BOOLEAN GetAdapterResources(PNDIS_RESOURCE_LIST 
RList, tAdapterResources
pResources-Level = 
RList-PartialDescriptors[i].u.Interrupt.Level;
pResources-Affinity = 
RList-PartialDescriptors[i].u.Interrupt.Affinity;
pResources-InterruptFlags = 
RList-PartialDescriptors[i].Flags;
-   DPrintf(0, (Found Interrupt level %d, vector %d, 
affinity %X, flags %X,
+   DPrintf(0, (Found Interrupt vector %d, level %d, 
affinity %X, flags %X,
pResources-Vector, pResources-Level, 
(ULONG)pResources-Affinity, pResources-InterruptFlags));
}
}
@@ -529,6 +529,12 @@ NDIS_STATUS ParaNdis_InitializeContext(
pContext-AdapterResources.IOLength)
)
{
+   if (pContext-AdapterResources.InterruptFlags  
CM_RESOURCE_INTERRUPT_MESSAGE)
+   {
+   DPrintf(0, ([%s] Message interrupt assigned, 
__FUNCTION__));
+   pContext-bUsingMSIX = TRUE;
+   }
+
VirtIODeviceSetIOAddress(pContext-IODevice, 
pContext-AdapterResources.ulIOAddress);
JustForCheckClearInterrupt(pContext, init 0);
ParaNdis_ResetVirtIONetDevice(pContext);
@@ -557,7 +563,7 @@ NDIS_STATUS ParaNdis_InitializeContext(
{
VirtIODeviceGet(
pContext-IODevice,
-   0, // offsetof(struct virtio_net_config, mac)
+   pContext-bUsingMSIX ? 4 : 0, // 
offsetof(struct virtio_net_config, mac)
pContext-PermanentMacAddress,
ETH_LENGTH_OF_ADDRESS);
if 
(!ParaNdis_ValidateMacAddress(pContext-PermanentMacAddress, FALSE))
@@ -1133,6 +1139,7 @@ BOOLEAN ParaNdis_OnInterrupt(
if (b)
{
NdisGetCurrentSystemTime(pContext-LastInterruptTimeStamp);
+   ParaNdis_VirtIOEnableInterrupt(pContext, FALSE);
}
return b;
 }
@@ -1698,12 +1705,24 @@ void ParaNdis_ReportLinkStatus(PARANDIS_ADAPTER 
*pContext)
if (pContext-bLinkDetectSupported)
{
USHORT linkStatus = 0;
+   USHORT offset = (pContext-bUsingMSIX ? 4 : 0) + 
sizeof(pContext-CurrentMacAddress);
// link changed
-   VirtIODeviceGet(pContext-IODevice, 
sizeof(pContext-CurrentMacAddress), linkStatus, sizeof(linkStatus));
+   VirtIODeviceGet(pContext-IODevice, offset, linkStatus, 
sizeof(linkStatus));
bConnected = (linkStatus  VIRTIO_NET_S_LINK_UP) != 0;
}
ParaNdis_IndicateConnect(pContext, bConnected, FALSE);
 }
+
+BOOLEAN ParaNdis_MiniportSynchronizeInterruptEnable(IN PVOID  
SynchronizeContext)
+{
+   PARANDIS_ADAPTER *pContext = (PARANDIS_ADAPTER *)SynchronizeContext;
+
+   DEBUG_ENTRY(6);
+   ParaNdis_VirtIOEnableInterrupt(pContext, TRUE);
+
+   return TRUE;
+}
+
 /**
 DPC implementation, common for both NDIS
 Parameters:
@@ -1757,6 +1776,8 @@ void ParaNdis_DPCWorkBody(PARANDIS_ADAPTER *pContext)
}
ParaNdis_DebugHistory(pContext, hopDPC, NULL, 0, 
pContext-nofFreeHardwareBuffers, pContext-nofFreeTxDescriptors);
NdisReleaseSpinLock(pContext-DPCLock);
+
+   ParaNdis_SyncInterruptEnable(pContext);
 }
 
 /**
@@ -1967,7 +1988,7 @@ NDIS_STATUS ParaNdis_SetMulticastList(
 }
 
 /**
-TODO
+
 Parameters:
 Return value:
 ***/
@@ -1975,7 +1996,8 @@ VOID ParaNdis_VirtIOEnableInterrupt(
PARANDIS_ADAPTER *pContext,
BOOLEAN bEnable)
 {
-   DPrintf(0, ([%s] NOT IMPLEMENTED, __FUNCTION__));
+   
pContext-NetSendQueue-vq_ops-enable_interrupt(pContext-NetSendQueue, 
bEnable);
+   
pContext-NetReceiveQueue-vq_ops-enable_interrupt(pContext-NetReceiveQueue, 
bEnable);
 }
 
 /**
diff --git a/NetKVM/Common/ParaNdis-Debug.c b/NetKVM/Common/ParaNdis-Debug.c
index ef56651..22d3b73 100644
--- a/NetKVM/Common/ParaNdis-Debug.c
+++ b/NetKVM/Common/ParaNdis-Debug.c
@@ -11,6 +11,7 @@
 

Re: qemu-kvm: require 4K aligned resource size for memory

2009-10-22 Thread Chris Wright
* Michael S. Tsirkin (m...@redhat.com) wrote:
 KVM does not virtualize low address bits for memory accesses, so we must
 require that PCI BAR size is a multiple of 4K for passthrough to work
 (this also guarantees that address is 4K aligned).
 
 Users of recent linux kernels can force resource size up to 4K
 using:
 
   commit 32a9a682bef2f6fce7026bd94d1ce20028b0e52d
   Author: Yuji Shimada shimada-...@necst.nec.co.jp
   Date:   Mon Mar 16 17:13:39 2009 +0900
   PCI: allow assignment of memory resources with a specified alignment
 
 Signed-off-by: Michael S. Tsirkin m...@redhat.com
 
 ---
 
 diff --git a/hw/device-assignment.c b/hw/device-assignment.c
 index 237060f..c2ef31f 100644
 --- a/hw/device-assignment.c
 +++ b/hw/device-assignment.c
 @@ -381,6 +381,14 @@ static int assigned_dev_register_regions(PCIRegion 
 *io_regions,
  int t = cur_region-type  IORESOURCE_PREFETCH
  ? PCI_ADDRESS_SPACE_MEM_PREFETCH
  : PCI_ADDRESS_SPACE_MEM;
 +if (cur_region-size  0xFFF) {
 +fprintf(stderr, Unable to assign device: PCI region %d 
 +at address 0x%llx has size 0x%x, 
 + which is not a multiple of 4K\n,
 +i, (unsigned long long)cur_region-base_addr,
 +cur_region-size);
 +return -1;
 +}

should render this useless now shouldn't it?

   /* add offset */
pci_dev-v_addrs[i].u.r_virtbase +=
(cur_region-base_addr  0xFFF);

thanks,
-chris
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: 0.11: SMP guests using one host CPU only?

2009-10-22 Thread Avi Kivity

On 10/20/2009 04:48 PM, Tomasz Chmielewski wrote:

Avi Kivity wrote:

On 10/20/2009 10:19 PM, Tomasz Chmielewski wrote:


I meant, how many qemu threads are there, and how much cpu does 
each take?



There is only one qemu thread for the 4-cpu guest.


Not possible.  Even a single-cpu guest has two threads.


ps auxH shuld show me all threads? I started it multiple times, and 
it shown 1 thread for the 4-CPU guest

(with no CPU intensive tasks running - could this be a reason?).


No.  kvm implies 2+ threads.





What does 'ls /proc/$(pgrep qemu)/task' show?


Running several CPU-intensive processes on this guest uses only one 
CPU on the host.


Both ps auxH and /proc confirm that this guest has 4-5 threads when 
I run several CPU-intensive apps.


Only one thread for this guest uses 100% CPU time; other threads use ~0%.

If I don't run any CPU-intensive tasks on this guests, it only runs 
one thread (unless I misinterpret something here).



Some 1-CPU guests have only one thread though?



Are you sure they're using kvm?  Try 'info kvm' in the monitor.  tcg 
will only use on thread (more will be spawned for I/O, but will 
eventually die).


--
I have a truly marvellous patch that fixes the bug which this
signature is too narrow to contain.

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: KVM: VMX: remove GUEST_CR3 write from vmx_vcpu_run

2009-10-22 Thread Avi Kivity

On 10/20/2009 04:59 PM, Marcelo Tosatti wrote:


   

Nice.  Any reason why ept_load_pdptrs() couldn't go the same way?
 

Its already protected by VCPU_EXREG_PDPTR caching, so it does not buy
much.

The advantage would symmetry to cr3.

   


Yes, the PDPTRs fulfil exactly the same role as cr3, so the same rules 
should apply.


--
I have a truly marvellous patch that fixes the bug which this
signature is too narrow to contain.

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: KVM: VMX: remove GUEST_CR3 write from vmx_vcpu_run

2009-10-22 Thread Sheng Yang
On Tuesday 20 October 2009 20:37:20 Marcelo Tosatti wrote:
 GUEST_CR3 is updated via kvm_set_cr3 whenever CR3 value
 changes.

The description is not that accuracy... If CR3 value change in guest when EPT 
enabled, no VM Exit would happen, then no kvm_set_cr3...

-- 
regards
Yang, Sheng


 Signed-off-by: Marcelo Tosatti mtosa...@redhat.com

 diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
 index 364263a..325075f 100644
 --- a/arch/x86/kvm/vmx.c
 +++ b/arch/x86/kvm/vmx.c
 @@ -3638,10 +3638,9 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu)
  {
   struct vcpu_vmx *vmx = to_vmx(vcpu);

 - if (enable_ept  is_paging(vcpu)) {
 - vmcs_writel(GUEST_CR3, vcpu-arch.cr3);
 + if (enable_ept  is_paging(vcpu))
   ept_load_pdptrs(vcpu);
 - }
 +
   /* Record the guest's net vcpu time for enforced NMI injections. */
   if (unlikely(!cpu_has_virtual_nmis()  vmx-soft_vnmi_blocked))
   vmx-entry_time = ktime_get();
 --
 To unsubscribe from this list: send the line unsubscribe kvm in
 the body of a message to majord...@vger.kernel.org
 More majordomo info at  http://vger.kernel.org/majordomo-info.html

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: KVM: VMX: remove GUEST_CR3 write from vmx_vcpu_run

2009-10-22 Thread Avi Kivity

On 10/22/2009 09:35 AM, Sheng Yang wrote:

On Tuesday 20 October 2009 20:37:20 Marcelo Tosatti wrote:
   

GUEST_CR3 is updated via kvm_set_cr3 whenever CR3 value
changes.
 

The description is not that accuracy... If CR3 value change in guest when EPT
enabled, no VM Exit would happen, then no kvm_set_cr3...
   


True, should be 'changed from userspace'.

--
I have a truly marvellous patch that fixes the bug which this
signature is too narrow to contain.

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 5/5] Nested VMX patch 5 implements vmlaunch and vmresume

2009-10-22 Thread Gleb Natapov
On Wed, Oct 21, 2009 at 04:43:44PM +0200, Orit Wasserman wrote:
   @@ -4641,10 +4955,13 @@ static void vmx_complete_interrupts(struct
  vcpu_vmx *vmx)
   int type;
   bool idtv_info_valid;
  
   -   exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
   -
   vmx-exit_reason = vmcs_read32(VM_EXIT_REASON);
  
   +   if (vmx-nested.nested_mode)
   +  return;
   +
  Why return here? What the function does that should not be done in
  nested mode?
 In nested mode L0 injects an interrupt to L2 only in one scenario,
 if there is an IDT_VALID event and L0 decides to run L2 again and not to
 switch back to L1.
 In all other cases the injection is handled by L1.
This is exactly the kind of scenario that is handled by
vmx_complete_interrupts(). (vmx|svm)_complete_interrups() store
pending event in arch agnostic way and re-injection is handled by
x86.c You bypass this logic by inserting return here and introducing
nested_handle_valid_idt() function below.

 
   +   exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
   +
   /* Handle machine checks before interrupts are enabled */
   if ((vmx-exit_reason == EXIT_REASON_MCE_DURING_VMENTRY)
   || (vmx-exit_reason == EXIT_REASON_EXCEPTION_NMI
   @@ -4747,6 +5064,60 @@ static void fixup_rmode_irq(struct vcpu_vmx
 *vmx)
  | vmx-rmode.irq.vector;
}
  
   +static int nested_handle_valid_idt(struct kvm_vcpu *vcpu)
   +{
  It seems by this function you are trying to bypass general event
  reinjection logic. Why?
 See above.
The logic implemented by this function is handled in x86.c in arch
agnostic way. Is there something wrong with this?

   +   vmx-launched = vmx-nested.l2_state-launched;
   +
  Can you explain why -launched logic is needed?
 It is possible L1 called vmlaunch but we didn't actually run L2 (for
 example there was an interrupt and
 enable_irq_window switched back to L1 before running L2). L1 thinks the
 vmlaunch was successful and call vmresume in the next time
 but KVM needs to call vmlaunch for L2.
handle_vmlauch() and handle_vmresume() are exactly the same. Why KVM needs
to run one and not the other?
 
   +static int nested_vmx_vmexit(struct kvm_vcpu *vcpu,
   +  bool is_interrupt)
   +{
   +   struct vcpu_vmx *vmx = to_vmx(vcpu);
   +   int initial_pfu_active = vcpu-fpu_active;
   +
   +   if (!vmx-nested.nested_mode) {
   +  printk(KERN_INFO WARNING: %s called but not in nested mode\n,
   + __func__);
   +  return 0;
   +   }
   +
   +   save_msrs(vmx-guest_msrs, vmx-save_nmsrs);
   +
   +   sync_cached_regs_to_vmcs(vcpu);
   +
   +   if (!nested_map_shadow_vmcs(vcpu)) {
   +  printk(KERN_INFO Error mapping shadow vmcs\n);
   +  set_rflags_to_vmx_fail_valid(vcpu);
  Error during vmexit should set abort flag, not change flags.
 I think this is more a vmlaunch/vmresume error (in the code that switch
 back to L1).
How is this vmlaunch/vmresume error? This function is called to exit
from L2 guest while on L2 vcms. It is called asynchronously in respect
to L2 guest and you modify L2 guest rflags register at unpredictable
place here.

--
Gleb.
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 0/9] S390x KVM support

2009-10-22 Thread Avi Kivity

On 10/20/2009 06:40 PM, Carsten Otte wrote:


This patch moves s390 processor status word into the base kvm_run
struct and keeps it up-to date on all userspace exits.

+#include linux/autoconf.h
#include linux/types.h
#include linux/compiler.h
#include linux/ioctl.h


Not needed.


@@ -116,6 +117,11 @@
__u64 cr8;
__u64 apic_base;

+#ifdef CONFIG_S390
+/* the processor status word for s390 */
+__u64 psw_mask; /* psw upper half */
+__u64 psw_addr; /* psw lower half */
+#endif


Doesn't this break backward compatibility by changing the structure?

Best to put it after the union (and as a copy, so userspace that expects 
the previous location still works).  If you're reading it from the 
kernel, also need a way to tell the kernel which copy to read from.


Also advertise with a KVM_CAP.

Additionally, CONFIG_ in public headers are frowned upon as 
non-portable.  A workaround is to #define __KVM_S390 in asm/kvm.h and 
depend on that.


--- kvm.orig/arch/s390/kvm/kvm-s390.c2009-10-20 15:01:02.0 
+0200

+++ kvm/arch/s390/kvm/kvm-s390.c2009-10-20 18:13:45.0 +0200
@@ -421,7 +421,8 @@
if (atomic_read(vcpu-arch.sie_block-cpuflags)  CPUSTAT_RUNNING)
rc = -EBUSY;
else
-vcpu-arch.sie_block-gpsw = psw;
+vcpu-run-psw_mask = psw.mask;
+vcpu-run-psw_addr = psw.addr;


It's traditional to add braces around multi-line else blocks.

I'd also appreciate an explanation of what this is all about.

--
error compiling committee.c: too many arguments to function

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 0/9] S390x KVM support

2009-10-22 Thread Alexander Graf


On 22.10.2009, at 11:08, Avi Kivity wrote:


On 10/20/2009 06:40 PM, Carsten Otte wrote:


This patch moves s390 processor status word into the base kvm_run
struct and keeps it up-to date on all userspace exits.

+#include linux/autoconf.h
#include linux/types.h
#include linux/compiler.h
#include linux/ioctl.h


Not needed.


@@ -116,6 +117,11 @@
   __u64 cr8;
   __u64 apic_base;

+#ifdef CONFIG_S390
+/* the processor status word for s390 */
+__u64 psw_mask; /* psw upper half */
+__u64 psw_addr; /* psw lower half */
+#endif


Doesn't this break backward compatibility by changing the structure?

Best to put it after the union (and as a copy, so userspace that  
expects the previous location still works).  If you're reading it  
from the kernel, also need a way to tell the kernel which copy to  
read from.


Also advertise with a KVM_CAP.


I don't think we need to go through the hassle here. There is  
effectively no user of that code for now and the ABI is considered  
unstable.


Additionally, CONFIG_ in public headers are frowned upon as non- 
portable.  A workaround is to #define __KVM_S390 in asm/kvm.h and  
depend on that.


--- kvm.orig/arch/s390/kvm/kvm-s390.c2009-10-20  
15:01:02.0 +0200
+++ kvm/arch/s390/kvm/kvm-s390.c2009-10-20 18:13:45.0  
+0200

@@ -421,7 +421,8 @@
   if (atomic_read(vcpu-arch.sie_block-cpuflags)   
CPUSTAT_RUNNING)

   rc = -EBUSY;
   else
-vcpu-arch.sie_block-gpsw = psw;
+vcpu-run-psw_mask = psw.mask;
+vcpu-run-psw_addr = psw.addr;


It's traditional to add braces around multi-line else blocks.

I'd also appreciate an explanation of what this is all about.


Explanation in the code or explanation in an email reply?

Alex

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 0/9] S390x KVM support

2009-10-22 Thread Carsten Otte

Avi Kivity wrote:

@@ -116,6 +117,11 @@
__u64 cr8;
__u64 apic_base;

+#ifdef CONFIG_S390
+/* the processor status word for s390 */
+__u64 psw_mask; /* psw upper half */
+__u64 psw_addr; /* psw lower half */
+#endif


Doesn't this break backward compatibility by changing the structure?
Yes, but with a zero user base I think it's okay. I'd update our 
userspace. Once we pull CONFIG_EXPERIMENTAL we keep the API stable.


Additionally, CONFIG_ in public headers are frowned upon as 
non-portable.  A workaround is to #define __KVM_S390 in asm/kvm.h and 
depend on that.

Yea, that's better.

--- kvm.orig/arch/s390/kvm/kvm-s390.c2009-10-20 15:01:02.0 
+0200

+++ kvm/arch/s390/kvm/kvm-s390.c2009-10-20 18:13:45.0 +0200
@@ -421,7 +421,8 @@
if (atomic_read(vcpu-arch.sie_block-cpuflags)  CPUSTAT_RUNNING)
rc = -EBUSY;
else
-vcpu-arch.sie_block-gpsw = psw;
+vcpu-run-psw_mask = psw.mask;
+vcpu-run-psw_addr = psw.addr;


It's traditional to add braces around multi-line else blocks.

This is a plain bug, will fix.



I'd also appreciate an explanation of what this is all about.
The processor status word does contain various bits about the CPU's 
state, such as interrupt mask bits, current address space, and the 
current instruction address. The status is kept in the in-kernel sie 
control block data structure and has so far only been mirrored into 
kvm_run during exit_reason == s390_sieic exits because user space needs 
to work on it. It was never part of get_regs/set_regs and friends as 
performance optimization: it's needed on almost every exit, having it in 
kvm_run saves doing syscalls.
The gdb stub requires an up-to-date copy at every exit, and therefore 
the patch moves it out of the union and updates it at all userland exits.

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: qemu-kvm: require 4K aligned resource size for memory

2009-10-22 Thread Michael S. Tsirkin
On Wed, Oct 21, 2009 at 11:42:47PM -0700, Chris Wright wrote:
 * Michael S. Tsirkin (m...@redhat.com) wrote:
  KVM does not virtualize low address bits for memory accesses, so we must
  require that PCI BAR size is a multiple of 4K for passthrough to work
  (this also guarantees that address is 4K aligned).
  
  Users of recent linux kernels can force resource size up to 4K
  using:
  
  commit 32a9a682bef2f6fce7026bd94d1ce20028b0e52d
  Author: Yuji Shimada shimada-...@necst.nec.co.jp
  Date:   Mon Mar 16 17:13:39 2009 +0900
  PCI: allow assignment of memory resources with a specified alignment
  
  Signed-off-by: Michael S. Tsirkin m...@redhat.com
  
  ---
  
  diff --git a/hw/device-assignment.c b/hw/device-assignment.c
  index 237060f..c2ef31f 100644
  --- a/hw/device-assignment.c
  +++ b/hw/device-assignment.c
  @@ -381,6 +381,14 @@ static int assigned_dev_register_regions(PCIRegion 
  *io_regions,
   int t = cur_region-type  IORESOURCE_PREFETCH
   ? PCI_ADDRESS_SPACE_MEM_PREFETCH
   : PCI_ADDRESS_SPACE_MEM;
  +if (cur_region-size  0xFFF) {
  +fprintf(stderr, Unable to assign device: PCI region %d 
  +at address 0x%llx has size 0x%x, 
  + which is not a multiple of 4K\n,
  +i, (unsigned long long)cur_region-base_addr,
  +cur_region-size);
  +return -1;
  +}
 
 should render this useless now shouldn't it?
 
/* add offset */
 pci_dev-v_addrs[i].u.r_virtbase +=
 (cur_region-base_addr  0xFFF);

Yes, we could go over the code and remove all alignment.
But I'd rather not in case we decide on another approach
in the future. And for upstream, all this code will have to
be rewritten anyway.

 thanks,
 -chris
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: vhost-net patches

2009-10-22 Thread Michael S. Tsirkin
On Wed, Oct 21, 2009 at 04:59:20PM -0700, Shirley Ma wrote:
 Hello Michael,
 
 There was a recent bugfix in qemu-kvm I pushed.
 Could you please verify that you have 
 cec75e39151e49cc90c849eab5d0d729667c9e68
 ?
 
 Yes, I cloned your qemu-kvm and kernel git. Here is git log output:
 
 commit cec75e39151e49cc90c849eab5d0d729667c9e68
 Author: Michael S. Tsirkin m...@redhat.com
 Date:   Wed Oct 21 12:34:36 2009 +0200
 
 vhost: use acked features, not all features
 
 This was recently reported without vhost, did not
 reproduce it here yet.
 And you say you do not see the above without vhost?
 
 I saw above with and without vhost, but the network worked with the errors
 without vhost.

Could you please supply more information?
What is the qemu command line you use?
What guest do you use?
How is the network configured?

Please attach tcpdump to interfaces in host and guest,
and run ping from guest.
Do you see any traffic? Does it look sane?



 Thanks
 Shirley
 
 
 Inactive hide details for Michael S. Tsirkin ---10/21/2009 01:24:58 PM---On
 Wed, Oct 21, 2009 at 12:59:50PM -0700, Shirley MaMichael S. Tsirkin 
 ---10/21/
 2009 01:24:58 PM---On Wed, Oct 21, 2009 at 12:59:50PM -0700, Shirley Ma wrote:
 
 Michael S.[cid]   *
 TsirkinTo Shirley Ma/Beaverton/i...@ibmus
 m...@redhat.com   [cid]   *
 cc David Stevens/Beaverton/i...@ibmus,
 10/21/2009 01:19   kvm@vger.kernel.org, 
 s...@linux.vnet.ibm.com
 PM [cid]   *
Subject Re: vhost-net patches
**
 
 On Wed, Oct 21, 2009 at 12:59:50PM -0700, Shirley Ma wrote:
  Hello Micahel,
 
  I have set up guest kernel 2.6.32-rc5 with MSI configured. Here are errors
 what
  I have got:
 
  1. First, qemu complained extboot.bin not found, I copied the file from
  optionrom/ dir to pc-bios/ dir, this problem is gone.
 
  2. Second, when guest boot up, it has lots of errors as below. Without vhost
  support, I still saw same errors but the guest interface can communicate 
  with
  host, but with vhost, it doesn't work.
 
 There was a ecent bugfix in qemu-kvm I pushed.
 Could you please verify that you have cec75e39151e49cc90c849eab5d0d729667c9e68
 ?
 
 
  I am posting the errors from /var/log/
  messages here:
 
  virtio-pci :00:03.0: can't find IRQ for PCI INT A; probably buggy MP
 table
  virtio-pci :00:04.0: can't find IRQ for PCI INT A; probably buggy MP
 table
  virtio-pci :00:04.0: irq 24 for MSI/MSI-X
  virtio-pci :00:04.0: irq 25 for MSI/MSI-X
  IRQ handler type mismatch for IRQ 1
  current handler: i8042
  Pid: 335, comm: modprobe Not tainted 2.6.32-rc5 #3
  Call Trace:
  __setup_irq+0x24c/0x2ac
  request_threaded_irq+0x113/0x179
  ? vring_interrupt+0x0/0x2f
  vp_try_to_find_vqs+0x4a3/0x4e0 [virtio_pci]
  ? blk_done+0x0/0xa7 [virtio_blk]
  vp_find_vqs+0x1b/0x62 [virtio_pci]
  virtblk_probe+0xbd/0x3d0 [virtio_blk]
  ? sysfs_do_create_link+0xbb/0xfd
  ? blk_done+0x0/0xa7 [virtio_blk]
  ? add_status+0x1f/0x24
  virtio_dev_probe+0x91/0xb0
  driver_probe_device+0x79/0x105
  __driver_attach+0x43/0x5f
  bus_for_each_dev+0x3d/0x67
  driver_attach+0x14/0x16
  ? __driver_attach+0x0/0x5f
  bus_add_driver+0xa2/0x1c9
  driver_register+0x8b/0xeb
  ? init+0x0/0x24 [virtio_blk]
  register_virtio_driver+0x1f/0x22
  init+0x22/0x24 [virtio_blk]
  do_one_initcall+0x4c/0x13a
  sys_init_module+0xa7/0x1db
  syscall_call+0x7/0xb
  virtio-pci :00:04.0: irq 24 for MSI/MSI-X
  virtio-pci :00:04.0: irq 25 for MSI/MSI-X
 
  vda: vda1 vda2
  EXT3-fs: INFO: recovery required on readonly filesystem.
  EXT3-fs: write access will be enabled during recovery.
  kjournald starting. Commit interval 5 seconds
  EXT3-fs: recovery complete.
  EXT3-fs: mounted filesystem with writeback data mode.
  udevd version 127 started
  virtio-pci :00:03.0: irq 26 for MSI/MSI-X
  virtio-pci :00:03.0: irq 27 for MSI/MSI-X
  virtio-pci :00:03.0: irq 28 for MSI/MSI-X
  IRQ handler type mismatch for IRQ 1
  current handler: i8042
  Pid: 440, comm: modprobe Not tainted 2.6.32-rc5 #3
  Call Trace:
  __setup_irq+0x24c/0x2ac
  request_threaded_irq+0x113/0x179
  ? vring_interrupt+0x0/0x2f
  vp_try_to_find_vqs+0x4a3/0x4e0 [virtio_pci]
  ? skb_recv_done+0x0/0x36 [virtio_net]
  vp_find_vqs+0x1b/0x62 [virtio_pci]
  virtnet_probe+0x265/0x347 [virtio_net]
  ? skb_recv_done+0x0/0x36 [virtio_net]
  ? skb_xmit_done+0x0/0x1e [virtio_net]
  ? add_status+0x1f/0x24
  virtio_dev_probe+0x91/0xb0
  driver_probe_device+0x79/0x105
  __driver_attach+0x43/0x5f
  bus_for_each_dev+0x3d/0x67
  driver_attach+0x14/0x16
  ? __driver_attach+0x0/0x5f
  bus_add_driver+0xa2/0x1c9
  driver_register+0x8b/0xeb
  ? init+0x0/0xf [virtio_net]
  register_virtio_driver+0x1f/0x22
  init+0xd/0xf [virtio_net]
  do_one_initcall+0x4c/0x13a
  sys_init_module+0xa7/0x1db
  syscall_call+0x7/0xb
  

Re: 0.11: SMP guests using one host CPU only?

2009-10-22 Thread Tomasz Chmielewski

Avi Kivity wrote:


Some 1-CPU guests have only one thread though?



Are you sure they're using kvm?  Try 'info kvm' in the monitor.  tcg 
will only use on thread (more will be spawned for I/O, but will 
eventually die).


Indeed, that was a good suggestion - they were not using KVM (support 
not compiled in, and kvm didn't complain when starting).


IO and CPU speed was rather OK, so I didn't notice ;)

To sum up the thread: I guess qemu _without_ KVM will only use one CPU, 
even when you assign more CPUs to the guest?



--
Tomasz Chmielewski
http://wpkg.org

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] sched, cpuacct: fix niced guest time accounting

2009-10-22 Thread Avi Kivity

On 10/20/2009 05:00 PM, Ryota Ozaki wrote:

Hi Avi,

This is the patch we discussed earlier. Please review it.

   


It looks good to me.


BTW, should this be sent to lkml as well?

   



In fact, it should go through tip.git, not kvm.git, so please send to 
lkml and copy Ingo.


Acked-by: Avi Kivity a...@redhat.com

--
error compiling committee.c: too many arguments to function

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 0/9] S390x KVM support

2009-10-22 Thread Avi Kivity

On 10/22/2009 11:11 AM, Alexander Graf wrote:

Doesn't this break backward compatibility by changing the structure?

Best to put it after the union (and as a copy, so userspace that 
expects the previous location still works).  If you're reading it 
from the kernel, also need a way to tell the kernel which copy to 
read from.


Also advertise with a KVM_CAP.



I don't think we need to go through the hassle here. There is 
effectively no user of that code for now and the ABI is considered 
unstable.


At the very least we need a KVM_CAP so qemu knows to fail on older kernels.



I'd also appreciate an explanation of what this is all about.


Explanation in the code or explanation in an email reply?



email.  I assume s390 hackers would understand why the psw needs to be 
exposed to qemu on every exit.  This is mostly for my personal interest.



--
error compiling committee.c: too many arguments to function

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 0/9] S390x KVM support

2009-10-22 Thread Alexander Graf


On 22.10.2009, at 11:53, Avi Kivity wrote:


On 10/22/2009 11:11 AM, Alexander Graf wrote:

Doesn't this break backward compatibility by changing the structure?

Best to put it after the union (and as a copy, so userspace that  
expects the previous location still works).  If you're reading it  
from the kernel, also need a way to tell the kernel which copy to  
read from.


Also advertise with a KVM_CAP.



I don't think we need to go through the hassle here. There is  
effectively no user of that code for now and the ABI is considered  
unstable.


At the very least we need a KVM_CAP so qemu knows to fail on older  
kernels.


Hm. Oh well :-). It can't hurt to have yet another CAP, right?


I'd also appreciate an explanation of what this is all about.


Explanation in the code or explanation in an email reply?



email.  I assume s390 hackers would understand why the psw needs to  
be exposed to qemu on every exit.  This is mostly for my personal  
interest.


PSW = (eflags  32) | pc;

:-)

Before that patch it was only synced with the vmcb on special  
userspace handled intercepts, now it's synced on every exit to  
userspace.


Alex
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 0/9] S390x KVM support

2009-10-22 Thread Alexander Graf


On 22.10.2009, at 11:55, Alexander Graf wrote:


PSW = (eflags  32) | pc;


Eh - 64 of course. The PSW is 128 bits.

Alex

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 0/9] S390x KVM support

2009-10-22 Thread Avi Kivity

On 10/22/2009 11:18 AM, Carsten Otte wrote:



I'd also appreciate an explanation of what this is all about.
The processor status word does contain various bits about the CPU's 
state, such as interrupt mask bits, current address space, and the 
current instruction address. The status is kept in the in-kernel sie 
control block data structure and has so far only been mirrored into 
kvm_run during exit_reason == s390_sieic exits because user space 
needs to work on it. It was never part of get_regs/set_regs and 
friends as performance optimization: it's needed on almost every exit, 
having it in kvm_run saves doing syscalls.
The gdb stub requires an up-to-date copy at every exit, and therefore 
the patch moves it out of the union and updates it at all userland exits.


gdb is hardly performance critical.  Is that the only reason for the change?

--
error compiling committee.c: too many arguments to function

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 0/9] S390x KVM support

2009-10-22 Thread Avi Kivity

On 10/22/2009 11:55 AM, Alexander Graf wrote:


On 22.10.2009, at 11:53, Avi Kivity wrote:


On 10/22/2009 11:11 AM, Alexander Graf wrote:

Doesn't this break backward compatibility by changing the structure?

Best to put it after the union (and as a copy, so userspace that 
expects the previous location still works).  If you're reading it 
from the kernel, also need a way to tell the kernel which copy to 
read from.


Also advertise with a KVM_CAP.



I don't think we need to go through the hassle here. There is 
effectively no user of that code for now and the ABI is considered 
unstable.


At the very least we need a KVM_CAP so qemu knows to fail on older 
kernels.


Hm. Oh well :-). It can't hurt to have yet another CAP, right?


I'd also appreciate an explanation of what this is all about.


Explanation in the code or explanation in an email reply?



email.  I assume s390 hackers would understand why the psw needs to 
be exposed to qemu on every exit.  This is mostly for my personal 
interest.


PSW = (eflags  32) | pc;

:-)

Before that patch it was only synced with the vmcb on special 
userspace handled intercepts, now it's synced on every exit to userspace.


Right, but why?  x86 qemu doesn't care about either pc or eflags (with 
in-kernel irqchip, which s390 essentially is).


--
error compiling committee.c: too many arguments to function

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 0/9] S390x KVM support

2009-10-22 Thread Alexander Graf


On 22.10.2009, at 12:03, Avi Kivity wrote:


On 10/22/2009 11:55 AM, Alexander Graf wrote:


On 22.10.2009, at 11:53, Avi Kivity wrote:


On 10/22/2009 11:11 AM, Alexander Graf wrote:
Doesn't this break backward compatibility by changing the  
structure?


Best to put it after the union (and as a copy, so userspace that  
expects the previous location still works).  If you're reading  
it from the kernel, also need a way to tell the kernel which  
copy to read from.


Also advertise with a KVM_CAP.



I don't think we need to go through the hassle here. There is  
effectively no user of that code for now and the ABI is  
considered unstable.


At the very least we need a KVM_CAP so qemu knows to fail on older  
kernels.


Hm. Oh well :-). It can't hurt to have yet another CAP, right?


I'd also appreciate an explanation of what this is all about.


Explanation in the code or explanation in an email reply?



email.  I assume s390 hackers would understand why the psw needs  
to be exposed to qemu on every exit.  This is mostly for my  
personal interest.


PSW = (eflags  32) | pc;

:-)

Before that patch it was only synced with the vmcb on special  
userspace handled intercepts, now it's synced on every exit to  
userspace.


Right, but why?  x86 qemu doesn't care about either pc or eflags  
(with in-kernel irqchip, which s390 essentially is).


The old code would only sync the psw for us on intercepts that the  
kernel knows require userspace interaction. We'd x /i $pc somewhere  
along the way and would get an old psw, but GET_REGS would give us  
recent register states, not including the psw. That's a total nightmare.


To solve the psw nightmare there were basically 2 roads possible to  
take here:


1) Sync the PSW in the GET_REGS ioctl
2) Sync the PSW in kvm_run on all exits to userspace

Otherwise we can't debug guests.

The same goes for setting the PSW. Setting the PSW was only  
implemented when kvm_run-exit_reason == the kernel thinks userspace  
needs to do something. So If you wanted to change the pc along the  
way, you couldn't.


Furthermore, there was a nasty bug in this whole optimization that  
lead me to think it's better to just get rid of the whole kernel  
tries to be clever implementation and just sync the psw at all times.  
Most exits to userspace are of the userspace needs to do something  
type anyways, so we're not really losing anything here. Implementing  
2) was just really easy.


So I guess Carsten thought the same way and improved my original patch.


I hope that clarifies things.

Alex
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 0/9] S390x KVM support

2009-10-22 Thread Carsten Otte

Avi Kivity wrote:
gdb is hardly performance critical.  Is that the only reason for the 
change?
Right, gdb is not performance critical. gdb is the reason for moving it 
out of the union, performance is the reason for having it in kvm_run at all.
There's only more reason to it: with a little tweaking in kuli, we'll be 
able to get rid of KVM_S390_SET_INITIAL_PSW ioctl with this change. That 
removes one oddity that makes us different from other platforms.

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 0/9] S390x KVM support

2009-10-22 Thread Carsten Otte

Avi Kivity wrote:
Right, but why?  x86 qemu doesn't care about either pc or eflags (with 
in-kernel irqchip, which s390 essentially is).

For different reasons. Most prominent for setting the condition code,
which is a sideband result of most instructions that indicates whether or
not the instruction actually worked - similar to the exception model in
high level programming languages.
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] Enable 32bit dirty log pointers on 64bit host

2009-10-22 Thread Avi Kivity

On 10/21/2009 04:08 PM, Alexander Graf wrote:

From: Arnd Bergmanna...@arndb.de

With big endian userspace, we can't quite figure out if a pointer
is 32 bit (shifted  32) or 64 bit when we read a 64 bit pointer.

This is what happens with dirty logging. To get the pointer interpreted
correctly, we thus need Arnd's patch to implement a compat layer for
the ioctl:

A better way to do this is to add a separate compat_ioctl() method that
converts this for you.

From: Arnd Bergmanna...@arndb.de
Signed-off-by: Arnd Bergmanna...@arndb.de
Acked-by: Alexander Grafag...@suse.de

   


If you send someone's patch, you need to sign this off.  That says you 
are legally allowed to send it along.


Ack means I have a say in this area and it looks good to me, you add 
it when someone else is doing the sending.



Changes from Arnd's example version:
   


This goes double when changing the patch.

With all the legalese out of the way, the actual code looks good.

--
error compiling committee.c: too many arguments to function

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] Enable 32bit dirty log pointers on 64bit host

2009-10-22 Thread Alexander Graf


On 22.10.2009, at 12:23, Avi Kivity wrote:


On 10/21/2009 04:08 PM, Alexander Graf wrote:

From: Arnd Bergmanna...@arndb.de

With big endian userspace, we can't quite figure out if a pointer
is 32 bit (shifted  32) or 64 bit when we read a 64 bit pointer.

This is what happens with dirty logging. To get the pointer  
interpreted

correctly, we thus need Arnd's patch to implement a compat layer for
the ioctl:

A better way to do this is to add a separate compat_ioctl() method  
that

converts this for you.

From: Arnd Bergmanna...@arndb.de
Signed-off-by: Arnd Bergmanna...@arndb.de
Acked-by: Alexander Grafag...@suse.de




If you send someone's patch, you need to sign this off.  That says  
you are legally allowed to send it along.


Ack means I have a say in this area and it looks good to me, you  
add it when someone else is doing the sending.


Ok, so is it still a From: Arnd then? Is it still signed-off by Arnd  
even though I change it?


Is there a Based-on-patch-by: tag? :-)

Alex

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 0/9] S390x KVM support

2009-10-22 Thread Avi Kivity

On 10/22/2009 12:22 PM, Carsten Otte wrote:

Avi Kivity wrote:
Right, but why?  x86 qemu doesn't care about either pc or eflags 
(with in-kernel irqchip, which s390 essentially is).

For different reasons. Most prominent for setting the condition code,
which is a sideband result of most instructions that indicates whether or
not the instruction actually worked - similar to the exception model in
high level programming languages.


Ok.  Thanks for the explanation.

On x86 we avoid emulating instructions in userspace.  Instead the kernel 
requests userspace to do something (triggered by the instruction), and 
the kernel does anything which might be implied by the instruction (like 
copying the result into a register, or updating pc).


An example is port I/O.  instead of userspace reading %edx to query the 
port number and setting %eax to indicate the result, userspace reads a 
port number struct field and writes an I/O result struct field.  Only 
the kernel accesses registers.


I don't know whether that model makes sense or not for s390, but please 
consider it.


--
error compiling committee.c: too many arguments to function

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 0/9] S390x KVM support

2009-10-22 Thread Avi Kivity

On 10/22/2009 12:20 PM, Carsten Otte wrote:

Avi Kivity wrote:
gdb is hardly performance critical.  Is that the only reason for the 
change?
Right, gdb is not performance critical. gdb is the reason for moving 
it out of the union, performance is the reason for having it in 
kvm_run at all.
There's only more reason to it: with a little tweaking in kuli, we'll 
be able to get rid of KVM_S390_SET_INITIAL_PSW ioctl with this change. 
That removes one oddity that makes us different from other platforms.


In fact qemu/x86 does a KVM_SET_SREGS (which updates pc) on reset.

--
error compiling committee.c: too many arguments to function

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] Enable 32bit dirty log pointers on 64bit host

2009-10-22 Thread Avi Kivity

On 10/22/2009 12:25 PM, Alexander Graf wrote:
If you send someone's patch, you need to sign this off.  That says 
you are legally allowed to send it along.


Ack means I have a say in this area and it looks good to me, you 
add it when someone else is doing the sending.



Ok, so is it still a From: Arnd then? Is it still signed-off by Arnd 
even though I change it?


Yes.  Most kvm patches are From: other people, I just add my signoff 
(and definitely don't remote theirs).


Whether to change From: depends on the changes.  If you rewrite the 
patch, it's From: you.  If you just fix a couple of bugs, you generally 
leave From: alone.



Is there a Based-on-patch-by: tag? :-)


You can always say Based on initial patch from 

--
error compiling committee.c: too many arguments to function

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH, -next] KVM: x86: Fix 32-bit host build warning

2009-10-22 Thread Jan Kiszka
Marcelo Tosatti wrote:
 On Tue, Oct 20, 2009 at 02:15:10PM +0200, Jan Kiszka wrote:
 Fixes cast to pointer from integer of different size on 32-bit hosts
 and applies a micro-refactoring.

 Signed-off-by: Jan Kiszka jan.kis...@siemens.com
 
 Applied, thanks.

Mmpf, sorry, please replace that patch (provided it is still only in
-next) with the actually tested version below. There was one further
typecast missing.

Jan

---

Fixes cast to pointer from integer of different size on 32-bit hosts
and applies a micro-refactoring.

Signed-off-by: Jan Kiszka jan.kis...@siemens.com
---

 arch/x86/kvm/x86.c |   11 ++-
 1 files changed, 6 insertions(+), 5 deletions(-)

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 3270b3b..5ad65b4 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -841,11 +841,12 @@ static int set_msr_mce(struct kvm_vcpu *vcpu, u32 msr, 
u64 data)
 
 static int xen_hvm_config(struct kvm_vcpu *vcpu, u64 data)
 {
+   struct kvm *kvm = vcpu-kvm;
int lm = is_long_mode(vcpu);
-   u8 *blob_addr = lm ? (u8 *)vcpu-kvm-arch.xen_hvm_config.blob_addr_64
-   : (u8 *)vcpu-kvm-arch.xen_hvm_config.blob_addr_32;
-   u8 blob_size = lm ? vcpu-kvm-arch.xen_hvm_config.blob_size_64
-   : vcpu-kvm-arch.xen_hvm_config.blob_size_32;
+   u8 *blob_addr = lm ? (u8 *)(long)kvm-arch.xen_hvm_config.blob_addr_64
+   : (u8 *)(long)kvm-arch.xen_hvm_config.blob_addr_32;
+   u8 blob_size = lm ? kvm-arch.xen_hvm_config.blob_size_64
+   : kvm-arch.xen_hvm_config.blob_size_32;
u32 page_num = data  ~PAGE_MASK;
u64 page_addr = data  PAGE_MASK;
u8 *page;
@@ -861,7 +862,7 @@ static int xen_hvm_config(struct kvm_vcpu *vcpu, u64 data)
r = -EFAULT;
if (copy_from_user(page, blob_addr + (page_num * PAGE_SIZE), PAGE_SIZE))
goto out_free;
-   if (kvm_write_guest(vcpu-kvm, page_addr, page, PAGE_SIZE))
+   if (kvm_write_guest(kvm, page_addr, page, PAGE_SIZE))
goto out_free;
r = 0;
 out_free:
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 0/9] S390x KVM support

2009-10-22 Thread Carsten Otte

Avi Kivity wrote:
On x86 we avoid emulating instructions in userspace.  Instead the kernel 
requests userspace to do something (triggered by the instruction), and 
the kernel does anything which might be implied by the instruction (like 
copying the result into a register, or updating pc).


An example is port I/O.  instead of userspace reading %edx to query the 
port number and setting %eax to indicate the result, userspace reads a 
port number struct field and writes an I/O result struct field.  Only 
the kernel accesses registers.


I don't know whether that model makes sense or not for s390, but please 
consider it.
We do the same for many instructions (arch/s390/kvm/instruction.c). User 
exits
are only performed for isntructions that cannot be handled in kernel. 
Also, we do exit with requests to userspace, see the s390_reset exit 
reason. I think in this regard, our implementation is very similar to 
x86. Btw: this is something I did copycat from your implementation on 
integration into kvm. The original zlive code did handle all 
instructions in userland.

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 0/9] S390x KVM support

2009-10-22 Thread Avi Kivity

On 10/22/2009 12:43 PM, Carsten Otte wrote:

Avi Kivity wrote:
On x86 we avoid emulating instructions in userspace.  Instead the 
kernel requests userspace to do something (triggered by the 
instruction), and the kernel does anything which might be implied by 
the instruction (like copying the result into a register, or updating 
pc).


An example is port I/O.  instead of userspace reading %edx to query 
the port number and setting %eax to indicate the result, userspace 
reads a port number struct field and writes an I/O result struct 
field.  Only the kernel accesses registers.


I don't know whether that model makes sense or not for s390, but 
please consider it.
We do the same for many instructions (arch/s390/kvm/instruction.c). 
User exits
are only performed for isntructions that cannot be handled in kernel. 
Also, we do exit with requests to userspace, see the s390_reset exit 
reason. I think in this regard, our implementation is very similar to 
x86. Btw: this is something I did copycat from your implementation on 
integration into kvm. The original zlive code did handle all 
instructions in userland.


So why not do it for this instruction as well?  Instead of updating the 
psw, return a success/error code and let the kernel update psw.


--
error compiling committee.c: too many arguments to function

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: 0.11: SMP guests using one host CPU only?

2009-10-22 Thread Avi Kivity

On 10/22/2009 11:28 AM, Tomasz Chmielewski wrote:
To sum up the thread: I guess qemu _without_ KVM will only use one 
CPU, even when you assign more CPUs to the guest?




Yes.


--
error compiling committee.c: too many arguments to function

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 5/5] Nested VMX patch 5 implements vmlaunch and vmresume

2009-10-22 Thread Avi Kivity

On 10/21/2009 04:43 PM, Orit Wasserman wrote:


It is possible L1 called vmlaunch but we didn't actually run L2 (for
example there was an interrupt and
enable_irq_window switched back to L1 before running L2). L1 thinks the
vmlaunch was successful and call vmresume in the next time
but KVM needs to call vmlaunch for L2.
   


Is it really possible?  If vmlaunch is started, it should complete 
unconditionally for L1.  The irq window should be recalculated based on 
the guest vmcs exit on external interrupt and guest eflags/cr8 etc.


--
error compiling committee.c: too many arguments to function

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [Autotest] [PATCH 1/3] KVM test: Add new utility functions to kvm_utils

2009-10-22 Thread Uri Lublin

On 10/21/2009 11:31 PM, Lucas Meneghel Rodrigues wrote:

Some distributors ship CD and DVD files with SHA1 hash sums instead
of MD5 hash sums, so let's extend the kvm_utils functions to
evaluate and compare SHA1 hashes:

* sha1sum_file(): Calculate SHA1 sum for file
+def sha1sum_file(filename, size=None):
+
+Calculate the sha1sum of filename.
+If size is not None, limit to first size bytes.
+Throw exception if something is wrong with filename.
+Can be also implemented with bash one-liner (assuming size%1024==0):
+dd if=filename bs=1024 count=size/1024 | sha1sum -
+
+@param filename: Path of the file that will have its sha1sum calculated.
+@param returns: sha1sum of the file.
+
+chunksize = 4096
+fsize = os.path.getsize(filename)
+if not size or sizefsize:
+size = fsize
+f = open(filename, 'rb')
+o = sha.new()
+while size  0:
+if chunksize  size:
+chunksize = size
+data = f.read(chunksize)
+if len(data) == 0:
+logging.debug(Nothing left to read but size=%d % size)
+break
+o.update(data)
+size -= len(data)
+f.close()
+return o.hexdigest()
+


This code and md5sum_file can share code.
Just pass o (or hash_new func: sha.new/md5.new) as a parameter to the function.



+
+
+def get_hash_from_file(sha_path, dvd_basename):


For consistency, replace sha_path with hash_path (as in the comment below).


+
+Get the a hash from a given DVD image from a hash file
+(Hash files are usually named MD5SUM or SHA1SUM and are located inside the
+download directories of the DVDs)
+
+@param hash_path: Local path to a hash file.
+@param cd_image: Basename of a CD image
+
+hash_file = open(sha_path, 'r')
+for line in hash_file.readlines():
+if dvd_basename in line:
+return line.split()[0]
+


--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [Autotest] [PATCH 3/3] KVM test: Extend VM.create() method to support SHA1 check

2009-10-22 Thread Uri Lublin

On 10/21/2009 11:31 PM, Lucas Meneghel Rodrigues wrote:


--- a/client/tests/kvm/kvm_vm.py
+++ b/client/tests/kvm/kvm_vm.py
@@ -339,20 +339,27 @@ class VM:
+elif params.get(sha1sum):
+logging.debug(Comparing expected SHA1 sum with SHA1 sum of 
+  ISO file...)
+actual_hash = kvm_utils.md5sum_file(iso)
+expected_hash = params.get(md5sum)


typo: replace md5 with sha for those last 2 lines

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: vhost-net patches

2009-10-22 Thread Michael S. Tsirkin
On Wed, Oct 21, 2009 at 04:59:20PM -0700, Shirley Ma wrote:
 
 Hello Michael,
 
 There was a recent bugfix in qemu-kvm I pushed.
 Could you please verify that you have
 cec75e39151e49cc90c849eab5d0d729667c9e68 ?
 
 Yes, I cloned your qemu-kvm and kernel git.
  I am posting the errors from /var/log/
  messages here:
 
  virtio-pci :00:03.0: can't find IRQ for PCI INT A; probably buggy MP
 table
  virtio-pci :00:04.0: can't find IRQ for PCI INT A; probably buggy MP
 table
  virtio-pci :00:04.0: irq 24 for MSI/MSI-X
  virtio-pci :00:04.0: irq 25 for MSI/MSI-X
  IRQ handler type mismatch for IRQ 1


So I think these are not related.

  current handler: i8042
  Pid: 335, comm: modprobe Not tainted 2.6.32-rc5 #3
  Call Trace:
  __setup_irq+0x24c/0x2ac
  request_threaded_irq+0x113/0x179
  ? vring_interrupt+0x0/0x2f
  vp_try_to_find_vqs+0x4a3/0x4e0 [virtio_pci]
  ? blk_done+0x0/0xa7 [virtio_blk]
  vp_find_vqs+0x1b/0x62 [virtio_pci]
  virtblk_probe+0xbd/0x3d0 [virtio_blk]
  ? sysfs_do_create_link+0xbb/0xfd
  ? blk_done+0x0/0xa7 [virtio_blk]
  ? add_status+0x1f/0x24
  virtio_dev_probe+0x91/0xb0
  driver_probe_device+0x79/0x105
  __driver_attach+0x43/0x5f
  bus_for_each_dev+0x3d/0x67
  driver_attach+0x14/0x16
  ? __driver_attach+0x0/0x5f
  bus_add_driver+0xa2/0x1c9
  driver_register+0x8b/0xeb
  ? init+0x0/0x24 [virtio_blk]
  register_virtio_driver+0x1f/0x22
  init+0x22/0x24 [virtio_blk]
  do_one_initcall+0x4c/0x13a
  sys_init_module+0xa7/0x1db
  syscall_call+0x7/0xb
  virtio-pci :00:04.0: irq 24 for MSI/MSI-X
  virtio-pci :00:04.0: irq 25 for MSI/MSI-X
 
  vda: vda1 vda2
  EXT3-fs: INFO: recovery required on readonly filesystem.
  EXT3-fs: write access will be enabled during recovery.
  kjournald starting. Commit interval 5 seconds
  EXT3-fs: recovery complete.
  EXT3-fs: mounted filesystem with writeback data mode.
  udevd version 127 started
  virtio-pci :00:03.0: irq 26 for MSI/MSI-X
  virtio-pci :00:03.0: irq 27 for MSI/MSI-X
  virtio-pci :00:03.0: irq 28 for MSI/MSI-X
  IRQ handler type mismatch for IRQ 1
  current handler: i8042
  Pid: 440, comm: modprobe Not tainted 2.6.32-rc5 #3
  Call Trace:
  __setup_irq+0x24c/0x2ac
  request_threaded_irq+0x113/0x179
  ? vring_interrupt+0x0/0x2f
  vp_try_to_find_vqs+0x4a3/0x4e0 [virtio_pci]
  ? skb_recv_done+0x0/0x36 [virtio_net]
  vp_find_vqs+0x1b/0x62 [virtio_pci]
  virtnet_probe+0x265/0x347 [virtio_net]
  ? skb_recv_done+0x0/0x36 [virtio_net]
  ? skb_xmit_done+0x0/0x1e [virtio_net]
  ? add_status+0x1f/0x24
  virtio_dev_probe+0x91/0xb0
  driver_probe_device+0x79/0x105
  __driver_attach+0x43/0x5f
  bus_for_each_dev+0x3d/0x67
  driver_attach+0x14/0x16
  ? __driver_attach+0x0/0x5f
  bus_add_driver+0xa2/0x1c9
  driver_register+0x8b/0xeb
  ? init+0x0/0xf [virtio_net]
  register_virtio_driver+0x1f/0x22
  init+0xd/0xf [virtio_net]
  do_one_initcall+0x4c/0x13a
  sys_init_module+0xa7/0x1db
  syscall_call+0x7/0xb
  virtio-pci :00:03.0: irq 26 for MSI/MSI-X
  virtio-pci :00:03.0: irq 27 for MSI/MSI-X
 
  3. The guest interface is up, and cat /proc/interrupts outputs:
 
  24: 0 PCI-MSI-edge virtio1-config
  25: 2571 PCI-MSI-edge virtio1-virtqueues
  26: 0 PCI-MSI-edge virtio0-config
  27: 0 PCI-MSI-edge virtio0-virtqueues

Aha. I notice that we failed to allocate 3 vectors, and have fallen back
on 2 vectors here.  This also should work, but is not the configuration
that I am testing, and will be slower anyway.  Why are there 2 virtio
devices here? Is the second one for storage? Can you try switching to
another hard drive model please?


  Thanks
  Shirley

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


qemu-kvm: sigsegv at exit

2009-10-22 Thread Michael S. Tsirkin
Hi!
I'm sometimes getting segfaults when I kill qemu.
This time I caught it when qemu was under gdb:


Program received signal SIGSEGV, Segmentation fault.
[Switching to Thread 0x411d0940 (LWP 14446)]
0x0040afb4 in qemu_mod_timer (ts=0x19f0fd0, expire_time=62275467335)
at /home/mst/scm/qemu-kvm/vl.c:1009
1009if ((alarm_timer-flags  ALARM_FLAG_EXPIRED) == 0) {
(gdb) l
1004ts-next = *pt;
1005*pt = ts;
1006
1007/* Rearm if necessary  */
1008if (pt == active_timers[ts-clock-type]) {
1009if ((alarm_timer-flags  ALARM_FLAG_EXPIRED) == 0) {
1010qemu_rearm_alarm_timer(alarm_timer);
1011}
1012/* Interrupt execution to force deadline recalculation.  */
1013if (use_icount)
(gdb) p alarm_timer
$1 = (struct qemu_alarm_timer *) 0x0
(gdb) where
#0  0x0040afb4 in qemu_mod_timer (ts=0x19f0fd0, expire_time=62275467335)
at /home/mst/scm/qemu-kvm/vl.c:1009
#1  0x0041aadf in virtio_net_handle_tx (vdev=value optimized out, 
vq=0x19f5af0)
at /home/mst/scm/qemu-kvm/hw/virtio-net.c:696
#2  0x00421669 in kvm_run (vcpu=0x19d46a0, env=0x19c2250) at 
/home/mst/scm/qemu-kvm/qemu-kvm.c:797
#3  0x004216d6 in kvm_cpu_exec (env=0x83d0f8) at 
/home/mst/scm/qemu-kvm/qemu-kvm.c:1714
#4  0x00422981 in ap_main_loop (_env=value optimized out) at 
/home/mst/scm/qemu-kvm/qemu-kvm.c:1969
#5  0x00377dc06367 in start_thread () from /lib64/libpthread.so.0
#6  0x00377d0d30ad in clone () from /lib64/libc.so.6
(gdb)

So this probably means that we have already run quit_timers:

static void quit_timers(void)
{
alarm_timer-stop(alarm_timer);
alarm_timer = NULL;
}

but kvm vcpu thread is still running.


Not sure what the right fix is here: should we stop
kvm after main loop has exited?

-- 
MST
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] sched, cpuacct: fix niced guest time accounting

2009-10-22 Thread Ryota Ozaki
On Thu, Oct 22, 2009 at 6:47 PM, Avi Kivity a...@redhat.com wrote:
 On 10/20/2009 05:00 PM, Ryota Ozaki wrote:

 Hi Avi,

 This is the patch we discussed earlier. Please review it.



 It looks good to me.

 BTW, should this be sent to lkml as well?




 In fact, it should go through tip.git, not kvm.git, so please send to lkml
 and copy Ingo.

Thanks for your review. I'll rebase and forward it soon.
  ozaki-r



 Acked-by: Avi Kivity a...@redhat.com

 --
 error compiling committee.c: too many arguments to function


--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH] Enable 32bit dirty log pointers on 64bit host

2009-10-22 Thread Alexander Graf
From: Arnd Bergmann a...@arndb.de

With big endian userspace, we can't quite figure out if a pointer
is 32 bit (shifted  32) or 64 bit when we read a 64 bit pointer.

This is what happens with dirty logging. To get the pointer interpreted
correctly, we thus need Arnd's patch to implement a compat layer for
the ioctl:

A better way to do this is to add a separate compat_ioctl() method that
converts this for you.

Based on initial patch from Arnd Bergmann.

From: Arnd Bergmann a...@arndb.de
Signed-off-by: Arnd Bergmann a...@arndb.de
Signed-off-by: Alexander Graf ag...@suse.de

---

Changes from Arnd's example version:

  - s/log.log/log/ (Avi)
  - use sizeof(compat_log) (Avi)
  - compile fixes
---
 virt/kvm/kvm_main.c |   49 -
 1 files changed, 48 insertions(+), 1 deletions(-)

diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index cac69c4..54a272f 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -43,6 +43,7 @@
 #include linux/swap.h
 #include linux/bitops.h
 #include linux/spinlock.h
+#include linux/compat.h
 
 #include asm/processor.h
 #include asm/io.h
@@ -1542,6 +1543,52 @@ out:
return r;
 }
 
+#ifdef CONFIG_COMPAT
+struct compat_kvm_dirty_log {
+   __u32 slot;
+   __u32 padding1;
+   union {
+   compat_uptr_t dirty_bitmap; /* one bit per page */
+   __u64 padding2;
+   };
+};
+
+static long kvm_vm_compat_ioctl(struct file *filp,
+  unsigned int ioctl, unsigned long arg)
+{
+   struct kvm *kvm = filp-private_data;
+   int r;
+
+   if (kvm-mm != current-mm)
+   return -EIO;
+   switch (ioctl) {
+   case KVM_GET_DIRTY_LOG: {
+   struct compat_kvm_dirty_log compat_log;
+   struct kvm_dirty_log log;
+
+   r = -EFAULT;
+   if (copy_from_user(compat_log, (void __user *)arg,
+  sizeof(compat_log)))
+   goto out;
+   log.slot = compat_log.slot;
+   log.padding1 = compat_log.padding1;
+   log.padding2 = compat_log.padding2;
+   log.dirty_bitmap = compat_ptr(compat_log.dirty_bitmap);
+
+   r = kvm_vm_ioctl_get_dirty_log(kvm, log);
+   if (r)
+   goto out;
+   break;
+   }
+   default:
+   r = kvm_vm_ioctl(filp, ioctl, arg);
+   }
+
+out:
+   return r;
+}
+#endif
+
 static int kvm_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 {
struct page *page[1];
@@ -1576,7 +1623,7 @@ static int kvm_vm_mmap(struct file *file, struct 
vm_area_struct *vma)
 static struct file_operations kvm_vm_fops = {
.release= kvm_vm_release,
.unlocked_ioctl = kvm_vm_ioctl,
-   .compat_ioctl   = kvm_vm_ioctl,
+   .compat_ioctl   = kvm_vm_compat_ioctl,
.mmap   = kvm_vm_mmap,
 };
 
-- 
1.6.0.2

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: vhost-net patches

2009-10-22 Thread Michael S. Tsirkin
On Wed, Oct 21, 2009 at 04:59:20PM -0700, Shirley Ma wrote:
 
 Hello Michael,
 
 There was a recent bugfix in qemu-kvm I pushed.
 Could you please verify that you have
 cec75e39151e49cc90c849eab5d0d729667c9e68 ?
 
 Yes, I cloned your qemu-kvm and kernel git.


It seems that the errors you observe are a result of changes
in guest virtio. I work to fix these, for now please
revert this commit f68d24082e22ccee3077d11aeb6dc5354f0ca7f1
in guest kernel.

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 1/5] Nested VMX patch 1 implements vmon and vmoff

2009-10-22 Thread Orit Wasserman


Avi Kivity a...@redhat.com wrote on 20/10/2009 06:00:50:

 From:

 Avi Kivity a...@redhat.com

 To:

 Orit Wasserman/Haifa/i...@ibmil

 Cc:

 kvm@vger.kernel.org, Ben-Ami Yassour1/Haifa/i...@ibmil, Abel Gordon/
 Haifa/i...@ibmil, Muli Ben-Yehuda/Haifa/i...@ibmil,
 aligu...@us.ibm.com, md...@us.ibm.com

 Date:

 20/10/2009 06:02

 Subject:

 Re: [PATCH 1/5] Nested VMX patch 1 implements vmon and vmoff

 On 10/15/2009 11:41 PM, or...@il.ibm.com wrote:
 
/*
  + * Handles msr read for nested virtualization
  + */
  +static int nested_vmx_get_msr(struct kvm_vcpu *vcpu, u32 msr_index,
  +   u64 *pdata)
  +{
  +   u64 vmx_msr = 0;
  +
  +   switch (msr_index) {
  +   case MSR_IA32_FEATURE_CONTROL:
  +  *pdata = 0;
  +  break;
  +   case MSR_IA32_VMX_BASIC:
  +  *pdata = 0;
  +  rdmsrl(MSR_IA32_VMX_BASIC, vmx_msr);
  +  *pdata = (vmx_msr  0x00cf);
  +  break;
  +
 

 This (and the rest of the msrs) must be controllable from userspace.
 Otherwise a live migration from a newer host to an older host would
break.
OK.

 
/*
  + * Writes msr value for nested virtualization
  + * Returns 0 on success, non-0 otherwise.
  + */
  +static int nested_vmx_set_msr(struct kvm_vcpu *vcpu, u32
 msr_index, u64 data)
  +{
  +   switch (msr_index) {
  +   case MSR_IA32_FEATURE_CONTROL:
  +  if ((data  (FEATURE_CONTROL_LOCKED |
  +  FEATURE_CONTROL_VMXON_ENABLED))
  +  != (FEATURE_CONTROL_LOCKED |
  + FEATURE_CONTROL_VMXON_ENABLED))
  + return 1;
  +  break;
  +   default:
  +  return 1;
  +   }
  +
  +   return 0;
  +}
  +
 

 Need to export this msr to userspace for live migration.  See
 msrs_to_save[].
OK.

 
  +/*
  + * Check to see if vcpu can execute vmx command
  + * Inject the corrseponding exception
  + */
  +static int nested_vmx_check_permission(struct kvm_vcpu *vcpu)
  +{
  +   struct kvm_segment cs;
  +   struct vcpu_vmx *vmx = to_vmx(vcpu);
  +   struct kvm_msr_entry *msr;
  +
  +   vmx_get_segment(vcpu,cs, VCPU_SREG_CS);
  +
  +   if (!vmx-nested.vmxon) {
  +  printk(KERN_DEBUG %s: vmx not on\n, __func__);
 

 pr_debug
I will change it.

  +  kvm_queue_exception(vcpu, UD_VECTOR);
  +  return 0;
  +   }
  +
  +   msr = find_msr_entry(vmx, MSR_EFER);
  +
  +   if ((vmx_get_rflags(vcpu)  X86_EFLAGS_VM) ||
  +   ((msr-data  EFER_LMA)  !cs.l)) {
 

 is_long_mode()
I will change it.

static int handle_vmx_insn(struct kvm_vcpu *vcpu)
{
   kvm_queue_exception(vcpu, UD_VECTOR);
   return 1;
}
 
  +static int handle_vmoff(struct kvm_vcpu *vcpu)
  +{
  +   struct vcpu_vmx *vmx = to_vmx(vcpu);
  +
  +   if (!nested_vmx_check_permission(vcpu))
  +  return 1;
  +
  +   vmx-nested.vmxon = 0;
  +
  +   skip_emulated_instruction(vcpu);
  +   return 1;
  +}
  +
  +static int handle_vmon(struct kvm_vcpu *vcpu)
  +{
  +   struct kvm_segment cs;
  +   struct vcpu_vmx *vmx = to_vmx(vcpu);
  +
  +   if (!nested) {
  +  printk(KERN_DEBUG %s: nested vmx not enabled\n, __func__);
  +  kvm_queue_exception(vcpu, UD_VECTOR);
  +  return 1;
  +   }
  +
  +   vmx_get_segment(vcpu,cs, VCPU_SREG_CS);
  +
  +   if (!(vcpu-arch.cr4  X86_CR4_VMXE) ||
  +   !(vcpu-arch.cr0  X86_CR0_PE) ||
  +   (vmx_get_rflags(vcpu)  X86_EFLAGS_VM)) {
  +  kvm_queue_exception(vcpu, UD_VECTOR);
  +  printk(KERN_INFO %s invalid register state\n, __func__);
  +  return 1;
  +   }
  +#ifdef CONFIG_X86_64
  +   if (((find_msr_entry(to_vmx(vcpu),
  +  MSR_EFER)-data  EFER_LMA)  !cs.l)) {
 

 is_long_mode(), and you can avoid the #ifdef.
I will change it.


 VMXON is supposed to block INIT, please add that (in a separate patch).
I will add it.

 --
 I have a truly marvellous patch that fixes the bug which this
 signature is too narrow to contain.


--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 3/5] Nested VMX patch 3 implements vmptrld and vmptrst

2009-10-22 Thread Orit Wasserman


Avi Kivity a...@redhat.com wrote on 20/10/2009 06:24:33:

 From:

 Avi Kivity a...@redhat.com

 To:

 Orit Wasserman/Haifa/i...@ibmil

 Cc:

 kvm@vger.kernel.org, Ben-Ami Yassour1/Haifa/i...@ibmil, Abel Gordon/
 Haifa/i...@ibmil, Muli Ben-Yehuda/Haifa/i...@ibmil,
 aligu...@us.ibm.com, md...@us.ibm.com

 Date:

 20/10/2009 06:24

 Subject:

 Re: [PATCH 3/5] Nested VMX patch 3 implements vmptrld and vmptrst

 On 10/15/2009 11:41 PM, or...@il.ibm.com wrote:
 
  +
  +struct __attribute__ ((__packed__)) shadow_vmcs {
 

 Since this is in guest memory, we need it packed so the binary format is
 preserved across migration.  Please add a comment so it isn't changed
 (at least without changing the revision_id).
I will add a comment.

 vmclear state should be here, that will help multiguest support.
Working on it ...

 
struct nested_vmx {
   /* Has the level1 guest done vmxon? */
   bool vmxon;
  -
  +   /* What is the location of the  vmcs l1 keeps for l2? (in level1
gpa) */
  +   u64 vmptr;
 

 Need to expose it for live migration.
I will look into it.

   /*
* Level 2 state : includes vmcs,registers and
* a copy of vmcs12 for vmread/vmwrite
*/
   struct level_state *l2_state;
  +   /* Level 1 state for switching to level 2 and back */
  +   struct level_state *l1_state;
 

 This creates a ton of duplication.

 Some of the data is completely unnecessary, for example we can
 recalculate cr0 from HOST_CR0 and GUEST_CR0.
I will look into it .

  +
  +static int vmptrld(struct kvm_vcpu *vcpu,
  + u64 phys_addr)
  +{
  +   u8 error;
  +
  +   asm volatile (__ex(ASM_VMX_VMPTRLD_RAX) ; setna %0
  +: =g(error) : a(phys_addr), m(phys_addr)
  +: cc);
  +   if (error) {
  +  printk(KERN_ERR kvm: %s vmptrld %llx failed\n,
  + __func__, phys_addr);
  +  return 1;
  +   }
  +
  +   return 0;
  +}
  +
/*
 * Switches to specified vcpu, until a matching vcpu_put(), but
assumes
 * vcpu mutex is already taken.
  @@ -736,15 +923,8 @@ static void vmx_vcpu_load(struct kvm_vcpu
 *vcpu, int cpu)
   }
 
   if (per_cpu(current_vmcs, cpu) != vmx-vmcs) {
  -  u8 error;
  -
  per_cpu(current_vmcs, cpu) = vmx-vmcs;
  -  asm volatile (__ex(ASM_VMX_VMPTRLD_RAX) ; setna %0
  -   : =g(error) : a(phys_addr), m(phys_addr)
  -   : cc);
  -  if (error)
  - printk(KERN_ERR kvm: vmptrld %p/%llx fail\n,
  -vmx-vmcs, phys_addr);
  +  vmptrld(vcpu, phys_addr);
   }
 

 This part of the patch is no longer needed.
I will remove it.
  +   if (cpu_has_vmx_msr_bitmap())
  +  vmx-nested.l2_state-msr_bitmap = vmcs_read64(MSR_BITMAP);
  +   else
  +  vmx-nested.l2_state-msr_bitmap = 0;
  +
  +   vmx-nested.l2_state-io_bitmap_a = vmcs_read64(IO_BITMAP_A);
  +   vmx-nested.l2_state-io_bitmap_b = vmcs_read64(IO_BITMAP_B);
  +
 

 This no longer works, since we don't load the guest vmcs.
I will look into it.

  +int kvm_read_guest_virt(gva_t addr, void *val, unsigned int bytes,
  + struct kvm_vcpu *vcpu);
 

 Isn't this in a header somewhere?
I will move it into the header.

  +
  +int read_guest_vmcs_gpa(struct kvm_vcpu *vcpu, u64 *gentry)
  +{
  +
  +   int r = 0;
  +
  +   r = kvm_read_guest_virt(vcpu-arch.regs[VCPU_REGS_RAX], gentry,
  +sizeof(u64), vcpu);
  +   if (r) {
  +  printk(KERN_ERR %s cannot read guest vmcs addr %lx : %d\n,
  + __func__, vcpu-arch.regs[VCPU_REGS_RAX], r);
  +  return r;
  +   }
  +
  +   if (!IS_ALIGNED(*gentry, PAGE_SIZE)) {
  +  printk(KERN_DEBUG %s addr %llx not aligned\n,
  + __func__, *gentry);
  +  return 1;
  +   }
  +
  +   return 0;
  +}
  +
 

 Should go through the emulator to evaluate arguments.
OK.

  +static int handle_vmptrld(struct kvm_vcpu *vcpu)
  +{
  +   struct vcpu_vmx *vmx = to_vmx(vcpu);
  +   struct page *vmcs_page;
  +   u64 guest_vmcs_addr;
  +
  +   if (!nested_vmx_check_permission(vcpu))
  +  return 1;
  +
  +   if (read_guest_vmcs_gpa(vcpu,guest_vmcs_addr))
  +  return 1;
  +
  +   if (create_l1_state(vcpu)) {
  +  printk(KERN_ERR %s create_l1_state failed\n, __func__);
  +  return 1;
  +   }
  +
  +   if (create_l2_state(vcpu)) {
  +  printk(KERN_ERR %s create_l2_state failed\n, __func__);
  +  return 1;
  +   }
 

 return errors here, so we see the problem.
OK.

  +
  +static int handle_vmptrst(struct kvm_vcpu *vcpu)
  +{
  +   int r = 0;
  +
  +   if (!nested_vmx_check_permission(vcpu))
  +  return 1;
  +
  +   r = kvm_write_guest_virt(vcpu-arch.regs[VCPU_REGS_RAX],
  + (void *)to_vmx(vcpu)-nested.vmptr,
  + sizeof(u64), vcpu);
 

 Emulator again.
OK.

  +void save_vmcs(struct shadow_vmcs *dst)
  +{
 
  +   dst-io_bitmap_a = vmcs_read64(IO_BITMAP_A);
  +   dst-io_bitmap_b = vmcs_read64(IO_BITMAP_B);
 

 These (and many others) can never change due to a nested guest running,
 so no need to save 

Re: [PATCH 4/5] Nested VMX patch 4 implements vmread and vmwrite

2009-10-22 Thread Orit Wasserman


Avi Kivity a...@redhat.com wrote on 20/10/2009 06:44:41:

 From:

 Avi Kivity a...@redhat.com

 To:

 Orit Wasserman/Haifa/i...@ibmil

 Cc:

 kvm@vger.kernel.org, Ben-Ami Yassour1/Haifa/i...@ibmil, Abel Gordon/
 Haifa/i...@ibmil, Muli Ben-Yehuda/Haifa/i...@ibmil,
 aligu...@us.ibm.com, md...@us.ibm.com

 Date:

 20/10/2009 06:44

 Subject:

 Re: [PATCH 4/5] Nested VMX patch 4 implements vmread and vmwrite

 On 10/15/2009 11:41 PM, or...@il.ibm.com wrote:
 
  +static int nested_map_shadow_vmcs(struct kvm_vcpu *vcpu)
  +{
  +   struct vcpu_vmx *vmx = to_vmx(vcpu);
  +   struct page *vmcs_page = nested_get_page(vcpu, vmx-nested.vmptr);
  +
  +   if (vmcs_page == NULL) {
  +  printk(KERN_INFO %s: failure in nested_get_page\n,__func__);
  +  return 0;
  +   }
  +
  +   if (vmx-nested.l2_state-shadow_vmcs) {
  +  printk(KERN_INFO %s: shadow vmcs already mapped\n,__func__);
  +  return 0;
  +   }
  +
 

 Consider dropping shadow_vmcs from l2_state and just passing it
 everywhere.  Less convenient but safer.
I will think about it, it is called from many places ...

  +   vmx-nested.l2_state-shadow_vmcs = kmap_atomic(vmcs_page,
KM_USER0);
  +
  +   if (!vmx-nested.l2_state-shadow_vmcs) {
  +  printk(KERN_INFO %s: error in kmap_atomic\n,__func__);
  +  return 0;
  +   }
 

 kmap_atomic() can't fail.
I will remove the check.
 
  +static int handle_vmread(struct kvm_vcpu *vcpu)
  +{
  +#ifndef CONFIG_X86_64
  +   u64 value;
  +#endif
  +
  +   if (!nested_vmx_check_permission(vcpu))
  +  return 1;
  +
  +   if (!nested_map_shadow_vmcs(vcpu)) {
  +  printk(KERN_INFO %s invalid shadow vmcs\n, __func__);
  +  set_rflags_to_vmx_fail_invalid(vcpu);
  +  return 1;
  +   }
 

 return an error.
OK.

  +
  +   switch (vmcs_field_length(vcpu-arch.regs[VCPU_REGS_RDX])) {
  +   case VMCS_FIELD_TYPE_U16:
  +  vcpu-arch.regs[VCPU_REGS_RAX] =
  + nested_vmcs_read16(vcpu,
  +  vcpu-arch.regs[VCPU_REGS_RDX]);
  +  break;
 

 Use the emulator to decode operands.
OK.


 --
 I have a truly marvellous patch that fixes the bug which this
 signature is too narrow to contain.


--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH] monitor: Fix build on 32-bit

2009-10-22 Thread Pierre Riteau
Error was:
monitor.c: In function 'print_cpu_iter':
monitor.c:450: error: format '%ld' expects type 'long int', but argument 3 has 
type 'int64_t'

Signed-off-by: Pierre Riteau pierre.rit...@irisa.fr
---
 monitor.c |2 +-
 1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/monitor.c b/monitor.c
index b4c4878..cf633a4 100644
--- a/monitor.c
+++ b/monitor.c
@@ -447,7 +447,7 @@ static void print_cpu_iter(QObject *obj, void *opaque)
 if (strcmp(qdict_get_str(cpu, halted), yes) == 0)
 monitor_printf(mon,  (halted));
 
-monitor_printf(mon,  thread_id=%ld, qdict_get_int(cpu, thread_id));
+monitor_printf(mon,  thread_id=% PRId64, qdict_get_int(cpu, 
thread_id));
 
 monitor_printf(mon, \n);
 }
-- 
1.6.3.3

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 5/5] Nested VMX patch 5 implements vmlaunch and vmresume

2009-10-22 Thread Orit Wasserman


Avi Kivity a...@redhat.com wrote on 20/10/2009 06:56:39:

 From:

 Avi Kivity a...@redhat.com

 To:

 Orit Wasserman/Haifa/i...@ibmil

 Cc:

 kvm@vger.kernel.org, Ben-Ami Yassour1/Haifa/i...@ibmil, Abel Gordon/
 Haifa/i...@ibmil, Muli Ben-Yehuda/Haifa/i...@ibmil,
 aligu...@us.ibm.com, md...@us.ibm.com

 Date:

 20/10/2009 06:56

 Subject:

 Re: [PATCH 5/5] Nested VMX patch 5 implements vmlaunch and vmresume

 On 10/15/2009 11:41 PM, or...@il.ibm.com wrote:
  From: Orit Wassermanor...@il.ibm.com
 
  ---
arch/x86/kvm/vmx.c | 1173 ++
 --
1 files changed, 1148 insertions(+), 25 deletions(-)
 
  diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
  index 6a4c252..e814029 100644
  --- a/arch/x86/kvm/vmx.c
  +++ b/arch/x86/kvm/vmx.c
  @@ -209,6 +209,7 @@ struct __attribute__ ((__packed__)) level_state {
   struct vmcs *vmcs;
   int cpu;
   int launched;
  +   bool first_launch;
};
 
struct nested_vmx {
  @@ -216,6 +217,12 @@ struct nested_vmx {
   bool vmxon;
   /* What is the location of the  vmcs l1 keeps for l2? (in
 level1 gpa) */
   u64 vmptr;
  +   /* Are we running nested guest */
  +   bool nested_mode;
  +   /* L1 requested VMLAUNCH or VMRESUME but we didn't run L2 yet */
  +   bool nested_run_pending;
  +   /* flag indicating if there was a valid IDT after exiting from l2
*/
  +   bool nested_valid_idt;
 

 Did you mean valid_idt_vectoring_info?
yes.

 No need to prefix everything with nested_ inside nested_vmx.
OK.

  +void prepare_vmcs_12(struct kvm_vcpu *vcpu)
  +{
  +   struct shadow_vmcs *l2_shadow_vmcs =
  +  get_shadow_vmcs(vcpu);
  +
  +   l2_shadow_vmcs-guest_es_selector = vmcs_read16(GUEST_ES_SELECTOR);
  +   l2_shadow_vmcs-guest_cs_selector = vmcs_read16(GUEST_CS_SELECTOR);
  +   l2_shadow_vmcs-guest_ss_selector = vmcs_read16(GUEST_SS_SELECTOR);
  +   l2_shadow_vmcs-guest_ds_selector = vmcs_read16(GUEST_DS_SELECTOR);
  +   l2_shadow_vmcs-guest_fs_selector = vmcs_read16(GUEST_FS_SELECTOR);
  +   l2_shadow_vmcs-guest_gs_selector = vmcs_read16(GUEST_GS_SELECTOR);
  +   l2_shadow_vmcs-guest_ldtr_selector = vmcs_read16
(GUEST_LDTR_SELECTOR);
  +   l2_shadow_vmcs-guest_tr_selector = vmcs_read16(GUEST_TR_SELECTOR);
  +
  +   l2_shadow_vmcs-tsc_offset = vmcs_read64(TSC_OFFSET);
  +   l2_shadow_vmcs-guest_physical_address =
  +  vmcs_read64(GUEST_PHYSICAL_ADDRESS);
  +   l2_shadow_vmcs-vmcs_link_pointer = vmcs_read64(VMCS_LINK_POINTER);
 

 Physical addresses need translation,  no?
If you are referring to GUEST_PHYSICAL_ADDRESS than there is no need for
translation for L1.
It need to stay in L2 physical address.

  +   l2_shadow_vmcs-guest_cr0 = vmcs_readl(GUEST_CR0);
  +
  +   l2_shadow_vmcs-guest_cr4 = vmcs_readl(GUEST_CR4);
 

 We don't allow the guest to modify these, so no need to read them.  If
 you do, you need to remove the bits that we modify.
You are correct for example CR0.TS , it will be fixed in the next set of
patches.

  +
  +int load_vmcs_common(struct shadow_vmcs *src)
  +{
  +
  +   vmcs_write64(VMCS_LINK_POINTER, src-vmcs_link_pointer);
 

 Why load this?
At the moment it is not used , maybe in the future.
I can add a check to see if it was changed.

  +   vmcs_write64(GUEST_IA32_DEBUGCTL, src-guest_ia32_debugctl);
 

 I think some features there are dangerous.
I will look into it.

  +   vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, src-
vm_entry_msr_load_count);
 

 Need to verify?  Also need to validate the loaded MSRs and run them
 through kvm_set_msr() instead of letting the cpu do it.
I will add the checks.

 --
 I have a truly marvellous patch that fixes the bug which this
 signature is too narrow to contain.


--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: vhost-net patches

2009-10-22 Thread Michael S. Tsirkin
On Thu, Oct 22, 2009 at 02:34:56PM +0200, Michael S. Tsirkin wrote:
 On Wed, Oct 21, 2009 at 04:59:20PM -0700, Shirley Ma wrote:
  
  Hello Michael,
  
  There was a recent bugfix in qemu-kvm I pushed.
  Could you please verify that you have
  cec75e39151e49cc90c849eab5d0d729667c9e68 ?
  
  Yes, I cloned your qemu-kvm and kernel git.
 
 
 It seems that the errors you observe are a result of changes
 in guest virtio. I work to fix these, for now please
 revert this commit f68d24082e22ccee3077d11aeb6dc5354f0ca7f1
 in guest kernel.

OK, I sent a patch that should fix the errors for you.
Could you please confirm, preferably on-list, whether
the patch makes the errors go away for you with
userspace virtio?

However, as I said previously, it's good to fix them but I think
they are unrelated to the fact that vhost does not work
for you.

Shirley, if you have the time, please send details
on your setup and observations on tcpdump output
in both guest and host with vhost enabled.

Thanks!

-- 
MST
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: Q: Stopped VM still using host cpu CPU ?

2009-10-22 Thread Avi Kivity

On 10/21/2009 10:29 PM, Daniel Schwager wrote:


 opreport -l  --symbols | less
CPU: CPU with timer interrupt, speed 0 MHz (estimated)
Profiling through timer interrupt
samples  %image name   app name
symbol name
418814   98.5250  no-vmlinux   no-vmlinux   (no
symbols)
1228  0.2889  qemu-system-x86_64   qemu-system-x86_64
main_loop_wait
888   0.2089  libpthread-2.8.solibpthread-2.8.so
__read_nocancel
...

   


Sorry, I gave you the wrong instructions.  Since most of the time is 
spent in vmlinux, we want to trace vmlinux, not qemu.


Three ways to see what is happening:

- set up vmlinux (you'll need to install the correct kernel-debuginfo 
package to get this).

- strace -p `pgrep qemu` and see what syscalls take place
- run 'top' and look at the wchan field ('f' 'y' will show it if it is 
hidden)


Probably easiest in reverse order as listed.

--
error compiling committee.c: too many arguments to function

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 00/27] Add KVM support for Book3s_64 (PPC64) hosts v5

2009-10-22 Thread Arnd Bergmann
On Wednesday 21 October 2009, Alexander Graf wrote:
 
 KVM for PowerPC only supports embedded cores at the moment.
 
 While it makes sense to virtualize on small machines, it's even more fun
 to do so on big boxes. So I figured we need KVM for PowerPC64 as well.
 
 This patchset implements KVM support for Book3s_64 hosts and guest support
 for Book3s_64 and G3/G4.
 
 To really make use of this, you also need a recent version of qemu.
 
 
 Don't want to apply patches? Get the git tree!
 
 $ git clone git://csgraf.de/kvm
 $ git checkout origin/ppc-v4

Whole series Acked-by: Arnd Bergmann a...@arndb.de

Great work, Alex!

Arnd 
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [KVM PATCH 1/2] KVM: Directly inject interrupts via irqfd

2009-10-22 Thread Avi Kivity

On 10/21/2009 05:42 PM, Gregory Haskins wrote:

I believe Avi, Michael, et. al. were in agreement with me on that design
choice.  I believe the reason is that there is no good way to do EOI/ACK
feedback within the constraints of an eventfd pipe which would be
required for the legacy pin-type interrupts.  Therefore, we won't even
bother trying.  High-performance subsystems will use irqfd/msi, and
legacy emulation can use the existing injection code (which includes the
necessary feedback for ack/eoi).

   


Right.  But we don't actually prevent anyone using non-msi with irqfd, 
which can trigger the bad lock usage from irq context, with a nice boom 
afterwards.  So we need to either prevent it during registration, or to 
gracefully handle it afterwards.


--
error compiling committee.c: too many arguments to function

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: KVM: VMX: flush TLB with INVEPT on cpu migration

2009-10-22 Thread Avi Kivity

On 10/21/2009 03:18 AM, Max Laier wrote:

On Friday 02 October 2009 00:16:58 you wrote:
   

It is possible that stale EPTP-tagged mappings are used, if a
vcpu migrates to a different pcpu.

Set KVM_REQ_TLB_FLUSH in vmx_vcpu_load, when switching pcpus, which
will invalidate both VPID and EPT mappings on the next vm-entry.
 

Thank you - I was at the brink of a nervous break-down before discovering
this.  Maybe it would help for the future to add a comment to
ept_misconfig_inspect_spte that explains that this might be caused by out of
sync tlbs, too (esp. when it doesn't show an apparent cause of the misconfig)
   


In fact ept_misconfig_inspect_spte() was added in order to track down 
this bug, so it will probably be removed in the future.


--
error compiling committee.c: too many arguments to function

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [KVM PATCH 1/2] KVM: Directly inject interrupts via irqfd

2009-10-22 Thread Gregory Haskins
Avi Kivity wrote:
 On 10/21/2009 05:42 PM, Gregory Haskins wrote:
 I believe Avi, Michael, et. al. were in agreement with me on that design
 choice.  I believe the reason is that there is no good way to do EOI/ACK
 feedback within the constraints of an eventfd pipe which would be
 required for the legacy pin-type interrupts.  Therefore, we won't even
 bother trying.  High-performance subsystems will use irqfd/msi, and
 legacy emulation can use the existing injection code (which includes the
 necessary feedback for ack/eoi).


 
 Right.  But we don't actually prevent anyone using non-msi with irqfd,
 which can trigger the bad lock usage from irq context, with a nice boom
 afterwards.  So we need to either prevent it during registration, or to
 gracefully handle it afterwards.
 

Yeah, I was thinking about that after I initially responded to Gleb.

I am thinking something along these lines:

Provide a function that lets you query a GSI for whether it supports
LOCKLESS or not.  Then we can either do one of two things:

1) Check for the LOCKLESS attribute at irqfd registration, fail if not
present

2) Cache the LOCKLESS attribute in the irqfd structure, and either go
direct or defer to a workqueue depending on the flag.

Thoughts?
-Greg



signature.asc
Description: OpenPGP digital signature


Re: [PATCH 1/2] kvm-kmod: Use the main development tree of kvm as Linux submodule

2009-10-22 Thread Avi Kivity

On 10/20/2009 02:11 PM, wolfgang.maue...@siemens.com wrote:

From: Wolfgang Mauererwolfgang.maue...@siemens.com

Most people won't have the sources installed in the path
that is the current default setting.

--- a/.gitmodules
+++ b/.gitmodules
@@ -1,3 +1,3 @@
  [submodule linux-2.6]
path = linux-2.6
-   url = ../kvm.git
+   url = git://git.kernel.org/pub/scm/linux/kernel/git/avi/kvm.git
   


'../kvm.git' is interpreted relative to the origin url, which is usually 
git://git.kernel.org/pub/scm/linux/kernel/git/avi/kvm-kmod.git, so it 
all works out (including when kvm-kmod.git was cloned using the http 
protocol, or from a mirror).


What exactly are you trying to fix?

--
error compiling committee.c: too many arguments to function

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [KVM PATCH 1/2] KVM: Directly inject interrupts via irqfd

2009-10-22 Thread Avi Kivity

On 10/22/2009 05:14 PM, Gregory Haskins wrote:

Yeah, I was thinking about that after I initially responded to Gleb.

I am thinking something along these lines:

Provide a function that lets you query a GSI for whether it supports
LOCKLESS or not.  Then we can either do one of two things:

1) Check for the LOCKLESS attribute at irqfd registration, fail if not
present
   


This is the most practical path and leads to the smallest code.  However 
it has the deficiency of exposing internal implementation details to 
userspace.  In theory userspace could use msi and edge-triggered 
pic/ioapic interrupts equally well, it shouldn't have to know that we 
didn't bother to lockfree ioapic/pic.



2) Cache the LOCKLESS attribute in the irqfd structure, and either go
direct or defer to a workqueue depending on the flag.
   


While this leads to larger code, it is more consistent.

--
error compiling committee.c: too many arguments to function

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [ANNOUNCE] Sheepdog: Distributed Storage System for KVM

2009-10-22 Thread Avi Kivity

On 10/21/2009 07:13 AM, MORITA Kazutaka wrote:

Hi everyone,

Sheepdog is a distributed storage system for KVM/QEMU. It provides
highly available block level storage volumes to VMs like Amazon EBS.
Sheepdog supports advanced volume management features such as snapshot,
cloning, and thin provisioning. Sheepdog runs on several tens or hundreds
of nodes, and the architecture is fully symmetric; there is no central
node such as a meta-data server.


Very interesting!  From a very brief look at the code, it looks like the 
sheepdog block format driver is a network client that is able to access 
highly available images, yes?


If so, is it reasonable to compare this to a cluster file system setup 
(like GFS) with images as files on this filesystem?  The difference 
would be that clustering is implemented in userspace in sheepdog, but in 
the kernel for a clustering filesystem.


How is load balancing implemented?  Can you move an image transparently 
while a guest is running?  Will an image be moved closer to its guest?  
Can you stripe an image across nodes?


Do you support multiple guests accessing the same image?

What about fault tolerance - storing an image redundantly on multiple nodes?

--
error compiling committee.c: too many arguments to function

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [KVM PATCH 1/2] KVM: Directly inject interrupts via irqfd

2009-10-22 Thread Gregory Haskins
Avi Kivity wrote:
 On 10/22/2009 05:14 PM, Gregory Haskins wrote:
 Yeah, I was thinking about that after I initially responded to Gleb.

 I am thinking something along these lines:

 Provide a function that lets you query a GSI for whether it supports
 LOCKLESS or not.  Then we can either do one of two things:

 1) Check for the LOCKLESS attribute at irqfd registration, fail if not
 present

 
 This is the most practical path and leads to the smallest code.  However
 it has the deficiency of exposing internal implementation details to
 userspace.  In theory userspace could use msi and edge-triggered
 pic/ioapic interrupts equally well, it shouldn't have to know that we
 didn't bother to lockfree ioapic/pic.
 
 2) Cache the LOCKLESS attribute in the irqfd structure, and either go
 direct or defer to a workqueue depending on the flag.

 
 While this leads to larger code, it is more consistent.
 

Yeah, I think you are right.  Consider these two patches retracted, and
I will rewrite it with this concept in place.

Kind Regards,
-Greg



signature.asc
Description: OpenPGP digital signature


[PATCH -tip] sched, cpuacct: fix niced guest time accounting

2009-10-22 Thread Ryota Ozaki
Hi list,
(CC: Ingo and Avi)

CPU time of a guest is always accounted in 'user' time
without concern for the nice value of its counterpart
process although the guest is scheduled under the nice
value.

This patch fixes the defect and accounts cpu time of
a niced guest in 'nice' time as same as a niced process.

And also the patch adds 'guest_nice' to cpuacct. The
value provides niced guest cpu time which is like 'nice'
to 'user'.

This patch has already reviewed and acked by Avi on KVM ML.

The original discussions can be found here.
http://www.mail-archive.com/kvm@vger.kernel.org/msg23982.html
http://www.mail-archive.com/kvm@vger.kernel.org/msg23860.html

Thanks,
  ozaki-r


From 2be4eb881b35a879b15f4ac58f117455d10535fa Mon Sep 17 00:00:00 2001
From: Ryota Ozaki ozaki.ry...@gmail.com
Date: Tue, 20 Oct 2009 22:41:12 +0900
Subject: [PATCH] sched, cpuacct: fix niced guest time accounting

CPU time of a guest is always accounted in 'user' time
without concern for the nice value of its counterpart
process although the guest is scheduled under the nice
value.

This patch fixes the defect and accounts cpu time of
a niced guest in 'nice' time as same as a niced process.

And also the patch adds 'guest_nice' to cpuacct. The
value provides niced guest cpu time which is like 'nice'
to 'user'.

Signed-off-by: Ryota Ozaki ozaki.ry...@gmail.com
Acked-by: Avi Kivity a...@redhat.com
---
 Documentation/filesystems/proc.txt |3 ++-
 fs/proc/stat.c |   17 +++--
 include/linux/kernel_stat.h|1 +
 kernel/sched.c |9 +++--
 4 files changed, 21 insertions(+), 9 deletions(-)

diff --git a/Documentation/filesystems/proc.txt
b/Documentation/filesystems/proc.txt
index 2c48f94..4af0018 100644
--- a/Documentation/filesystems/proc.txt
+++ b/Documentation/filesystems/proc.txt
@@ -1072,7 +1072,8 @@ second).  The meanings of the columns are as
follows, from left to right:
 - irq: servicing interrupts
 - softirq: servicing softirqs
 - steal: involuntary wait
-- guest: running a guest
+- guest: running a normal guest
+- guest_nice: running a niced guest

 The intr line gives counts of interrupts  serviced since boot time, for each
 of the  possible system interrupts.   The first  column  is the  total of  all
diff --git a/fs/proc/stat.c b/fs/proc/stat.c
index 7cc726c..67c30a7 100644
--- a/fs/proc/stat.c
+++ b/fs/proc/stat.c
@@ -27,7 +27,7 @@ static int show_stat(struct seq_file *p, void *v)
int i, j;
unsigned long jif;
cputime64_t user, nice, system, idle, iowait, irq, softirq, steal;
-   cputime64_t guest;
+   cputime64_t guest, guest_nice;
u64 sum = 0;
u64 sum_softirq = 0;
unsigned int per_softirq_sums[NR_SOFTIRQS] = {0};
@@ -36,7 +36,7 @@ static int show_stat(struct seq_file *p, void *v)

user = nice = system = idle = iowait =
irq = softirq = steal = cputime64_zero;
-   guest = cputime64_zero;
+   guest = guest_nice = cputime64_zero;
getboottime(boottime);
jif = boottime.tv_sec;

@@ -51,6 +51,8 @@ static int show_stat(struct seq_file *p, void *v)
softirq = cputime64_add(softirq, kstat_cpu(i).cpustat.softirq);
steal = cputime64_add(steal, kstat_cpu(i).cpustat.steal);
guest = cputime64_add(guest, kstat_cpu(i).cpustat.guest);
+   guest_nice = cputime64_add(guest_nice,
+   kstat_cpu(i).cpustat.guest_nice);
for_each_irq_nr(j) {
sum += kstat_irqs_cpu(j, i);
}
@@ -65,7 +67,7 @@ static int show_stat(struct seq_file *p, void *v)
}
sum += arch_irq_stat();

-   seq_printf(p, cpu  %llu %llu %llu %llu %llu %llu %llu %llu %llu¥n,
+   seq_printf(p, cpu  %llu %llu %llu %llu %llu %llu %llu %llu %llu 
%llu¥n,
(unsigned long long)cputime64_to_clock_t(user),
(unsigned long long)cputime64_to_clock_t(nice),
(unsigned long long)cputime64_to_clock_t(system),
@@ -74,7 +76,8 @@ static int show_stat(struct seq_file *p, void *v)
(unsigned long long)cputime64_to_clock_t(irq),
(unsigned long long)cputime64_to_clock_t(softirq),
(unsigned long long)cputime64_to_clock_t(steal),
-   (unsigned long long)cputime64_to_clock_t(guest));
+   (unsigned long long)cputime64_to_clock_t(guest),
+   (unsigned long long)cputime64_to_clock_t(guest_nice));
for_each_online_cpu(i) {

/* Copy values here to work around gcc-2.95.3, gcc-2.96 */
@@ -88,8 +91,9 @@ static int show_stat(struct seq_file *p, void *v)
softirq = kstat_cpu(i).cpustat.softirq;
steal = kstat_cpu(i).cpustat.steal;
guest = kstat_cpu(i).cpustat.guest;
+   guest_nice = kstat_cpu(i).cpustat.guest_nice;
seq_printf(p,
-   cpu%d %llu %llu %llu 

Re: [PATCH 5/5] Nested VMX patch 5 implements vmlaunch and vmresume

2009-10-22 Thread Orit Wasserman


Gleb Natapov g...@redhat.com wrote on 22/10/2009 11:04:58:

 From:

 Gleb Natapov g...@redhat.com

 To:

 Orit Wasserman/Haifa/i...@ibmil

 Cc:

 Abel Gordon/Haifa/i...@ibmil, aligu...@us.ibm.com, Ben-Ami Yassour1/
 Haifa/i...@ibmil, kvm@vger.kernel.org, md...@us.ibm.com, Muli Ben-
 Yehuda/Haifa/i...@ibmil

 Date:

 22/10/2009 11:05

 Subject:

 Re: [PATCH 5/5] Nested VMX patch 5 implements vmlaunch and vmresume

 On Wed, Oct 21, 2009 at 04:43:44PM +0200, Orit Wasserman wrote:
@@ -4641,10 +4955,13 @@ static void vmx_complete_interrupts(struct
   vcpu_vmx *vmx)
int type;
bool idtv_info_valid;
   
-   exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
-
vmx-exit_reason = vmcs_read32(VM_EXIT_REASON);
   
+   if (vmx-nested.nested_mode)
+  return;
+
   Why return here? What the function does that should not be done in
   nested mode?
  In nested mode L0 injects an interrupt to L2 only in one scenario,
  if there is an IDT_VALID event and L0 decides to run L2 again and not
to
  switch back to L1.
  In all other cases the injection is handled by L1.
 This is exactly the kind of scenario that is handled by
 vmx_complete_interrupts(). (vmx|svm)_complete_interrups() store
 pending event in arch agnostic way and re-injection is handled by
 x86.c You bypass this logic by inserting return here and introducing
 nested_handle_valid_idt() function below.
The only location we can truly know if we are switching to L1 is in
vmx_vcpu_run
because enable_irq_window (that is called after handling the exit) can
decide to
switch to L1 because of an interrupt.
In order to simplify our code it was simpler to bypass
vmx_complete_interrupts when it is called (after
running L2) and to add nested_handle_valid_idt just before running L2.
  
+   exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
+
/* Handle machine checks before interrupts are enabled */
if ((vmx-exit_reason == EXIT_REASON_MCE_DURING_VMENTRY)
|| (vmx-exit_reason == EXIT_REASON_EXCEPTION_NMI
@@ -4747,6 +5064,60 @@ static void fixup_rmode_irq(struct vcpu_vmx
  *vmx)
   | vmx-rmode.irq.vector;
 }
   
+static int nested_handle_valid_idt(struct kvm_vcpu *vcpu)
+{
   It seems by this function you are trying to bypass general event
   reinjection logic. Why?
  See above.
 The logic implemented by this function is handled in x86.c in arch
 agnostic way. Is there something wrong with this?
See my comment before

+   vmx-launched = vmx-nested.l2_state-launched;
+
   Can you explain why -launched logic is needed?
  It is possible L1 called vmlaunch but we didn't actually run L2 (for
  example there was an interrupt and
  enable_irq_window switched back to L1 before running L2). L1 thinks the
  vmlaunch was successful and call vmresume in the next time
  but KVM needs to call vmlaunch for L2.
 handle_vmlauch() and handle_vmresume() are exactly the same. Why KVM
needs
 to run one and not the other?
Yes they are very similar (almost the same code) the only difference is the
check of vmclear,
we need to emulate the vmx hardware behavior for those two commands and
check VMC12 state.

+static int nested_vmx_vmexit(struct kvm_vcpu *vcpu,
+  bool is_interrupt)
+{
+   struct vcpu_vmx *vmx = to_vmx(vcpu);
+   int initial_pfu_active = vcpu-fpu_active;
+
+   if (!vmx-nested.nested_mode) {
+  printk(KERN_INFO WARNING: %s called but not in nested mode
\n,
+ __func__);
+  return 0;
+   }
+
+   save_msrs(vmx-guest_msrs, vmx-save_nmsrs);
+
+   sync_cached_regs_to_vmcs(vcpu);
+
+   if (!nested_map_shadow_vmcs(vcpu)) {
+  printk(KERN_INFO Error mapping shadow vmcs\n);
+  set_rflags_to_vmx_fail_valid(vcpu);
   Error during vmexit should set abort flag, not change flags.
  I think this is more a vmlaunch/vmresume error (in the code that switch
  back to L1).
 How is this vmlaunch/vmresume error? This function is called to exit
 from L2 guest while on L2 vcms. It is called asynchronously in respect
 to L2 guest and you modify L2 guest rflags register at unpredictable
 place here.
OK.

 --
  Gleb.

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 1/4] KVM test: Add new utility functions to kvm_utils

2009-10-22 Thread Lucas Meneghel Rodrigues
Some distributors ship CD and DVD files with SHA1 hash sums instead
of MD5 hash sums, so let's extend the kvm_utils functions to
evaluate and compare SHA1 hashes:

* hash_file(): Calculate a hash sum for a file, supports SHA1 check as well,
replaces md5sum_file()
* unmap_url_cache(): Reimplementation of a function present on
autotest utils that downloads a file and caches it. The reason
I'm keeping it is that I want more testing before I move all
needed function definitions to the autotest API

Also, extend VM.create() method to support SHA1 check.

2nd try, addressing Uri's comments.

Signed-off-by: Lucas Meneghel Rodrigues l...@redhat.com
---
 client/tests/kvm/calc_md5sum_1m.py |2 +-
 client/tests/kvm/kvm_utils.py  |  105 
 client/tests/kvm/kvm_vm.py |   20 +---
 3 files changed, 108 insertions(+), 19 deletions(-)

diff --git a/client/tests/kvm/calc_md5sum_1m.py 
b/client/tests/kvm/calc_md5sum_1m.py
index 2325673..153a1e0 100755
--- a/client/tests/kvm/calc_md5sum_1m.py
+++ b/client/tests/kvm/calc_md5sum_1m.py
@@ -18,4 +18,4 @@ else:
 if not os.access(fname, os.F_OK) or not os.access(fname, os.R_OK):
 print 'bad file name or permissions'
 else:
-print kvm_utils.md5sum_file(fname, 1024*1024)
+print kvm_utils.hash_file(fname, 1024*1024, method=md5)
diff --git a/client/tests/kvm/kvm_utils.py b/client/tests/kvm/kvm_utils.py
index 53b664a..f72984a 100644
--- a/client/tests/kvm/kvm_utils.py
+++ b/client/tests/kvm/kvm_utils.py
@@ -4,8 +4,8 @@ KVM test utility functions.
 @copyright: 2008-2009 Red Hat Inc.
 
 
-import md5, thread, subprocess, time, string, random, socket, os, signal, pty
-import select, re, logging, commands, cPickle
+import md5, sha, thread, subprocess, time, string, random, socket, os, signal
+import select, re, logging, commands, cPickle, pty
 from autotest_lib.client.bin import utils
 from autotest_lib.client.common_lib import error
 import kvm_subprocess
@@ -760,23 +760,35 @@ def wait_for(func, timeout, first=0.0, step=1.0, 
text=None):
 return None
 
 
-def md5sum_file(filename, size=None):
+def hash_file(filename, size=None, method=md5):
 
-Calculate the md5sum of filename.
+Calculate the hash of filename.
 If size is not None, limit to first size bytes.
 Throw exception if something is wrong with filename.
 Can be also implemented with bash one-liner (assuming size%1024==0):
-dd if=filename bs=1024 count=size/1024 | md5sum -
+dd if=filename bs=1024 count=size/1024 | sha1sum -
 
-@param filename: Path of the file that will have its md5sum calculated.
-@param returns: md5sum of the file.
+@param filename: Path of the file that will have its hash calculated.
+@param method: Method used to calculate the hash. Supported methods:
+* md5
+* sha1
+@returns: Hash of the file, if something goes wrong, return None.
 
 chunksize = 4096
 fsize = os.path.getsize(filename)
-if not size or sizefsize:
+
+if not size or size  fsize:
 size = fsize
 f = open(filename, 'rb')
-o = md5.new()
+
+if method == md5:
+hash = md5.new()
+elif method == sha1:
+hash = sha.new()
+else:
+logging.error(Unknown hash type %s, returning None % method)
+return None
+
 while size  0:
 if chunksize  size:
 chunksize = size
@@ -784,7 +796,78 @@ def md5sum_file(filename, size=None):
 if len(data) == 0:
 logging.debug(Nothing left to read but size=%d % size)
 break
-o.update(data)
+hash.update(data)
 size -= len(data)
 f.close()
-return o.hexdigest()
+return hash.hexdigest()
+
+
+def get_hash_from_file(hash_path, dvd_basename):
+
+Get the a hash from a given DVD image from a hash file
+(Hash files are usually named MD5SUM or SHA1SUM and are located inside the
+download directories of the DVDs)
+
+@param hash_path: Local path to a hash file.
+@param cd_image: Basename of a CD image
+
+hash_file = open(hash_path, 'r')
+for line in hash_file.readlines():
+if dvd_basename in line:
+return line.split()[0]
+
+
+def unmap_url_cache(cachedir, url, expected_hash, method=md5):
+
+Downloads a file from a URL to a cache directory. If the file is already
+at the expected position and has the expected hash, let's not download it
+again.
+
+@param cachedir: Directory that might hold a copy of the file we want to
+download.
+@param url: URL for the file we want to download.
+@param expected_hash: Hash string that we expect the file downloaded to
+have.
+@param method: Method used to calculate the hash string (md5, sha1).
+
+# Let's convert cachedir to a canonical path, if it's not already
+cachedir = os.path.realpath(cachedir)
+if not os.path.isdir(cachedir):
+try:
+

Re: [PATCH 1/2] kvm-kmod: Use the main development tree of kvm as Linux submodule

2009-10-22 Thread Wolfgang Mauerer
Hi,

Avi Kivity wrote:
 On 10/20/2009 02:11 PM, wolfgang.maue...@siemens.com wrote:
 From: Wolfgang Mauererwolfgang.maue...@siemens.com

 Most people won't have the sources installed in the path
 that is the current default setting.

 --- a/.gitmodules
 +++ b/.gitmodules
 @@ -1,3 +1,3 @@
   [submodule linux-2.6]
  path = linux-2.6
 -url = ../kvm.git
 +url = git://git.kernel.org/pub/scm/linux/kernel/git/avi/kvm.git

 
 '../kvm.git' is interpreted relative to the origin url, which is usually 
 git://git.kernel.org/pub/scm/linux/kernel/git/avi/kvm-kmod.git, so it 
 all works out (including when kvm-kmod.git was cloned using the http 
 protocol, or from a mirror).
 
 What exactly are you trying to fix?

consider that you clone kvm-kmod from your repo and
then create local clones from which you do the build. If you don't
happen to have /path/to/my/kvm-kmod/../kvm, then the relative
submodule URL won't work, but the absolute one will.

Since the absolute URL wouldn't break anything AFAIK, but
makes this scenario work, I'd favour it, although my
life would not be much worse with the relative default
URL ;-)

Cheers, Wolfgang
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 1/2] kvm-kmod: Use the main development tree of kvm as Linux submodule

2009-10-22 Thread Avi Kivity

On 10/22/2009 06:13 PM, Wolfgang Mauerer wrote:

Hi,

Avi Kivity wrote:
   

On 10/20/2009 02:11 PM, wolfgang.maue...@siemens.com wrote:
 

From: Wolfgang Mauererwolfgang.maue...@siemens.com

Most people won't have the sources installed in the path
that is the current default setting.

--- a/.gitmodules
+++ b/.gitmodules
@@ -1,3 +1,3 @@
   [submodule linux-2.6]
path = linux-2.6
-   url = ../kvm.git
+   url = git://git.kernel.org/pub/scm/linux/kernel/git/avi/kvm.git

   

'../kvm.git' is interpreted relative to the origin url, which is usually
git://git.kernel.org/pub/scm/linux/kernel/git/avi/kvm-kmod.git, so it
all works out (including when kvm-kmod.git was cloned using the http
protocol, or from a mirror).

What exactly are you trying to fix?
 

consider that you clone kvm-kmod from your repo and
then create local clones from which you do the build. If you don't
happen to have /path/to/my/kvm-kmod/../kvm, then the relative
submodule URL won't work, but the absolute one will.
   


Typically you create the clones using 'git submodule init  git 
submodule update', not clone them manually.



Since the absolute URL wouldn't break anything AFAIK, but
makes this scenario work, I'd favour it, although my
life would not be much worse with the relative default
URL ;-)
   


You can still make it work by editing .git/config and updating the URLs.

--
error compiling committee.c: too many arguments to function

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] v4: use upstream kvm_vcpu_ioctl

2009-10-22 Thread Marcelo Tosatti
On Tue, Oct 20, 2009 at 04:39:04PM -0200, Glauber Costa wrote:
 [v2: we already return -errno, so fix testers ]
 [v3: keep error message for apic related failures ]
 [v4: fix silly compile mistake ]
 
 Signed-off-by: Glauber Costa glom...@redhat.com

Applied, thanks (please mind the indentation).

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 1/2] kvm-kmod: Use the main development tree of kvm as Linux submodule

2009-10-22 Thread Wolfgang Mauerer
Avi Kivity wrote:
 On 10/22/2009 06:13 PM, Wolfgang Mauerer wrote:
 Hi,

 Avi Kivity wrote:

 On 10/20/2009 02:11 PM, wolfgang.maue...@siemens.com wrote:
  
 From: Wolfgang Mauererwolfgang.maue...@siemens.com

 Most people won't have the sources installed in the path
 that is the current default setting.

 --- a/.gitmodules
 +++ b/.gitmodules
 @@ -1,3 +1,3 @@
[submodule linux-2.6]
path = linux-2.6
 -  url = ../kvm.git
 +  url = git://git.kernel.org/pub/scm/linux/kernel/git/avi/kvm.git


 '../kvm.git' is interpreted relative to the origin url, which is usually
 git://git.kernel.org/pub/scm/linux/kernel/git/avi/kvm-kmod.git, so it
 all works out (including when kvm-kmod.git was cloned using the http
 protocol, or from a mirror).

 What exactly are you trying to fix?
  
 consider that you clone kvm-kmod from your repo and
 then create local clones from which you do the build. If you don't
 happen to have /path/to/my/kvm-kmod/../kvm, then the relative
 submodule URL won't work, but the absolute one will.

 
 Typically you create the clones using 'git submodule init  git 
 submodule update', not clone them manually.
But that refers to linux-2.6/, I suppose, while I was talking
about kvm-kmod.
 
 Since the absolute URL wouldn't break anything AFAIK, but
 makes this scenario work, I'd favour it, although my
 life would not be much worse with the relative default
 URL ;-)

 
 You can still make it work by editing .git/config and updating the URLs.
 
that's certainly true. I'd say it's a matter of taste, so
I'll just add a hint about .git/config to the README and leave
.gitmodules unmolested in the future.

Cheers, Wolfgang
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] allow userspace to adjust kvmclock offset

2009-10-22 Thread Avi Kivity

On 10/15/2009 04:58 PM, Glauber Costa wrote:



The motivation for relative adjustment is when you have a jitter
resistant place to gather timing information (like the kernel, which can
disable interrupts and preemption), then pass it on to kvm without
losing information due to scheduling.  For migration there is no such
place since it involves two hosts, but it makes sense to support
relative adjustments.
 

Since we added the padding you asked for, we could use that bit of information
to define whether it will be a relative or absolute adjustment, then. Right now,
I don't see the point of implementing a code path that will be completely 
untested.

I'd leave it this way until someone comes up with a need.
   


I agree with that, but padding by itself is insufficient.  You also need 
a flags field.


--
error compiling committee.c: too many arguments to function

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [ANNOUNCE] Sheepdog: Distributed Storage System for KVM

2009-10-22 Thread Anthony Liguori

Avi Kivity wrote:

On 10/21/2009 07:13 AM, MORITA Kazutaka wrote:

Hi everyone,

Sheepdog is a distributed storage system for KVM/QEMU. It provides
highly available block level storage volumes to VMs like Amazon EBS.
Sheepdog supports advanced volume management features such as snapshot,
cloning, and thin provisioning. Sheepdog runs on several tens or 
hundreds

of nodes, and the architecture is fully symmetric; there is no central
node such as a meta-data server.


Very interesting!  From a very brief look at the code, it looks like 
the sheepdog block format driver is a network client that is able to 
access highly available images, yes?


If so, is it reasonable to compare this to a cluster file system setup 
(like GFS) with images as files on this filesystem?  The difference 
would be that clustering is implemented in userspace in sheepdog, but 
in the kernel for a clustering filesystem.


I'm still in the process of reading the code, but that's the impression 
I got too.  It made me think that the protocol for qemu to communicate 
with sheepdog could be a filesystem protocol (like 9p) and sheepdog 
could expose itself as a synthetic.  There are some interesting 
ramifications to something like that--namely that you could mount 
sheepdog on localhost and interact with it through the vfs.


Very interesting stuff, I'm looking forward to examining more closely.

Regards,

Anthony Liguori
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: vhost-net patches

2009-10-22 Thread Shirley Ma
On Thu, 2009-10-22 at 15:13 +0200, Michael S. Tsirkin wrote:
 OK, I sent a patch that should fix the errors for you.
 Could you please confirm, preferably on-list, whether
 the patch makes the errors go away for you with
 userspace virtio?

Confirmed, your patch has fixed irq handler mismatch errors.

 However, as I said previously, it's good to fix them but I think
 they are unrelated to the fact that vhost does not work
 for you.

Yes, agreed. One observation is when I enable PCI MSI in guest kernel, I
found that even without vhost supportin host kernel the network doesn't
work either. So I think this is nothing related to vhost. I need to find
why PCI MSI doesn't work for me.

 Shirley, if you have the time, please send details
 on your setup and observations on tcpdump output
 in both guest and host with vhost enabled.

Yes, here are details:

HW and distros:
T61 laptop, cpu is 
vendor_id   : GenuineIntel
cpu family  : 6
model   : 15
model name  : Intel(R) Core(TM)2 Duo CPU T7700  @ 2.40GHz
stepping: 11
cpu MHz : 2393.902
cache size  : 4096 KB

Guest installed Fedora 10, Host installed Fedora 10 and updated to
Fedora11, both doesn't work with guest kernel MSI enabled w/i, w/o vhost
support.


Source trees:
clone your qemu-kvm and vhost git. host kernel is vhost git, guest
kernel 2.6.32-rc5

Compile option:
qemu: ./configure (default target-list = x86_64-softmmu)
guest kernel: PCI MSI, virio enabled
host kernel: vhost, kvm, evenfd all enabled

I tried tap, raw, vnet0, eth0, here are some qemu commandline example:

mst/qemu-kvm/x86_64-softmmu/qemu-system-x86_64
-s /home/xma/images/fedora10-2-vm -name guest2 -boot c -M pc -m 512 -smp
1 -monitor pty -drive
file=/home/xma/images/fedora10-2-vm,if=virtio,boot=on -net
raw,ifname=eth0 -net nic,macaddr=54:52:00:46:26:80,model=virtio,vhost

mst/qemu-kvm/x86_64-softmmu/qemu-system-x86_64
-s /home/xma/images/fedora10-2-vm -name guest2 -boot c -M pc -m 512 -smp
1 -monitor pty -drive
file=/home/xma/images/fedora10-2-vm,if=virtio,boot=on -net
raw,ifname=vnet0 -net nic,model,macaddr=54:52:00:46:26:80=virtio,vhost 

Please let me know what I can do next. I am eager to test the vhost
performance for both latency and bw.

tcpdump output:
When I ping from guest to host, I didn't see any packets from any
interfaces, not even an arp request.

When I ping from host to guest, guest interface didn't see any packets
either, not even an arp request.

Thanks for your help!
Shirley





--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: vhost-net patches

2009-10-22 Thread Shirley Ma
On Thu, 2009-10-22 at 10:23 -0700, Shirley Ma wrote:
 Yes, agreed. One observation is when I enable PCI MSI in guest kernel,
 I
 found that even without vhost supportin host kernel the network
 doesn't
 work either. So I think this is nothing related to vhost. I need to
 find
 why PCI MSI doesn't work for me.

Let me clarify this, here is the table:

1. upstream qemu + guest kernel MSI support --- doesn't work
2. upstream qemu + guest kernel no MSI support --- work
3. mst qemu git tree + guest kernel MSI support - vhost --- work
4. mst qemu git tree + guest kernel MSI support + vhost -- doesn't work

I hope it's clear.

thanks
Shirley

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: vhost-net patches

2009-10-22 Thread Michael S. Tsirkin
On Thu, Oct 22, 2009 at 10:32:55AM -0700, Shirley Ma wrote:
 On Thu, 2009-10-22 at 10:23 -0700, Shirley Ma wrote:
  Yes, agreed. One observation is when I enable PCI MSI in guest kernel,
  I
  found that even without vhost supportin host kernel the network
  doesn't
  work either. So I think this is nothing related to vhost. I need to
  find
  why PCI MSI doesn't work for me.
 
 Let me clarify this, here is the table:
 
 1. upstream qemu + guest kernel MSI support --- doesn't work
 2. upstream qemu + guest kernel no MSI support --- work
 3. mst qemu git tree + guest kernel MSI support - vhost --- work
 4. mst qemu git tree + guest kernel MSI support + vhost -- doesn't work

Upstream is Avi's qemu-kvm.git?
So, for a moment taking vhost out of the equation, it seems that MSI was
broken in Avi's tree again, after I forked my tree?


 
 I hope it's clear.
 
 thanks
 Shirley
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: vhost-net patches

2009-10-22 Thread Shirley Ma
On Thu, 2009-10-22 at 19:36 +0200, Michael S. Tsirkin wrote:
 Upstream is Avi's qemu-kvm.git?
 So, for a moment taking vhost out of the equation, it seems that MSI
 was
 broken in Avi's tree again, after I forked my tree?

The upper stream qemu git tree never worked for me w/i MSI, the boot
hung on booting. I tried to debug this, hasn't found any hint. But
Sridhar has same configure, it works for him.

But with your git tree, guest kernel works without vhost; it doesn't
work with vhost. So the vhost might have some issues.

Thanks
Shirley

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: vhost-net patches

2009-10-22 Thread Michael S. Tsirkin
On Thu, Oct 22, 2009 at 10:23:44AM -0700, Shirley Ma wrote:
 On Thu, 2009-10-22 at 15:13 +0200, Michael S. Tsirkin wrote:
  OK, I sent a patch that should fix the errors for you.
  Could you please confirm, preferably on-list, whether
  the patch makes the errors go away for you with
  userspace virtio?
 
 Confirmed, your patch has fixed irq handler mismatch errors.
 
  However, as I said previously, it's good to fix them but I think
  they are unrelated to the fact that vhost does not work
  for you.
 
 Yes, agreed. One observation is when I enable PCI MSI in guest kernel, I
 found that even without vhost supportin host kernel the network doesn't
 work either.

Yes, but that's on Avi's qemu-kvm.git, so probably merge problems again.

 So I think this is nothing related to vhost. I need to find
 why PCI MSI doesn't work for me.

Probably just bisect until you find the bad commit.

  Shirley, if you have the time, please send details
  on your setup and observations on tcpdump output
  in both guest and host with vhost enabled.
 
 Yes, here are details:
 
 HW and distros:
 T61 laptop, cpu is 
 vendor_id : GenuineIntel
 cpu family: 6
 model : 15
 model name: Intel(R) Core(TM)2 Duo CPU T7700  @ 2.40GHz
 stepping  : 11
 cpu MHz   : 2393.902
 cache size: 4096 KB
 
 Guest installed Fedora 10, Host installed Fedora 10 and updated to
 Fedora11, both doesn't work with guest kernel MSI enabled w/i, w/o vhost
 support.

So, IIUC, my tree with MSI without vhost works?

 
 Source trees:
 clone your qemu-kvm and vhost git. host kernel is vhost git, guest
 kernel 2.6.32-rc5
 
 Compile option:
 qemu: ./configure (default target-list = x86_64-softmmu)
 guest kernel: PCI MSI, virio enabled
 host kernel: vhost, kvm, evenfd all enabled
 
 I tried tap, raw, vnet0, eth0,

what are vnet0 and eth0?
Do you have a bridge in host?
What do brctl show and ifconfig -a show in host?

 here are some qemu commandline example:
 
 mst/qemu-kvm/x86_64-softmmu/qemu-system-x86_64
 -s /home/xma/images/fedora10-2-vm -name guest2 -boot c -M pc -m 512 -smp
 1 -monitor pty -drive
 file=/home/xma/images/fedora10-2-vm,if=virtio,boot=on -net
 raw,ifname=eth0 -net nic,macaddr=54:52:00:46:26:80,model=virtio,vhost
 
 mst/qemu-kvm/x86_64-softmmu/qemu-system-x86_64
 -s /home/xma/images/fedora10-2-vm -name guest2 -boot c -M pc -m 512 -smp
 1 -monitor pty -drive
 file=/home/xma/images/fedora10-2-vm,if=virtio,boot=on -net
 raw,ifname=vnet0 -net nic,model,macaddr=54:52:00:46:26:80=virtio,vhost 
 
 Please let me know what I can do next. I am eager to test the vhost
 performance for both latency and bw.
 tcpdump output:
 When I ping from guest to host, I didn't see any packets from any
 interfaces, not even an arp request.
 
 When I ping from host to guest, guest interface didn't see any packets
 either, not even an arp request.

Did you try tcpdump on the tap interface in host?

 Thanks for your help!
 Shirley
 
 
 
 

Possibly we'll have to debug this in vhost in host kernel.
I would debug this directly, it's just that my setup is somehow
different and I do not see this issue, otherwise I would not
waste your time.

Can we add some printks?
handle_tx has this at the top:

if (!sock || !sock_writeable(sock-sk))
return;

Could you please add

 printk(KERN_ERR %s: !sock = %d, !sock || !sock_writeable(sock-sk) =
%d,
__func__, !sock , !sock || !sock_writeable(sock-sk));

*Before* these checks?
Then make modules modules_install
rmmod vhost_net
insmod vhost_net
and re-run.
If you want me to send a patch that does this, let me know please.

Thanks!
-- 
MST
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: vhost-net patches

2009-10-22 Thread Michael S. Tsirkin
On Thu, Oct 22, 2009 at 10:44:29AM -0700, Shirley Ma wrote:
 On Thu, 2009-10-22 at 19:36 +0200, Michael S. Tsirkin wrote:
  Upstream is Avi's qemu-kvm.git?
  So, for a moment taking vhost out of the equation, it seems that MSI
  was
  broken in Avi's tree again, after I forked my tree?
 
 The upper stream qemu git tree never worked for me w/i MSI, the boot
 hung on booting. I tried to debug this, hasn't found any hint. But
 Sridhar has same configure, it works for him.
 
 But with your git tree, guest kernel works without vhost;

What happens if you reset my tree to commit
47e465f031fc43c53ea8f08fa55cc3482c6435c8?

 it doesn't work with vhost.
 So the vhost might have some issues.

Looks like there are 2 issues:
- upstream issue
- vhost issue
Let's try to debug both.

 Thanks
 Shirley
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: vhost-net patches

2009-10-22 Thread Shirley Ma
On Thu, 2009-10-22 at 19:47 +0200, Michael S. Tsirkin wrote:
 What happens if you reset my tree to commit
 47e465f031fc43c53ea8f08fa55cc3482c6435c8?

I am going to clean up my upperstream git tree and retest first. Then I
will try back up this commit.

 Looks like there are 2 issues:
 - upstream issue
 - vhost issue
 Let's try to debug both.

Sounds good.

Thanks
Shirley

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: vhost-net patches

2009-10-22 Thread Sridhar Samudrala
On Thu, 2009-10-22 at 19:43 +0200, Michael S. Tsirkin wrote:

 
 Possibly we'll have to debug this in vhost in host kernel.
 I would debug this directly, it's just that my setup is somehow
 different and I do not see this issue, otherwise I would not
 waste your time.
 
 Can we add some printks?
 handle_tx has this at the top:
 
 if (!sock || !sock_writeable(sock-sk))
 return;

I added some debug printks in handle_rx and handle_tx
get_user() calls are failing with EFAULT.

Oct 21 10:51:41 IBM-19B5263ED41-009047018100 kernel: handle_rx: net:f5494800
Oct 21 10:51:41 IBM-19B5263ED41-009047018100 kernel: Failed to enable 
notification: -14
Oct 21 10:51:41 IBM-19B5263ED41-009047018100 kernel: Failed to access avail idx 
at 0002 ret:-14
Oct 21 10:51:41 IBM-19B5263ED41-009047018100 kernel: Failed to disable 
notification: -14
Oct 21 10:51:41 IBM-19B5263ED41-009047018100 kernel: Failed to access avail idx 
at 0002 ret:-14
Oct 21 10:51:41 IBM-19B5263ED41-009047018100 kernel: handle_tx: net:f5494800
Oct 21 10:51:41 IBM-19B5263ED41-009047018100 kernel: Failed to access avail idx 
at 0002 ret:-14
Oct 21 10:51:48 IBM-19B5263ED41-009047018100 kernel: handle_tx: net:f5494800
Oct 21 10:51:48 IBM-19B5263ED41-009047018100 kernel: Failed to access avail idx 
at 0002 ret:-14
Oct 21 10:51:49 IBM-19B5263ED41-009047018100 kernel: handle_tx: net:f5494800
Oct 21 10:51:49 IBM-19B5263ED41-009047018100 kernel: Failed to access avail idx 
at 0002 ret:-14
Oct 21 10:51:53 IBM-19B5263ED41-009047018100 kernel: handle_tx: net:f5494800
Oct 21 10:51:53 IBM-19B5263ED41-009047018100 kernel: Failed to access avail idx 
at 0002 ret:-14
Oct 21 10:52:03 IBM-19B5263ED41-009047018100 kernel: handle_tx: net:f5494800
Oct 21 10:52:03 IBM-19B5263ED41-009047018100 kernel: Failed to access avail idx 
at 0002 ret:-14
Oct 21 10:52:22 IBM-19B5263ED41-009047018100 kernel: handle_tx: net:f5494800
Oct 21 10:52:22 IBM-19B5263ED41-009047018100 kernel: Failed to access avail idx 
at 0002 ret:-14
Oct 21 10:52:23 IBM-19B5263ED41-009047018100 kernel: handle_tx: net:f5494800
Oct 21 10:52:23 IBM-19B5263ED41-009047018100 kernel: Failed to access avail idx 
at 0002 ret:-14
Oct 21 10:53:17 IBM-19B5263ED41-009047018100 kernel: handle_tx: net:f5494800
Oct 21 10:53:17 IBM-19B5263ED41-009047018100 kernel: Failed to access avail idx 
at 0002 ret:-14
Oct 21 10:56:56 IBM-19B5263ED41-009047018100 kernel: handle_tx: net:f5494800
Oct 21 10:56:56 IBM-19B5263ED41-009047018100 kernel: Failed to access avail idx 
at 0002 ret:-14

Thanks
Sridhar
 Could you please add
 
  printk(KERN_ERR %s: !sock = %d, !sock || !sock_writeable(sock-sk) =
 %d,
 __func__, !sock , !sock || !sock_writeable(sock-sk));
 
 *Before* these checks?
 Then make modules modules_install
 rmmod vhost_net
 insmod vhost_net
 and re-run.
 If you want me to send a patch that does this, let me know please.
 
 Thanks!

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: vhost-net patches

2009-10-22 Thread Shirley Ma
On Thu, 2009-10-22 at 10:56 -0700, Shirley Ma wrote:
 On Thu, 2009-10-22 at 19:47 +0200, Michael S. Tsirkin wrote:
  What happens if you reset my tree to commit
  47e465f031fc43c53ea8f08fa55cc3482c6435c8?
 
 I am going to clean up my upperstream git tree and retest first. Then
 I
 will try back up this commit.

Cleaned my local qemu upperstream git tree and guest kernel works with
MSI enabled.

So only vhost does seem to have some issues in my configuration. 

Thanks
Shirley

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: qemu-kvm: sigsegv at exit

2009-10-22 Thread Marcelo Tosatti
On Thu, Oct 22, 2009 at 02:00:15PM +0200, Michael S. Tsirkin wrote:
 Hi!
 I'm sometimes getting segfaults when I kill qemu.
 This time I caught it when qemu was under gdb:
 
 
 Program received signal SIGSEGV, Segmentation fault.
 [Switching to Thread 0x411d0940 (LWP 14446)]
 0x0040afb4 in qemu_mod_timer (ts=0x19f0fd0, expire_time=62275467335)
 at /home/mst/scm/qemu-kvm/vl.c:1009
 1009if ((alarm_timer-flags  ALARM_FLAG_EXPIRED) == 0) {
 (gdb) l
 1004ts-next = *pt;
 1005*pt = ts;
 1006
 1007/* Rearm if necessary  */
 1008if (pt == active_timers[ts-clock-type]) {
 1009if ((alarm_timer-flags  ALARM_FLAG_EXPIRED) == 0) {
 1010qemu_rearm_alarm_timer(alarm_timer);
 1011}
 1012/* Interrupt execution to force deadline recalculation.  */
 1013if (use_icount)
 (gdb) p alarm_timer
 $1 = (struct qemu_alarm_timer *) 0x0
 (gdb) where
 #0  0x0040afb4 in qemu_mod_timer (ts=0x19f0fd0, 
 expire_time=62275467335)
 at /home/mst/scm/qemu-kvm/vl.c:1009
 #1  0x0041aadf in virtio_net_handle_tx (vdev=value optimized out, 
 vq=0x19f5af0)
 at /home/mst/scm/qemu-kvm/hw/virtio-net.c:696
 #2  0x00421669 in kvm_run (vcpu=0x19d46a0, env=0x19c2250) at 
 /home/mst/scm/qemu-kvm/qemu-kvm.c:797
 #3  0x004216d6 in kvm_cpu_exec (env=0x83d0f8) at 
 /home/mst/scm/qemu-kvm/qemu-kvm.c:1714
 #4  0x00422981 in ap_main_loop (_env=value optimized out) at 
 /home/mst/scm/qemu-kvm/qemu-kvm.c:1969
 #5  0x00377dc06367 in start_thread () from /lib64/libpthread.so.0
 #6  0x00377d0d30ad in clone () from /lib64/libc.so.6
 (gdb)
 
 So this probably means that we have already run quit_timers:
 
 static void quit_timers(void)
 {
 alarm_timer-stop(alarm_timer);
 alarm_timer = NULL;
 }
 
 but kvm vcpu thread is still running.
 
 
 Not sure what the right fix is here: should we stop
 kvm after main loop has exited?

kvm_main_loop_wait(env, 0) can process the stop request (signalling
iothread that vcpu is stopped, so its OK to exit) and continue to
kvm_cpu_exec.

Can you please try this:

diff --git a/qemu-kvm.c b/qemu-kvm.c
index 87ece3d..141c8b1 100644
--- a/qemu-kvm.c
+++ b/qemu-kvm.c
@@ -1931,7 +1931,8 @@ static int kvm_main_loop_cpu(CPUState *env)
 }
 if (run_cpu) {
 kvm_main_loop_wait(env, 0);
-kvm_cpu_exec(env);
+if (!is_cpu_stopped(env))
+kvm_cpu_exec(env);
 } else {
 kvm_main_loop_wait(env, 1000);
 }
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: I/O performance of VirtIO

2009-10-22 Thread Alexander Graf


Am 22.10.2009 um 18:29 schrieb Avi Kivity a...@redhat.com:


On 10/13/2009 08:35 AM, Jan Kiszka wrote:

It can be particularly slow if you use in-kernel irqchips and the
default NIC emulation (up to 10 times slower), some effect I always
wanted to understand on a rainy day. So, when you actually want -net
user, try -no-kvm-irqchip.



This might be due to a missing SIGIO or SIGALRM; -no-kvm-irqchip  
generates a lot of extra signals and thus polling opportunities.


Isn't that what dedicated io threads are supposed to solve?

Alex
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [ANNOUNCE] Sheepdog: Distributed Storage System for KVM

2009-10-22 Thread Alexander Graf


Am 22.10.2009 um 18:28 schrieb Anthony Liguori anth...@codemonkey.ws:


Avi Kivity wrote:

On 10/21/2009 07:13 AM, MORITA Kazutaka wrote:

Hi everyone,

Sheepdog is a distributed storage system for KVM/QEMU. It provides
highly available block level storage volumes to VMs like Amazon EBS.
Sheepdog supports advanced volume management features such as  
snapshot,
cloning, and thin provisioning. Sheepdog runs on several tens or  
hundreds
of nodes, and the architecture is fully symmetric; there is no  
central

node such as a meta-data server.


Very interesting!  From a very brief look at the code, it looks  
like the sheepdog block format driver is a network client that is  
able to access highly available images, yes?


If so, is it reasonable to compare this to a cluster file system  
setup (like GFS) with images as files on this filesystem?  The  
difference would be that clustering is implemented in userspace in  
sheepdog, but in the kernel for a clustering filesystem.


I'm still in the process of reading the code, but that's the  
impression I got too.  It made me think that the protocol for qemu  
to communicate with sheepdog could be a filesystem protocol (like 9p)


Speaking about 9p, what's the status there?

Alex
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH] KVM test: Fixing small bug on unattended install cfg

2009-10-22 Thread Lucas Meneghel Rodrigues
As in other tests, we should append the qemu extra
options instead of replacing them.

Signed-off-by: Lucas Meneghel Rodrigues l...@redhat.com
---
 client/tests/kvm/kvm_tests.cfg.sample |6 +++---
 1 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/client/tests/kvm/kvm_tests.cfg.sample 
b/client/tests/kvm/kvm_tests.cfg.sample
index 296449d..7a12981 100644
--- a/client/tests/kvm/kvm_tests.cfg.sample
+++ b/client/tests/kvm/kvm_tests.cfg.sample
@@ -56,7 +56,7 @@ variants:
 force_create_image = yes
 pre_command = scripts/unattended.py
 floppy = images/floppy.img
-extra_params = -boot d
+extra_params +=  -boot d
 
 - setup:install
 type = steps
@@ -319,7 +319,7 @@ variants:
 md5sum = e3b1e2d1ba42aa4705fa5f41771b3927
 md5sum_1m = dc8ddf90648c247339c721395aa49714
 tftp = images/tftpboot
-extra_params = -bootp /pxelinux.0 -boot n
+extra_params +=  -bootp /pxelinux.0 -boot n
 kernel_args = ks=floppy nicdelay=60
 unattended_file = unattended/Fedora-11.ks
 
@@ -336,7 +336,7 @@ variants:
 md5sum = 9d419844adeb93120215fe7505c9bce8
 md5sum_1m = 405ee05e2387a2e4328b008d5bcbdd1e
 tftp = images/tftpboot
-extra_params = -bootp /pxelinux.0 -boot n
+extra_params +=  -bootp /pxelinux.0 -boot n
 kernel_args = ks=floppy nicdelay=60
 unattended_file = unattended/Fedora-11.ks
 
-- 
1.6.2.5

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 00/27] Add KVM support for Book3s_64 (PPC64) hosts v5

2009-10-22 Thread Hollis Blanchard
On Wed, 2009-10-21 at 17:03 +0200, Alexander Graf wrote:
 KVM for PowerPC only supports embedded cores at the moment.
 
 While it makes sense to virtualize on small machines, it's even more fun
 to do so on big boxes. So I figured we need KVM for PowerPC64 as well.
 
 This patchset implements KVM support for Book3s_64 hosts and guest support
 for Book3s_64 and G3/G4.

Acked-by: Hollis Blanchard holl...@us.ibm.com

Avi, please apply these patches, and one more (unrelated) to fix the
Book E build that I will send in just a moment.

-- 
Hollis Blanchard
IBM Linux Technology Center

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[KVM PATCH v2 0/2] irqfd enhancements

2009-10-22 Thread Gregory Haskins
(Applies to kvm.git/master:11b06403)

The following patches are cleanups/enhancements for IRQFD now that
we have lockless interrupt injection.  For more details, please see
the patch headers.

These patches pass checkpatch, and are fully tested.  Please consider
for merging.  They are an enhancement only, so there is no urgency
to push to mainline until a suitable merge window presents itself.

Kind Regards,
-Greg

[ Change log:

  v2:
 *) dropped original cleanup which relied on the user registering
MSI based GSIs or we may crash at runtime.  Instead, we now
check at registration whether the GSI supports lockless
operation and dynamically adapt to either the original
deferred path for lock-based injections, or direct for lockless.

  v1:
 *) original release
]

---

Gregory Haskins (2):
  KVM: Directly inject interrupts if they support lockless operation
  KVM: export lockless GSI attribute


 include/linux/kvm_host.h |2 ++
 virt/kvm/eventfd.c   |   31 +++
 virt/kvm/irq_comm.c  |   19 +++
 3 files changed, 48 insertions(+), 4 deletions(-)

-- 
Signature
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[KVM PATCH v2 1/2] KVM: export lockless GSI attribute

2009-10-22 Thread Gregory Haskins
Certain GSI's support lockless injecton, but we have no way to detect
which ones at the GSI level.  Knowledge of this attribute will be
useful later in the series so that we can optimize irqfd injection
paths for cases where we know the code will not sleep.  Therefore,
we provide an API to query a specific GSI.

Signed-off-by: Gregory Haskins ghask...@novell.com
---

 include/linux/kvm_host.h |2 ++
 virt/kvm/irq_comm.c  |   19 +++
 2 files changed, 21 insertions(+), 0 deletions(-)

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index bd5a616..93393a4 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -119,6 +119,7 @@ struct kvm_memory_slot {
 struct kvm_kernel_irq_routing_entry {
u32 gsi;
u32 type;
+   bool lockless;
int (*set)(struct kvm_kernel_irq_routing_entry *e,
   struct kvm *kvm, int irq_source_id, int level);
union {
@@ -417,6 +418,7 @@ void kvm_get_intr_delivery_bitmask(struct kvm_ioapic 
*ioapic,
   unsigned long *deliver_bitmask);
 #endif
 int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level);
+int kvm_irq_check_lockless(struct kvm *kvm, u32 irq);
 void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin);
 void kvm_register_irq_ack_notifier(struct kvm *kvm,
   struct kvm_irq_ack_notifier *kian);
diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c
index 00c68d2..04f0134 100644
--- a/virt/kvm/irq_comm.c
+++ b/virt/kvm/irq_comm.c
@@ -174,6 +174,23 @@ int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 
irq, int level)
return ret;
 }
 
+int kvm_irq_check_lockless(struct kvm *kvm, u32 irq)
+{
+   struct kvm_kernel_irq_routing_entry *e;
+   struct kvm_irq_routing_table *irq_rt;
+   struct hlist_node *n;
+   int ret = -ENOENT;
+
+   rcu_read_lock();
+   irq_rt = rcu_dereference(kvm-irq_routing);
+   if (irq  irq_rt-nr_rt_entries)
+   hlist_for_each_entry(e, n, irq_rt-map[irq], link)
+   ret = e-lockless ? 1 : 0;
+   rcu_read_unlock();
+
+   return ret;
+}
+
 void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin)
 {
struct kvm_irq_ack_notifier *kian;
@@ -314,6 +331,7 @@ static int setup_routing_entry(struct kvm_irq_routing_table 
*rt,
 
e-gsi = ue-gsi;
e-type = ue-type;
+   e-lockless = false;
switch (ue-type) {
case KVM_IRQ_ROUTING_IRQCHIP:
delta = 0;
@@ -342,6 +360,7 @@ static int setup_routing_entry(struct kvm_irq_routing_table 
*rt,
e-msi.address_lo = ue-u.msi.address_lo;
e-msi.address_hi = ue-u.msi.address_hi;
e-msi.data = ue-u.msi.data;
+   e-lockless = true;
break;
default:
goto out;

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[KVM PATCH v2 2/2] KVM: Directly inject interrupts if they support lockless operation

2009-10-22 Thread Gregory Haskins
IRQFD currently uses a deferred workqueue item to execute the injection
operation.  It was originally designed this way because kvm_set_irq()
required the caller to hold the irq_lock mutex, and the eventfd callback
is invoked from within a non-preemptible critical section.

With the advent of lockless injection support for certain GSIs, the
deferment mechanism is no longer technically needed in all cases.
Since context switching to the workqueue is a source of interrupt
latency, lets switch to a direct method whenever possible.  Fortunately
for us, the most common use of irqfd (MSI-based GSIs) readily support
lockless injection.

Signed-off-by: Gregory Haskins ghask...@novell.com
---

 virt/kvm/eventfd.c |   31 +++
 1 files changed, 27 insertions(+), 4 deletions(-)

diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
index 30f70fd..e6cc958 100644
--- a/virt/kvm/eventfd.c
+++ b/virt/kvm/eventfd.c
@@ -51,20 +51,34 @@ struct _irqfd {
wait_queue_t  wait;
struct work_structinject;
struct work_structshutdown;
+   void (*execute)(struct _irqfd *);
 };
 
 static struct workqueue_struct *irqfd_cleanup_wq;
 
 static void
-irqfd_inject(struct work_struct *work)
+irqfd_inject(struct _irqfd *irqfd)
 {
-   struct _irqfd *irqfd = container_of(work, struct _irqfd, inject);
struct kvm *kvm = irqfd-kvm;
 
kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irqfd-gsi, 1);
kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irqfd-gsi, 0);
 }
 
+static void
+irqfd_deferred_inject(struct work_struct *work)
+{
+   struct _irqfd *irqfd = container_of(work, struct _irqfd, inject);
+
+   irqfd_inject(irqfd);
+}
+
+static void
+irqfd_schedule(struct _irqfd *irqfd)
+{
+   schedule_work(irqfd-inject);
+}
+
 /*
  * Race-free decouple logic (ordering is critical)
  */
@@ -126,7 +140,7 @@ irqfd_wakeup(wait_queue_t *wait, unsigned mode, int sync, 
void *key)
 
if (flags  POLLIN)
/* An event has been signaled, inject an interrupt */
-   schedule_work(irqfd-inject);
+   irqfd-execute(irqfd);
 
if (flags  POLLHUP) {
/* The eventfd is closing, detach from KVM */
@@ -179,7 +193,7 @@ kvm_irqfd_assign(struct kvm *kvm, int fd, int gsi)
irqfd-kvm = kvm;
irqfd-gsi = gsi;
INIT_LIST_HEAD(irqfd-list);
-   INIT_WORK(irqfd-inject, irqfd_inject);
+   INIT_WORK(irqfd-inject, irqfd_deferred_inject);
INIT_WORK(irqfd-shutdown, irqfd_shutdown);
 
file = eventfd_fget(fd);
@@ -209,6 +223,15 @@ kvm_irqfd_assign(struct kvm *kvm, int fd, int gsi)
list_add_tail(irqfd-list, kvm-irqfds.items);
spin_unlock_irq(kvm-irqfds.lock);
 
+   ret = kvm_irq_check_lockless(kvm, gsi);
+   if (ret  0)
+   goto fail;
+
+   if (ret)
+   irqfd-execute = irqfd_inject;
+   else
+   irqfd-execute = irqfd_schedule;
+
/*
 * Check if there was an event already pending on the eventfd
 * before we registered, and trigger it as if we didn't miss it.

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [KVM PATCH v2 1/2] KVM: export lockless GSI attribute

2009-10-22 Thread Gregory Haskins
Gregory Haskins wrote:
 Certain GSI's support lockless injecton, but we have no way to detect
 which ones at the GSI level.  Knowledge of this attribute will be
 useful later in the series so that we can optimize irqfd injection
 paths for cases where we know the code will not sleep.  Therefore,
 we provide an API to query a specific GSI.
 
 Signed-off-by: Gregory Haskins ghask...@novell.com
 ---
 
  include/linux/kvm_host.h |2 ++
  virt/kvm/irq_comm.c  |   19 +++
  2 files changed, 21 insertions(+), 0 deletions(-)
 
 diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
 index bd5a616..93393a4 100644
 --- a/include/linux/kvm_host.h
 +++ b/include/linux/kvm_host.h
 @@ -119,6 +119,7 @@ struct kvm_memory_slot {
  struct kvm_kernel_irq_routing_entry {
   u32 gsi;
   u32 type;
 + bool lockless;
   int (*set)(struct kvm_kernel_irq_routing_entry *e,
  struct kvm *kvm, int irq_source_id, int level);
   union {
 @@ -417,6 +418,7 @@ void kvm_get_intr_delivery_bitmask(struct kvm_ioapic 
 *ioapic,
  unsigned long *deliver_bitmask);
  #endif
  int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level);
 +int kvm_irq_check_lockless(struct kvm *kvm, u32 irq);
  void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin);
  void kvm_register_irq_ack_notifier(struct kvm *kvm,
  struct kvm_irq_ack_notifier *kian);
 diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c
 index 00c68d2..04f0134 100644
 --- a/virt/kvm/irq_comm.c
 +++ b/virt/kvm/irq_comm.c
 @@ -174,6 +174,23 @@ int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 
 irq, int level)
   return ret;
  }
  
 +int kvm_irq_check_lockless(struct kvm *kvm, u32 irq)
 +{
 + struct kvm_kernel_irq_routing_entry *e;
 + struct kvm_irq_routing_table *irq_rt;
 + struct hlist_node *n;
 + int ret = -ENOENT;
 +
 + rcu_read_lock();
 + irq_rt = rcu_dereference(kvm-irq_routing);
 + if (irq  irq_rt-nr_rt_entries)
 + hlist_for_each_entry(e, n, irq_rt-map[irq], link)
 + ret = e-lockless ? 1 : 0;

Sigh...  just noticed this as it hits the list.  I should probably break
out of the loop here.

 + rcu_read_unlock();
 +
 + return ret;
 +}
 +
  void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin)
  {
   struct kvm_irq_ack_notifier *kian;
 @@ -314,6 +331,7 @@ static int setup_routing_entry(struct 
 kvm_irq_routing_table *rt,
  
   e-gsi = ue-gsi;
   e-type = ue-type;
 + e-lockless = false;
   switch (ue-type) {
   case KVM_IRQ_ROUTING_IRQCHIP:
   delta = 0;
 @@ -342,6 +360,7 @@ static int setup_routing_entry(struct 
 kvm_irq_routing_table *rt,
   e-msi.address_lo = ue-u.msi.address_lo;
   e-msi.address_hi = ue-u.msi.address_hi;
   e-msi.data = ue-u.msi.data;
 + e-lockless = true;
   break;
   default:
   goto out;
 
 --
 To unsubscribe from this list: send the line unsubscribe linux-kernel in
 the body of a message to majord...@vger.kernel.org
 More majordomo info at  http://vger.kernel.org/majordomo-info.html
 Please read the FAQ at  http://www.tux.org/lkml/




signature.asc
Description: OpenPGP digital signature


Re: [PATCH] Enable 32bit dirty log pointers on 64bit host

2009-10-22 Thread Alexander Graf


On 22.10.2009, at 12:23, Avi Kivity wrote:


On 10/21/2009 04:08 PM, Alexander Graf wrote:

From: Arnd Bergmanna...@arndb.de

With big endian userspace, we can't quite figure out if a pointer
is 32 bit (shifted  32) or 64 bit when we read a 64 bit pointer.

This is what happens with dirty logging. To get the pointer  
interpreted

correctly, we thus need Arnd's patch to implement a compat layer for
the ioctl:

A better way to do this is to add a separate compat_ioctl() method  
that

converts this for you.

From: Arnd Bergmanna...@arndb.de
Signed-off-by: Arnd Bergmanna...@arndb.de
Acked-by: Alexander Grafag...@suse.de




If you send someone's patch, you need to sign this off.  That says  
you are legally allowed to send it along.


Ack means I have a say in this area and it looks good to me, you  
add it when someone else is doing the sending.


Ok, so is it still a From: Arnd then? Is it still signed-off by Arnd  
even though I change it?


Is there a Based-on-patch-by: tag? :-)

Alex

--
To unsubscribe from this list: send the line unsubscribe kvm-ppc in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH] Enable 32bit dirty log pointers on 64bit host

2009-10-22 Thread Alexander Graf
From: Arnd Bergmann a...@arndb.de

With big endian userspace, we can't quite figure out if a pointer
is 32 bit (shifted  32) or 64 bit when we read a 64 bit pointer.

This is what happens with dirty logging. To get the pointer interpreted
correctly, we thus need Arnd's patch to implement a compat layer for
the ioctl:

A better way to do this is to add a separate compat_ioctl() method that
converts this for you.

Based on initial patch from Arnd Bergmann.

From: Arnd Bergmann a...@arndb.de
Signed-off-by: Arnd Bergmann a...@arndb.de
Signed-off-by: Alexander Graf ag...@suse.de

---

Changes from Arnd's example version:

  - s/log.log/log/ (Avi)
  - use sizeof(compat_log) (Avi)
  - compile fixes
---
 virt/kvm/kvm_main.c |   49 -
 1 files changed, 48 insertions(+), 1 deletions(-)

diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index cac69c4..54a272f 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -43,6 +43,7 @@
 #include linux/swap.h
 #include linux/bitops.h
 #include linux/spinlock.h
+#include linux/compat.h
 
 #include asm/processor.h
 #include asm/io.h
@@ -1542,6 +1543,52 @@ out:
return r;
 }
 
+#ifdef CONFIG_COMPAT
+struct compat_kvm_dirty_log {
+   __u32 slot;
+   __u32 padding1;
+   union {
+   compat_uptr_t dirty_bitmap; /* one bit per page */
+   __u64 padding2;
+   };
+};
+
+static long kvm_vm_compat_ioctl(struct file *filp,
+  unsigned int ioctl, unsigned long arg)
+{
+   struct kvm *kvm = filp-private_data;
+   int r;
+
+   if (kvm-mm != current-mm)
+   return -EIO;
+   switch (ioctl) {
+   case KVM_GET_DIRTY_LOG: {
+   struct compat_kvm_dirty_log compat_log;
+   struct kvm_dirty_log log;
+
+   r = -EFAULT;
+   if (copy_from_user(compat_log, (void __user *)arg,
+  sizeof(compat_log)))
+   goto out;
+   log.slot = compat_log.slot;
+   log.padding1 = compat_log.padding1;
+   log.padding2 = compat_log.padding2;
+   log.dirty_bitmap = compat_ptr(compat_log.dirty_bitmap);
+
+   r = kvm_vm_ioctl_get_dirty_log(kvm, log);
+   if (r)
+   goto out;
+   break;
+   }
+   default:
+   r = kvm_vm_ioctl(filp, ioctl, arg);
+   }
+
+out:
+   return r;
+}
+#endif
+
 static int kvm_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 {
struct page *page[1];
@@ -1576,7 +1623,7 @@ static int kvm_vm_mmap(struct file *file, struct 
vm_area_struct *vma)
 static struct file_operations kvm_vm_fops = {
.release= kvm_vm_release,
.unlocked_ioctl = kvm_vm_ioctl,
-   .compat_ioctl   = kvm_vm_ioctl,
+   .compat_ioctl   = kvm_vm_compat_ioctl,
.mmap   = kvm_vm_mmap,
 };
 
-- 
1.6.0.2

--
To unsubscribe from this list: send the line unsubscribe kvm-ppc in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 00/27] Add KVM support for Book3s_64 (PPC64) hosts v5

2009-10-22 Thread Arnd Bergmann
On Wednesday 21 October 2009, Alexander Graf wrote:
 
 KVM for PowerPC only supports embedded cores at the moment.
 
 While it makes sense to virtualize on small machines, it's even more fun
 to do so on big boxes. So I figured we need KVM for PowerPC64 as well.
 
 This patchset implements KVM support for Book3s_64 hosts and guest support
 for Book3s_64 and G3/G4.
 
 To really make use of this, you also need a recent version of qemu.
 
 
 Don't want to apply patches? Get the git tree!
 
 $ git clone git://csgraf.de/kvm
 $ git checkout origin/ppc-v4

Whole series Acked-by: Arnd Bergmann a...@arndb.de

Great work, Alex!

Arnd 
--
To unsubscribe from this list: send the line unsubscribe kvm-ppc in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] Enable 32bit dirty log pointers on 64bit host

2009-10-22 Thread Marcelo Tosatti
On Wed, Oct 21, 2009 at 04:08:29PM +0200, Alexander Graf wrote:
 From: Arnd Bergmann a...@arndb.de
 
 With big endian userspace, we can't quite figure out if a pointer
 is 32 bit (shifted  32) or 64 bit when we read a 64 bit pointer.
 
 This is what happens with dirty logging. To get the pointer interpreted
 correctly, we thus need Arnd's patch to implement a compat layer for
 the ioctl:
 
 A better way to do this is to add a separate compat_ioctl() method that
 converts this for you.
 
 From: Arnd Bergmann a...@arndb.de
 Signed-off-by: Arnd Bergmann a...@arndb.de
 Acked-by: Alexander Graf ag...@suse.de
 
 ---
 
 Changes from Arnd's example version:
 
   - s/log.log/log/ (Avi)
   - use sizeof(compat_log) (Avi)
   - compile fixes

Applied, thanks.

--
To unsubscribe from this list: send the line unsubscribe kvm-ppc in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 00/27] Add KVM support for Book3s_64 (PPC64) hosts v5

2009-10-22 Thread Hollis Blanchard
On Wed, 2009-10-21 at 17:03 +0200, Alexander Graf wrote:
 KVM for PowerPC only supports embedded cores at the moment.
 
 While it makes sense to virtualize on small machines, it's even more fun
 to do so on big boxes. So I figured we need KVM for PowerPC64 as well.
 
 This patchset implements KVM support for Book3s_64 hosts and guest support
 for Book3s_64 and G3/G4.

Acked-by: Hollis Blanchard holl...@us.ibm.com

Avi, please apply these patches, and one more (unrelated) to fix the
Book E build that I will send in just a moment.

-- 
Hollis Blanchard
IBM Linux Technology Center

--
To unsubscribe from this list: send the line unsubscribe kvm-ppc in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH] kvmppc: Fix BUILD_BUG_ON condition

2009-10-22 Thread Hollis Blanchard
The old BUILD_BUG_ON implementation didn't work with __builtin_constant_p().
Fixing that revealed this test had been inverted for a long time without
anybody noticing...

Signed-off-by: Hollis Blanchard holl...@us.ibm.com

diff --git a/arch/powerpc/kvm/timing.h b/arch/powerpc/kvm/timing.h
--- a/arch/powerpc/kvm/timing.h
+++ b/arch/powerpc/kvm/timing.h
@@ -48,7 +48,7 @@ static inline void kvmppc_set_exit_type(
 static inline void kvmppc_account_exit_stat(struct kvm_vcpu *vcpu, int type)
 {
/* type has to be known at build time for optimization */
-   BUILD_BUG_ON(__builtin_constant_p(type));
+   BUILD_BUG_ON(!__builtin_constant_p(type));
switch (type) {
case EXT_INTR_EXITS:
vcpu-stat.ext_intr_exits++;
--
To unsubscribe from this list: send the line unsubscribe kvm-ppc in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] BUILD_BUG_ON: make it handle more cases

2009-10-22 Thread Américo Wang
On Tue, Oct 20, 2009 at 10:43 PM, Alan Jenkins
sourcejedi.l...@googlemail.com wrote:
 On 10/20/09, Américo Wang xiyou.wangc...@gmail.com wrote:
 On Tue, Oct 20, 2009 at 02:15:33PM +1030, Rusty Russell wrote:
BUILD_BUG_ON used to use the optimizer to do code elimination or fail
at link time; it was changed to first the size of a negative array (a
nicer compile time error), then (in
8c87df457cb58fe75b9b893007917cf8095660a0) to a bitfield.

bitfields: needs a literal constant at parse time, and can't be put under
      if (__builtin_constant_p(x)) for example.
negative array: can handle anything, but if the compiler can't tell it's
      a constant, silently has no effect.
link time: breaks link if the compiler can't determine the value, but the
      linker output is not usually as informative as a compiler error.

If we use the negative-array-size method *and* the link time trick,
we get the ability to use BUILD_BUG_ON() under __builtin_constant_p()
branches, and maximal ability for the compiler to detect errors at
build time.

Signed-off-by: Rusty Russell ru...@rustcorp.com.au

diff --git a/include/linux/kernel.h b/include/linux/kernel.h
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -683,12 +683,6 @@ struct sysinfo {
      char _f[20-2*sizeof(long)-sizeof(int)]; /* Padding: libc5 uses this.. 
 */
 };

-/* Force a compilation error if condition is true */
-#define BUILD_BUG_ON(condition) ((void)BUILD_BUG_ON_ZERO(condition))
-
-/* Force a compilation error if condition is constant and true */
-#define MAYBE_BUILD_BUG_ON(cond) ((void)sizeof(char[1 - 2 * !!(cond)]))
-
 /* Force a compilation error if condition is true, but also produce a
    result (of value 0 and type size_t), so the expression can be used
    e.g. in a structure initializer (or where-ever else comma expressions
@@ -696,6 +690,33 @@ struct sysinfo {
 #define BUILD_BUG_ON_ZERO(e) (sizeof(struct { int:-!!(e); }))
 #define BUILD_BUG_ON_NULL(e) ((void *)sizeof(struct { int:-!!(e); }))

+/**
+ * BUILD_BUG_ON - break compile if a condition is true.
+ * @cond: the condition which the compiler should know is false.
+ *
+ * If you have some code which relies on certain constants being equal, or
+ * other compile-time-evaluated condition, you should use BUILD_BUG_ON to
+ * detect if someone changes it.
+ *
+ * The implementation uses gcc's reluctance to create a negative array,
 but
+ * gcc (as of 4.4) only emits that error for obvious cases (eg. not
 arguments
+ * to inline functions).  So as a fallback we use the optimizer; if it
 can't
+ * prove the condition is false, it will cause a link error on the
 undefined
+ * __build_bug_on_failed.  This error message can be harder to track
 down
+ * though, hence the two different methods.
+ */
+#ifndef __OPTIMIZE__
+#define BUILD_BUG_ON(condition) ((void)sizeof(char[1 - 2*!!(condition)]))
+#else
+extern int __build_bug_on_failed;

 Hmm, what exactly is __build_bug_on_failed?

 Well, we haven't added a definition for it in this patch.  I'm sure
 grep will tell you it wasn't defined before hand either.  So any
 reference to it is an error - which will be reported at link time.

+#define BUILD_BUG_ON(condition)                                      \
+     do {                                                    \
+             ((void)sizeof(char[1 - 2*!!(condition)]));      \
+             if (condition) __build_bug_on_failed = 1;       \

 If condition is known false at compile time, gcc -O will eliminate
 the code which refers to __build_bug_on_failed.  If it's not proved to
 be false - it will break the build, which is exactly what we want
 BUILD_BUG_ON to do.

Ah, clever trick! Got it.
Thanks!

Reviewed-by: WANG Cong xiyou.wangc...@gmail.com
--
To unsubscribe from this list: send the line unsubscribe kvm-ppc in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html