date:20130131

Re: [PATCH] tcm_vhost: Multi-target support

2013-01-31 Thread Asias He

Hello Nicholas,

On 01/31/2013 03:33 PM, Asias He wrote:
 In order to take advantage of Paolo's multi-queue virtio-scsi, we need
 multi-target support in tcm_vhost first. Otherwise all the requests go
 to one queue and the other queues are idle.
 
 This patch makes:
 
 1. All the targets under the wwpn are seen and can be used by the guest.
 2. No need to pass the tpgt number in struct vhost_scsi_target to
tcm_vhost.ko. Only wwpn is needed.
 3. We can always pass max_target = 255 to the guest now, since we abort the
request whose target id does not exist.
 
 Signed-off-by: Asias He as...@redhat.com
 ---
  drivers/vhost/tcm_vhost.c | 115 
 --
  drivers/vhost/tcm_vhost.h |   4 +-
  2 files changed, 74 insertions(+), 45 deletions(-)
 
 diff --git a/drivers/vhost/tcm_vhost.c b/drivers/vhost/tcm_vhost.c
 index 218deb6..d50cb95 100644
 --- a/drivers/vhost/tcm_vhost.c
 +++ b/drivers/vhost/tcm_vhost.c
 @@ -59,13 +59,18 @@ enum {
   VHOST_SCSI_VQ_IO = 2,
  };
  
 +#define VHOST_SCSI_MAX_TARGET 256
 +
  struct vhost_scsi {
 - struct tcm_vhost_tpg *vs_tpg;   /* Protected by vhost_scsi-dev.mutex */
 + /* Protected by vhost_scsi-dev.mutex */
 + struct tcm_vhost_tpg *vs_tpg[VHOST_SCSI_MAX_TARGET];
   struct vhost_dev dev;
   struct vhost_virtqueue vqs[3];
  
   struct vhost_work vs_completion_work; /* cmd completion work item */
   struct llist_head vs_completion_list; /* cmd completion queue */
 + char vs_vhost_wwpn[TRANSPORT_IQN_LEN];
 + int vs_num_target;
  };
  
  /* Local pointer to allocated TCM configfs fabric module */
 @@ -564,13 +569,7 @@ static void vhost_scsi_handle_vq(struct vhost_scsi *vs)
   u32 exp_data_len, data_first, data_num, data_direction;
   unsigned out, in, i;
   int head, ret;
 -
 - /* Must use ioctl VHOST_SCSI_SET_ENDPOINT */
 - tv_tpg = vs-vs_tpg;
 - if (unlikely(!tv_tpg)) {
 - pr_err(%s endpoint not set\n, __func__);
 - return;
 - }
 + u8 target;
  
   mutex_lock(vq-mutex);
   vhost_disable_notify(vs-dev, vq);
 @@ -637,6 +636,35 @@ static void vhost_scsi_handle_vq(struct vhost_scsi *vs)
   break;
   }
  
 + /* Extract the tpgt */
 + target = v_req.lun[1];
 +
 + /* Target does not exist, fail the request */
 + if (unlikely(target >= vs->vs_num_target)) {
 + struct virtio_scsi_cmd_resp __user *resp;
 + struct virtio_scsi_cmd_resp rsp;
 +
 + memset(rsp, 0, sizeof(rsp));
 + rsp.response = VIRTIO_SCSI_S_BAD_TARGET;
 + resp = vq-iov[out].iov_base;
 + ret = copy_to_user(resp, rsp, sizeof(rsp));
 + if (!ret)
 + vhost_add_used_and_signal(vs-dev,
 + vs-vqs[2], head, 0);
 + else
 + pr_err(Faulted on virtio_scsi_cmd_resp\n);
 +
 + continue;
 + }
 +
 + tv_tpg = vs-vs_tpg[target];
 + if (unlikely(!tv_tpg)) {
 + /* Must use ioctl VHOST_SCSI_SET_ENDPOINT */
 + pr_err(endpoint not set, target = %d\n, target);
 + vhost_discard_vq_desc(vq, 1);
 + break;
 + }
 +
   exp_data_len = 0;
   for (i = 0; i  data_num; i++)
   exp_data_len += vq-iov[data_first + i].iov_len;
 @@ -771,14 +799,11 @@ static int vhost_scsi_set_endpoint(
   }
   tv_tport = tv_tpg-tport;
  
 - if (!strcmp(tv_tport-tport_name, t-vhost_wwpn) 
 - (tv_tpg-tport_tpgt == t-vhost_tpgt)) {
 + if (!strcmp(tv_tport-tport_name, t-vhost_wwpn)) {
   tv_tpg-tv_tpg_vhost_count++;
 - mutex_unlock(tv_tpg-tv_tpg_mutex);
 - mutex_unlock(tcm_vhost_mutex);
  
   mutex_lock(vs-dev.mutex);
 - if (vs-vs_tpg) {
 + if (vs-vs_tpg[tv_tpg-tport_tpgt - 1]) {
   mutex_unlock(vs-dev.mutex);
   mutex_lock(tv_tpg-tv_tpg_mutex);
   tv_tpg-tv_tpg_vhost_count--;
 @@ -786,15 +811,17 @@ static int vhost_scsi_set_endpoint(
   return -EEXIST;
   }
  
 - vs-vs_tpg = tv_tpg;
 + vs-vs_tpg[tv_tpg-tport_tpgt - 1] = tv_tpg;


tv_tpg->tport_tpgt starts from 0, right? I thought it starts from 1,
because I always see it starting from 1 in targetcli.

o- vhost
   o- naa.6001405bd4e8476d
  o- tpg1
 o- luns
o- lun0
  o- tpg2
 o- luns
o- lun0
  o- tpg3
 o- luns
o- lun0
  o- tpg4
 o- luns
o- lun0

Re: [PATCH 2/2] x86, apicv: Add Posted Interrupt supporting

2013-01-31 Thread Gleb Natapov

On Thu, Dec 13, 2012 at 03:29:40PM +0800, Yang Zhang wrote:
 From: Yang Zhang yang.z.zh...@intel.com
 
 Posted Interrupt allows APIC interrupts to be injected into the guest directly
 without any vmexit.
 
 - When delivering an interrupt to the guest, if the target vcpu is running,
   update the Posted-interrupt requests bitmap and send a notification event
   to the vcpu. Then the vcpu will handle this interrupt automatically,
   without any software involvement.
 
 - If the target vcpu is not running or there is already a notification event
   pending in the vcpu, do nothing. The interrupt will be handled by the
   next vm entry.
 
 Signed-off-by: Yang Zhang yang.z.zh...@intel.com
 ---
  arch/x86/include/asm/entry_arch.h  |1 +
  arch/x86/include/asm/hw_irq.h  |1 +
  arch/x86/include/asm/irq.h |1 +
  arch/x86/include/asm/irq_vectors.h |4 +
  arch/x86/include/asm/kvm_host.h|3 +
  arch/x86/include/asm/vmx.h |4 +
  arch/x86/kernel/entry_64.S |2 +
  arch/x86/kernel/irq.c  |   25 +++
  arch/x86/kernel/irqinit.c  |2 +
  arch/x86/kvm/lapic.c   |   16 +++-
  arch/x86/kvm/lapic.h   |1 +
  arch/x86/kvm/vmx.c |  133 
 +---
  12 files changed, 180 insertions(+), 13 deletions(-)
 
 diff --git a/arch/x86/include/asm/entry_arch.h 
 b/arch/x86/include/asm/entry_arch.h
 index 40afa00..7b0a29e 100644
 --- a/arch/x86/include/asm/entry_arch.h
 +++ b/arch/x86/include/asm/entry_arch.h
 @@ -18,6 +18,7 @@ BUILD_INTERRUPT(reboot_interrupt,REBOOT_VECTOR)
  #endif
  
  BUILD_INTERRUPT(x86_platform_ipi, X86_PLATFORM_IPI_VECTOR)
 +BUILD_INTERRUPT(posted_intr_ipi, POSTED_INTR_VECTOR)
  
  /*
   * every pentium local APIC has two 'local interrupts', with a
 diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h
 index eb92a6e..ee61af3 100644
 --- a/arch/x86/include/asm/hw_irq.h
 +++ b/arch/x86/include/asm/hw_irq.h
 @@ -28,6 +28,7 @@
  /* Interrupt handlers registered during init_IRQ */
  extern void apic_timer_interrupt(void);
  extern void x86_platform_ipi(void);
 +extern void posted_intr_ipi(void);
  extern void error_interrupt(void);
  extern void irq_work_interrupt(void);
  
 diff --git a/arch/x86/include/asm/irq.h b/arch/x86/include/asm/irq.h
 index ba870bb..cff9933 100644
 --- a/arch/x86/include/asm/irq.h
 +++ b/arch/x86/include/asm/irq.h
 @@ -30,6 +30,7 @@ extern void irq_force_complete_move(int);
  #endif
  
  extern void (*x86_platform_ipi_callback)(void);
 +extern void (*posted_intr_callback)(void);
  extern void native_init_IRQ(void);
  extern bool handle_irq(unsigned irq, struct pt_regs *regs);
  
 diff --git a/arch/x86/include/asm/irq_vectors.h 
 b/arch/x86/include/asm/irq_vectors.h
 index 1508e51..8f2e383 100644
 --- a/arch/x86/include/asm/irq_vectors.h
 +++ b/arch/x86/include/asm/irq_vectors.h
 @@ -102,6 +102,10 @@
   */
  #define X86_PLATFORM_IPI_VECTOR  0xf7
  
 +#ifdef CONFIG_HAVE_KVM
Users of POSTED_INTR_VECTOR are not under ifdef, which means compilation
will fail with kvm disabled. Test it please.

 +#define POSTED_INTR_VECTOR   0xf2
 +#endif
 +
  /*
   * IRQ work vector:
   */
 diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
 index 7e26d1a..82423a8 100644
 --- a/arch/x86/include/asm/kvm_host.h
 +++ b/arch/x86/include/asm/kvm_host.h
 @@ -700,6 +700,9 @@ struct kvm_x86_ops {
   int (*has_virtual_interrupt_delivery)(struct kvm_vcpu *vcpu);
   void (*update_irq)(struct kvm_vcpu *vcpu);
   void (*update_eoi_exitmap)(struct kvm_vcpu *vcpu, int vector, bool set);
 + int (*has_posted_interrupt)(struct kvm_vcpu *vcpu);
 + int (*send_nv)(struct kvm_vcpu *vcpu, int vector);
 + void (*update_irr)(struct kvm_vcpu *vcpu);
   int (*set_tss_addr)(struct kvm *kvm, unsigned int addr);
   int (*get_tdp_level)(void);
   u64 (*get_mt_mask)(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio);
 diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
 index 1003341..7b9e1d0 100644
 --- a/arch/x86/include/asm/vmx.h
 +++ b/arch/x86/include/asm/vmx.h
 @@ -152,6 +152,7 @@
  #define PIN_BASED_EXT_INTR_MASK 0x0001
  #define PIN_BASED_NMI_EXITING   0x0008
  #define PIN_BASED_VIRTUAL_NMIS  0x0020
 +#define PIN_BASED_POSTED_INTR   0x0080
  
  #define VM_EXIT_SAVE_DEBUG_CONTROLS 0x0002
  #define VM_EXIT_HOST_ADDR_SPACE_SIZE0x0200
 @@ -174,6 +175,7 @@
  /* VMCS Encodings */
  enum vmcs_field {
   VIRTUAL_PROCESSOR_ID= 0x,
 + POSTED_INTR_NV  = 0x0002,
   GUEST_ES_SELECTOR   = 0x0800,
   GUEST_CS_SELECTOR   = 0x0802,
   GUEST_SS_SELECTOR   = 0x0804,
 @@ -208,6 +210,8 @@ enum vmcs_field {
   VIRTUAL_APIC_PAGE_ADDR_HIGH = 0x2013,
   APIC_ACCESS_ADDR= 0x2014,

Re: [PATCH 2/2] x86, apicv: Add Posted Interrupt supporting

2013-01-31 Thread Gleb Natapov

On Wed, Jan 30, 2013 at 09:03:11PM -0200, Marcelo Tosatti wrote:
 Posted interrupt patch:
 2) Must move IN_GUEST_MODE assignment after local_irq_disable, in
 vcpu_enter_guest function. Otherwise:
 
 cpu0  vcpu1-cpu1
 
   vcpu-mode = IN_GUEST_MODE
 
 if IN_GUEST_MODE == true
   send IPI
   local_irq_disable
 
 PIR not transferred to VIRR, misses interrupt.
 
cpu0 will set KVM_REQ_EVENT, so vmentry will be aborted after
local_irq_disable() by the ->requests check.

 3) Must check outstanding PIR notification bit unconditionally on
 every VM-entry, because:
 
   1. local_irq_disable
   2. vcpu-mode = IN_GUEST_MODE
   3. vmenter
   4. vmexit
   5. vcpu-mode = OUTSIDE_GUEST_MODE
 
 If PIR-IPI-interrupt is sent between an event which triggers VM-exit
 (for example, an external interrupt due to a device), and step 5
 (assignment of vcpu-mode), the PIR-VIRR transfer before vmentry must
 be made.
Not sure I understand, but I think KVM_REQ_EVENT will cover that too.

 
 4) Today, an interrupt notification is cached on IRR until it's delivered -
 further interrupt injection does not generate further interrupt notification
 bits. With PIR, behaviour changes: It's possible to have one bit in PIR and
 another on the IRR APIC page (if timing is right). Is this harmless? Why?
 
 

--
Gleb.
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH 0/2] KVM: set_memory_region: Cleanup and new restriction

2013-01-31 Thread Gleb Natapov

On Wed, Jan 30, 2013 at 07:38:37PM +0900, Takuya Yoshikawa wrote:
 Patch 1: just rebased for this series.
 Patch 2: an API change, so please let me know if you notice any problems.
 
 Takuya Yoshikawa (2):
   KVM: set_memory_region: Identify the requested change explicitly
   KVM: set_memory_region: Disallow changing read-only attribute later
 
  Documentation/virtual/kvm/api.txt |   12 ++--
  virt/kvm/kvm_main.c   |   95 
 +
  2 files changed, 60 insertions(+), 47 deletions(-)
 
Reviewed-by: Gleb Natapov g...@redhat.com

--
Gleb.
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [Qemu-devel] KVM call minutes 2013-01-29 - Port I/O

2013-01-31 Thread Michael S. Tsirkin

On Wed, Jan 30, 2013 at 04:28:30PM -0700, Alex Williamson wrote:
 On Thu, 2013-01-31 at 10:02 +1100, Benjamin Herrenschmidt wrote:
  On Thu, 2013-01-31 at 00:49 +0200, Michael S. Tsirkin wrote:
In practice they do (VGA at least)

From a SW modelling standpoint, I don't think it's worth
   differentiating
PCI and PCIE.

Cheers,
Ben.
   
   Interesting.
   Do you have such hardware? Could you please dump
   the output of lspci -vv?
  
  Any ATI or nVidia card still supports hard decoding of VGA regions for
  the sake of legacy operating systems and BIOSes :-) I don't know about
  Intel but I suppose it's the same.
 
 For example:
 
 -[:00]-+-00.0  Advanced Micro Devices [AMD] nee ATI RD890 PCI to PCI 
 bridge (external gfx0 p
+-04.0-[02]--+-00.0  Advanced Micro Devices [AMD] nee ATI Cedar 
 PRO [Radeon HD 5450/6350]
 
 00:04.0 PCI bridge: Advanced Micro Devices [AMD] nee ATI RD890 PCI to PCI 
 bridge (PCI express gpp port D) (prog-if 00 [Normal decode])
   Control: I/O+ Mem+ BusMaster+ SpecCycle- MemWINV- VGASnoop- ParErr- 
 Stepping- SERR- FastB2B- DisINTx-
   Status: Cap+ 66MHz- UDF- FastB2B- ParErr- DEVSEL=fast TAbort- TAbort- 
 MAbort- SERR- PERR- INTx-
   Latency: 0, Cache Line Size: 64 bytes
   Bus: primary=00, secondary=02, subordinate=02, sec-latency=0
   I/O behind bridge: c000-cfff
   Memory behind bridge: fd10-fd1f
   Prefetchable memory behind bridge: d000-dfff
   Secondary status: 66MHz- FastB2B- ParErr- DEVSEL=fast TAbort- TAbort- 
 MAbort+ SERR- PERR-
   BridgeCtl: Parity- SERR- NoISA- VGA+ MAbort- Reset- FastB2B-
   
 VGA+ (VGA Enable) indicates positive decode of 0x3b0 - 0x3bb, 0x3c0 -
 0x3df, and 0xa0000 - 0xbffff.  Device 2:00.0 of course doesn't report
 these ISA ranges as they're implicit in the VGA class code.

OK but this appears behind a bridge.  So the bridge configuration tells
the root complex where to send accesses to the VGA.

But qemu currently puts devices directly on root bus.

And as far as I can tell when we present devices directly on bus 0, we
pretend these are integrated in the root complex. The spec seems to
say explicitly that root complex integrated devices should not use legacy
addresses or support hotplug. So I would be surprised if such a device
appears in the real world.

Luckily guests do not seem to be worried as long as we use ACPI.

 
 BTW, I've been working on vfio-pci support of VGA assignment which makes
 use of the VGA arbiter in the host to manipulate the VGA Enable control
 register, allowing us to select which device to access.  The qemu side
 is simply registering memory regions for the VGA areas and expecting to
 be used with -vga none, but I'll adopt whatever strategy we choose for
 hard coded address range support.  Current base patches at the links
 below.  Thanks,
 
 Alex
 
 https://github.com/awilliam/qemu-vfio/commit/ea2befa59010a429dcf13c10dbccdf8b64e82fbd
 https://github.com/awilliam/linux-vfio/commit/bae182d929229cbf1eaeb01e5fad4f77f81a4c61
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [kvmarm] [RFC v5 7/8] hw/kvm/arm_gic: Implement support for KVM in-kernel ARM GIC

2013-01-31 Thread Andreas Färber

Am 31.01.2013 11:52, schrieb KONRAD Frédéric:
 On 24/01/2013 16:43, Peter Maydell wrote:
 Implement support for using the KVM in-kernel GIC for ARM.

 Signed-off-by: Peter Maydell peter.mayd...@linaro.org
 ---
   hw/a15mpcore.c   |8 ++-
   hw/arm/Makefile.objs |1 +
   hw/kvm/arm_gic.c |  169
 ++
   3 files changed, 177 insertions(+), 1 deletion(-)
   create mode 100644 hw/kvm/arm_gic.c

 diff --git a/hw/a15mpcore.c b/hw/a15mpcore.c
 index fe6c34c..1ca6f28 100644
 --- a/hw/a15mpcore.c
 +++ b/hw/a15mpcore.c
 @@ -19,6 +19,7 @@
*/
 #include sysbus.h
 +#include sysemu/kvm.h
 /* A15MP private memory region.  */
   @@ -40,8 +41,13 @@ static int a15mp_priv_init(SysBusDevice *dev)
   {
   A15MPPrivState *s = FROM_SYSBUS(A15MPPrivState, dev);
   SysBusDevice *busdev;
 +const char *gictype = arm-gic;
 s/arm-gic/arm_gic/ ^^ ?
 
 Christoffer and I had trouble with that:
 
 qemu-system-arm: Unknown device 'arm-gic' for default sysbus

Since you already ran into issues here, even better would be to use a
TYPE_ARM_GIC constant or so.

Andreas

 
 Fred
   -s-gic = qdev_create(NULL, arm_gic);
 +if (kvm_irqchip_in_kernel()) {
 +gictype = kvm-arm-gic;
 +}
 +
 +s-gic = qdev_create(NULL, gictype);
   qdev_prop_set_uint32(s-gic, num-cpu, s-num_cpu);
   qdev_prop_set_uint32(s-gic, num-irq, s-num_irq);
 


-- 
SUSE LINUX Products GmbH, Maxfeldstr. 5, 90409 Nürnberg, Germany
GF: Jeff Hawn, Jennifer Guild, Felix Imendörffer; HRB 16746 AG Nürnberg
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [kvmarm] [RFC v5 7/8] hw/kvm/arm_gic: Implement support for KVM in-kernel ARM GIC

2013-01-31 Thread KONRAD Frédéric


On 24/01/2013 16:43, Peter Maydell wrote:

Implement support for using the KVM in-kernel GIC for ARM.

Signed-off-by: Peter Maydell peter.mayd...@linaro.org
---
  hw/a15mpcore.c   |8 ++-
  hw/arm/Makefile.objs |1 +
  hw/kvm/arm_gic.c |  169 ++
  3 files changed, 177 insertions(+), 1 deletion(-)
  create mode 100644 hw/kvm/arm_gic.c

diff --git a/hw/a15mpcore.c b/hw/a15mpcore.c
index fe6c34c..1ca6f28 100644
--- a/hw/a15mpcore.c
+++ b/hw/a15mpcore.c
@@ -19,6 +19,7 @@
   */
  
  #include sysbus.h

+#include sysemu/kvm.h
  
  /* A15MP private memory region.  */
  
@@ -40,8 +41,13 @@ static int a15mp_priv_init(SysBusDevice *dev)

  {
  A15MPPrivState *s = FROM_SYSBUS(A15MPPrivState, dev);
  SysBusDevice *busdev;
+const char *gictype = arm-gic;

s/arm-gic/arm_gic/ ^^ ?

Christoffer and I had trouble with that:

qemu-system-arm: Unknown device 'arm-gic' for default sysbus

Fred
  
-s-gic = qdev_create(NULL, arm_gic);

+if (kvm_irqchip_in_kernel()) {
+gictype = kvm-arm-gic;
+}
+
+s-gic = qdev_create(NULL, gictype);
  qdev_prop_set_uint32(s-gic, num-cpu, s-num_cpu);
  qdev_prop_set_uint32(s-gic, num-irq, s-num_irq);


--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH] tcm_vhost: Multi-target support

2013-01-31 Thread Michael S. Tsirkin

On Thu, Jan 31, 2013 at 05:28:21PM +0800, Asias He wrote:
 Hello Nicholas,
 
 On 01/31/2013 03:33 PM, Asias He wrote:
  In order to take advantage of Paolo's multi-queue virtio-scsi, we need
  multi-target support in tcm_vhost first. Otherwise all the requests go
  to one queue and the other queues are idle.
  
  This patch makes:
  
  1. All the targets under the wwpn are seen and can be used by the guest.
  2. No need to pass the tpgt number in struct vhost_scsi_target to
 tcm_vhost.ko. Only wwpn is needed.
  3. We can always pass max_target = 255 to the guest now, since we abort the
 request whose target id does not exist.
  
  Signed-off-by: Asias He as...@redhat.com
  ---
   drivers/vhost/tcm_vhost.c | 115 
  --
   drivers/vhost/tcm_vhost.h |   4 +-
   2 files changed, 74 insertions(+), 45 deletions(-)
  
  diff --git a/drivers/vhost/tcm_vhost.c b/drivers/vhost/tcm_vhost.c
  index 218deb6..d50cb95 100644
  --- a/drivers/vhost/tcm_vhost.c
  +++ b/drivers/vhost/tcm_vhost.c
  @@ -59,13 +59,18 @@ enum {
  VHOST_SCSI_VQ_IO = 2,
   };
   
  +#define VHOST_SCSI_MAX_TARGET 256
  +
   struct vhost_scsi {
  -   struct tcm_vhost_tpg *vs_tpg;   /* Protected by vhost_scsi-dev.mutex */
  +   /* Protected by vhost_scsi-dev.mutex */
  +   struct tcm_vhost_tpg *vs_tpg[VHOST_SCSI_MAX_TARGET];
  struct vhost_dev dev;
  struct vhost_virtqueue vqs[3];
   
  struct vhost_work vs_completion_work; /* cmd completion work item */
  struct llist_head vs_completion_list; /* cmd completion queue */
  +   char vs_vhost_wwpn[TRANSPORT_IQN_LEN];
  +   int vs_num_target;
   };
   
   /* Local pointer to allocated TCM configfs fabric module */
  @@ -564,13 +569,7 @@ static void vhost_scsi_handle_vq(struct vhost_scsi *vs)
  u32 exp_data_len, data_first, data_num, data_direction;
  unsigned out, in, i;
  int head, ret;
  -
  -   /* Must use ioctl VHOST_SCSI_SET_ENDPOINT */
  -   tv_tpg = vs-vs_tpg;
  -   if (unlikely(!tv_tpg)) {
  -   pr_err(%s endpoint not set\n, __func__);
  -   return;
  -   }
  +   u8 target;
   
  mutex_lock(vq-mutex);
  vhost_disable_notify(vs-dev, vq);
  @@ -637,6 +636,35 @@ static void vhost_scsi_handle_vq(struct vhost_scsi *vs)
  break;
  }
   
  +   /* Extract the tpgt */
  +   target = v_req.lun[1];
  +
  +   /* Target does not exist, fail the request */
  +   if (unlikely(target >= vs->vs_num_target)) {
  +   struct virtio_scsi_cmd_resp __user *resp;
  +   struct virtio_scsi_cmd_resp rsp;
  +
  +   memset(rsp, 0, sizeof(rsp));
  +   rsp.response = VIRTIO_SCSI_S_BAD_TARGET;
  +   resp = vq-iov[out].iov_base;
  +   ret = copy_to_user(resp, rsp, sizeof(rsp));
  +   if (!ret)
  +   vhost_add_used_and_signal(vs-dev,
  +   vs-vqs[2], head, 0);
  +   else
  +   pr_err(Faulted on virtio_scsi_cmd_resp\n);
  +
  +   continue;
  +   }
  +
  +   tv_tpg = vs-vs_tpg[target];
  +   if (unlikely(!tv_tpg)) {
  +   /* Must use ioctl VHOST_SCSI_SET_ENDPOINT */
  +   pr_err(endpoint not set, target = %d\n, target);
  +   vhost_discard_vq_desc(vq, 1);
  +   break;
  +   }
  +
  exp_data_len = 0;
  for (i = 0; i  data_num; i++)
  exp_data_len += vq-iov[data_first + i].iov_len;
  @@ -771,14 +799,11 @@ static int vhost_scsi_set_endpoint(
  }
  tv_tport = tv_tpg-tport;
   
  -   if (!strcmp(tv_tport-tport_name, t-vhost_wwpn) 
  -   (tv_tpg-tport_tpgt == t-vhost_tpgt)) {
  +   if (!strcmp(tv_tport-tport_name, t-vhost_wwpn)) {
  tv_tpg-tv_tpg_vhost_count++;
  -   mutex_unlock(tv_tpg-tv_tpg_mutex);
  -   mutex_unlock(tcm_vhost_mutex);
   
  mutex_lock(vs-dev.mutex);
  -   if (vs-vs_tpg) {
  +   if (vs-vs_tpg[tv_tpg-tport_tpgt - 1]) {
  mutex_unlock(vs-dev.mutex);
  mutex_lock(tv_tpg-tv_tpg_mutex);
  tv_tpg-tv_tpg_vhost_count--;
  @@ -786,15 +811,17 @@ static int vhost_scsi_set_endpoint(
  return -EEXIST;
  }
   
  -   vs-vs_tpg = tv_tpg;
  +   vs-vs_tpg[tv_tpg-tport_tpgt - 1] = tv_tpg;
 
 
 tv_tpg->tport_tpgt starts from 0, right? I thought it starts from 1,
 because I always see it starting from 1 in targetcli.
 
 o- vhost
o- naa.6001405bd4e8476d
   o- tpg1
  o- luns
 o- lun0
   o- tpg2
  o- luns
 o- lun0
   o- tpg3
  o- luns
 o-

Re: [kvmarm] [RFC v5 7/8] hw/kvm/arm_gic: Implement support for KVM in-kernel ARM GIC

2013-01-31 Thread Peter Maydell

On 31 January 2013 10:54, Andreas Färber afaer...@suse.de wrote:
 Am 31.01.2013 11:52, schrieb KONRAD Frédéric:
 +const char *gictype = arm-gic;
 s/arm-gic/arm_gic/ ^^ ?

 Christoffer and I had trouble with that:

 qemu-system-arm: Unknown device 'arm-gic' for default sysbus

Oops, nice catch.

 Since you already ran into issues here, even better would be to use a
 TYPE_ARM_GIC constant or so.

Hmm, I kind of agree, but the QOM idiom doesn't seem to encourage
having that define be publicly visible. Should we have a
hw/my_device.h [with the public bits like the TYPE_ and
FOO_CLASS/FOO_GET_CLASS macros] for every type? (and a
hw/my_device_priv.h if needed)

-- PMM
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH 8/8] KVM:PPC:booke: Allow debug interrupt injection to guest

2013-01-31 Thread Alexander Graf

On 30.01.2013, at 12:12, Bhushan Bharat-R65777 wrote:

 -Original Message-
 From: kvm-ppc-ow...@vger.kernel.org [mailto:kvm-ppc-ow...@vger.kernel.org] On
 Behalf Of Alexander Graf
 Sent: Friday, January 25, 2013 5:44 PM
 To: Bhushan Bharat-R65777
 Cc: kvm-...@vger.kernel.org; kvm@vger.kernel.org; Bhushan Bharat-R65777
 Subject: Re: [PATCH 8/8] KVM:PPC:booke: Allow debug interrupt injection to 
 guest

 On 16.01.2013, at 09:24, Bharat Bhushan wrote:

 Allow userspace to inject debug interrupt to guest. QEMU can

 s/QEMU/user space.

 inject the debug interrupt to guest if it is not able to handle the
 debug interrupt.

 Signed-off-by: Bharat Bhushan bharat.bhus...@freescale.com
 ---
 arch/powerpc/kvm/booke.c  |   32 +++-
 arch/powerpc/kvm/e500mc.c |   10 +-
 2 files changed, 40 insertions(+), 2 deletions(-)

 diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index
 faa0a0b..547797f 100644
 --- a/arch/powerpc/kvm/booke.c
 +++ b/arch/powerpc/kvm/booke.c
 @@ -133,6 +133,13 @@ static void kvmppc_vcpu_sync_fpu(struct kvm_vcpu
 *vcpu) #endif }

 +#ifdef CONFIG_KVM_BOOKE_HV
 +static int kvmppc_core_pending_debug(struct kvm_vcpu *vcpu) {
 +   return test_bit(BOOKE_IRQPRIO_DEBUG,
 +vcpu-arch.pending_exceptions); } #endif
 +
 /*
 * Helper function for full MSR writes.  No need to call this if only
 * EE/CE/ME/DE/RI are changing.
 @@ -144,7 +151,11 @@ void kvmppc_set_msr(struct kvm_vcpu *vcpu, u32 new_msr)
 #ifdef CONFIG_KVM_BOOKE_HV
 new_msr |= MSR_GS;

 -   if (vcpu-guest_debug)
 +   /*
 +* Set MSR_DE if the hardware debug resources are owned by user-space
 +* and there is no debug interrupt pending for guest to handle.

 Why?

 QEMU is using the IAC/DAC registers to set hardware breakpoints/watchpoints
 via debug ioctls. As debug events are enabled/gated by MSR_DE, we somehow
 need to set MSR_DE in the hardware MSR when the guest is running in this case.

Reading this 5 times I still have no idea what you're really checking for here. 
Maybe the naming for kvmppc_core_pending_debug is just unnatural? What does 
that function do really?

 On bookehv this is how I am controlling the MSR_DE in hardware MSR.  

 And why is this whole thing only executed on HV?

 On e500v2 we always enable MSR_DE using vcpu-arch.shadow_msr in e500.c
 #ifndef CONFIG_KVM_BOOKE_HV
 -   vcpu-arch.shadow_msr = MSR_USER | MSR_IS | MSR_DS;
 +   vcpu-arch.shadow_msr = MSR_USER | MSR_DE | MSR_IS | MSR_DS;

Why? How is e500v2 any different wrt debug? And why wouldn't that work for 
e500mc?

Alex

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH 3/8] KVM: PPC: booke: Added debug handler

2013-01-31 Thread Alexander Graf


On 30.01.2013, at 12:30, Bhushan Bharat-R65777 wrote:

 
 
 -Original Message-
 From: Alexander Graf [mailto:ag...@suse.de]
 Sent: Friday, January 25, 2013 5:13 PM
 To: Bhushan Bharat-R65777
 Cc: kvm-...@vger.kernel.org; kvm@vger.kernel.org; Bhushan Bharat-R65777
 Subject: Re: [PATCH 3/8] KVM: PPC: booke: Added debug handler
 
 
 On 16.01.2013, at 09:24, Bharat Bhushan wrote:
 
 From: Bharat Bhushan bharat.bhus...@freescale.com
 
 Installed debug handler will be used for guest debug support and debug
 facility emulation features (patches for these features will follow
 this patch).
 
 Signed-off-by: Liu Yu yu@freescale.com
 [bharat.bhus...@freescale.com: Substantial changes]
 Signed-off-by: Bharat Bhushan bharat.bhus...@freescale.com
 ---
 arch/powerpc/include/asm/kvm_host.h |1 +
 arch/powerpc/kernel/asm-offsets.c   |1 +
 arch/powerpc/kvm/booke_interrupts.S |   49 
 ++-
 3 files changed, 44 insertions(+), 7 deletions(-)
 
 diff --git a/arch/powerpc/include/asm/kvm_host.h
 b/arch/powerpc/include/asm/kvm_host.h
 index 8a72d59..f4ba881 100644
 --- a/arch/powerpc/include/asm/kvm_host.h
 +++ b/arch/powerpc/include/asm/kvm_host.h
 @@ -503,6 +503,7 @@ struct kvm_vcpu_arch {
 u32 tlbcfg[4];
 u32 mmucfg;
 u32 epr;
 +   u32 crit_save;
 struct kvmppc_booke_debug_reg dbg_reg; #endif
 gpa_t paddr_accessed;
 diff --git a/arch/powerpc/kernel/asm-offsets.c
 b/arch/powerpc/kernel/asm-offsets.c
 index 46f6afd..02048f3 100644
 --- a/arch/powerpc/kernel/asm-offsets.c
 +++ b/arch/powerpc/kernel/asm-offsets.c
 @@ -562,6 +562,7 @@ int main(void)
 DEFINE(VCPU_LAST_INST, offsetof(struct kvm_vcpu, arch.last_inst));
 DEFINE(VCPU_FAULT_DEAR, offsetof(struct kvm_vcpu, arch.fault_dear));
 DEFINE(VCPU_FAULT_ESR, offsetof(struct kvm_vcpu, arch.fault_esr));
 +   DEFINE(VCPU_CRIT_SAVE, offsetof(struct kvm_vcpu, arch.crit_save));
 #endif /* CONFIG_PPC_BOOK3S */
 #endif /* CONFIG_KVM */
 
 diff --git a/arch/powerpc/kvm/booke_interrupts.S
 b/arch/powerpc/kvm/booke_interrupts.S
 index eae8483..dd9c5d4 100644
 --- a/arch/powerpc/kvm/booke_interrupts.S
 +++ b/arch/powerpc/kvm/booke_interrupts.S
 @@ -52,12 +52,7 @@
   (1BOOKE_INTERRUPT_PROGRAM) | \
   (1BOOKE_INTERRUPT_DTLB_MISS))
 
 -.macro KVM_HANDLER ivor_nr scratch srr0
 -_GLOBAL(kvmppc_handler_\ivor_nr)
 -   /* Get pointer to vcpu and record exit number. */
 -   mtspr   \scratch , r4
 -   mfspr   r4, SPRN_SPRG_THREAD
 -   lwz r4, THREAD_KVM_VCPU(r4)
 +.macro __KVM_HANDLER ivor_nr scratch srr0
 stw r3, VCPU_GPR(R3)(r4)
 stw r5, VCPU_GPR(R5)(r4)
 stw r6, VCPU_GPR(R6)(r4)
 @@ -74,6 +69,46 @@ _GLOBAL(kvmppc_handler_\ivor_nr)
 bctr
 .endm
 
 +.macro KVM_HANDLER ivor_nr scratch srr0
 +_GLOBAL(kvmppc_handler_\ivor_nr)
 +   /* Get pointer to vcpu and record exit number. */
 +   mtspr   \scratch , r4
 +   mfspr   r4, SPRN_SPRG_THREAD
 +   lwz r4, THREAD_KVM_VCPU(r4)
 +   __KVM_HANDLER \ivor_nr \scratch \srr0 .endm
 +
 +.macro KVM_DBG_HANDLER ivor_nr scratch srr0
 +_GLOBAL(kvmppc_handler_\ivor_nr)
 +   mtspr   \scratch, r4
 +   mfspr   r4, SPRN_SPRG_THREAD
 +   lwz r4, THREAD_KVM_VCPU(r4)
 +   stw r3, VCPU_CRIT_SAVE(r4)
 +   mfcrr3
 +   mfspr   r4, SPRN_CSRR1
 +   andi.   r4, r4, MSR_PR
 +   bne 1f
 
 
 +   /* debug interrupt happened in enter/exit path */
 +   mfspr   r4, SPRN_CSRR1
 +   rlwinm  r4, r4, 0, ~MSR_DE
 +   mtspr   SPRN_CSRR1, r4
 +   lis r4, 0x
 +   ori r4, r4, 0x
 +   mtspr   SPRN_DBSR, r4
 +   mfspr   r4, SPRN_SPRG_THREAD
 +   lwz r4, THREAD_KVM_VCPU(r4)
 +   mtcrr3
 +   lwz r3, VCPU_CRIT_SAVE(r4)
 +   mfspr   r4, \scratch
 +   rfci
 
 What is this part doing? Try to ignore the debug exit?
 
 As BOOKE doesn't have hardware support for virtualization, the hardware never
 knows whether the current pc is in the guest or in the host.
 So when hardware single step is enabled for the guest, it cannot be disabled at
 the time of guest exit. Thus, we'll see a single-step interrupt happen at the
 beginning of the guest exit path.
 
 With the above code we recognize this kind of single-step interrupt, disable
 single step, and rfci.
 
 Why would we have MSR_DE
 enabled in the first place when we can't handle it?
 
 When QEMU is using the hardware debug resources, we always set MSR_DE while
 the guest is running.

Right, but why is MSR_DE enabled during the exit path? If MSR_DE wasn't set, 
you wouldn't get a single step exit. During the exit code path, you could then 
swap DBSR back to what the host expects (which means no single step). Only 
after that enable MSR_DE again.

 
 
 +1: /* debug interrupt happened in guest */
 +   mtcrr3
 +   mfspr   r4, SPRN_SPRG_THREAD
 +   lwz r4, THREAD_KVM_VCPU(r4)
 +   lwz r3, VCPU_CRIT_SAVE(r4)
 +   __KVM_HANDLER \ivor_nr \scratch \srr0
 
 I don't think you need the __KVM_HANDLER split. This should be quite easily
 refactorable into a simple DBG prolog.
 
 Can you please

Re: [Qemu-devel] QEMU buildbot maintenance state

2013-01-31 Thread Stefan Hajnoczi

On Wed, Jan 30, 2013 at 10:31:22AM +0100, Gerd Hoffmann wrote:
   Hi,
 
  Gerd: Are you willing to co-maintain the QEMU buildmaster with Daniel
  and Christian?  It would be awesome if you could do this given your
  experience running and customizing buildbot.
 
 I'll try to set aside some time for that.  Christians idea to host the
 config at github is good, that certainly makes it easier to balance
 things to more people.
 
 Another thing which would be helpful:  Any chance we can setup a
 maintainer tree mirror @ git.qemu.org?  A single repository where each
 maintainer tree shows up as a branch?
 
 This would make the buildbot setup *a lot* easier.  We can go for an
 AnyBranchScheduler then with BuildFactory and BuildConfig shared,
 instead of needing one BuildFactory and BuildConfig per branch.  It also
 makes the buildbot web interface less cluttered as we don't have an
 insane amount of BuildConfigs any more.  And it saves some resources
 (bandwidth + diskspace) for the buildslaves.
 
 I think it would also be a nice service for people who want to see what is
 coming, or who want to test stuff that is cooking, if they had a one-stop
 shop where they can get everything.

I sent a pull request that makes the BuildFactory definitions simpler
using a single create_build_factory() function:

https://github.com/b1-systems/buildbot/pull/1

Keep in mind that BuildFactories differ not just by repo/branch but
also:
 * in-tree or out-of-tree
 * extra ./configure arguments
 * gmake instead of make

I think this means it is not as simple as defining a single
BuildFactory.

Stefan
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [Qemu-devel] QEMU buildbot maintenance state

2013-01-31 Thread Stefan Hajnoczi

On Wed, Jan 30, 2013 at 10:31:22AM +0100, Gerd Hoffmann wrote:
   Hi,
 
  Gerd: Are you willing to co-maintain the QEMU buildmaster with Daniel
  and Christian?  It would be awesome if you could do this given your
  experience running and customizing buildbot.
 
 I'll try to set aside some time for that.

Excellent, thank you!

Stefan
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [Qemu-devel] QEMU buildbot maintenance state

2013-01-31 Thread Christian Berendt


On 01/31/2013 01:54 PM, Stefan Hajnoczi wrote:

I sent a pull request that makes the BuildFactory definitions simpler
using a single create_build_factory() function:

https://github.com/b1-systems/buildbot/pull/1


Stefan, I'll have a look later today.

Christian.

--
Christian Berendt
Tel.: +49-171-5542175
Mail: bere...@b1-systems.de

B1 Systems GmbH
Osterfeldstraße 7 / 85088 Vohburg / http://www.b1-systems.de
GF: Ralph Dehner / Unternehmenssitz: Vohburg / AG: Ingolstadt,HRB 3537
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH 5/8] KVM: PPC: debug stub interface parameter defined

2013-01-31 Thread Alexander Graf

On 30.01.2013, at 15:15, Bhushan Bharat-R65777 wrote:

 -Original Message-
 From: Alexander Graf [mailto:ag...@suse.de]
 Sent: Friday, January 25, 2013 5:24 PM
 To: Bhushan Bharat-R65777
 Cc: Paul Mackerras; kvm-...@vger.kernel.org; kvm@vger.kernel.org
 Subject: Re: [PATCH 5/8] KVM: PPC: debug stub interface parameter defined

 On 17.01.2013, at 12:11, Bhushan Bharat-R65777 wrote:

 -Original Message-
 From: Paul Mackerras [mailto:pau...@samba.org]
 Sent: Thursday, January 17, 2013 12:53 PM
 To: Bhushan Bharat-R65777
 Cc: kvm-...@vger.kernel.org; kvm@vger.kernel.org; ag...@suse.de;
 Bhushan Bharat-
 R65777
 Subject: Re: [PATCH 5/8] KVM: PPC: debug stub interface parameter
 defined

 On Wed, Jan 16, 2013 at 01:54:42PM +0530, Bharat Bhushan wrote:
 This patch defines the interface parameter for KVM_SET_GUEST_DEBUG
 ioctl support. Follow up patches will use this for setting up
 hardware breakpoints, watchpoints and software breakpoints.

 [snip]

 diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
 index 453a10f..7d5a51c 100644
 --- a/arch/powerpc/kvm/booke.c
 +++ b/arch/powerpc/kvm/booke.c
 @@ -1483,6 +1483,12 @@ int kvm_vcpu_ioctl_set_one_reg(struct
 kvm_vcpu *vcpu,
 struct kvm_one_reg *reg)
   return r;
 }

 +int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
 +  struct kvm_guest_debug *dbg)
 +{
 + return -EINVAL;
 +}
 +
 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct
 kvm_fpu
 *fpu)  {
   return -ENOTSUPP;
 diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
 index 934413c..4c94ca9 100644
 --- a/arch/powerpc/kvm/powerpc.c
 +++ b/arch/powerpc/kvm/powerpc.c
 @@ -532,12 +532,6 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
 #endif  }

 -int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
 -struct kvm_guest_debug *dbg)
 -{
 - return -EINVAL;
 -}
 -

 This will break the build for non-book E machines, since
 kvm_arch_vcpu_ioctl_set_guest_debug() is referenced from generic code.
 You need to add it to arch/powerpc/kvm/book3s.c as well.

 right,  I will correct this.

 Would the implementation actually be different on booke vs book3s? My 
 feeling is
 that powerpc.c is actually the right place for this.

 I am not sure there will be anything common between book3s and booke. Should 
 we define the cpu specific function something like 
 kvm_ppc_vcpu_ioctl_set_guest_debug() for booke and book3s and call this new 
 defined function from kvm_arch_vcpu_ioctl_set_guest_debug() in powerpc.c ?

No, just put it into the subarch directories then :). No need to overengineer 
anything for now.

Alex

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH 1/5] KVM: PPC: e500: Move VCPU's MMUCFG register initialization earlier

2013-01-31 Thread Alexander Graf


On 30.01.2013, at 14:29, Mihai Caraman wrote:

 VCPU's MMUCFG register initialization should not depend on KVM_CAP_SW_TLB
 ioctl call. Move it earlier into TLB initialization phase.

Quite the contrary. The fact that there is an mfspr() in e500_mmu.c already 
tells us that the code is broken. The TLB guest code should only depend on 
input from the SW_TLB configuration. It's completely orthogonal to the host 
capabilities.


Alex

 
 Signed-off-by: Mihai Caraman mihai.cara...@freescale.com
 ---
 arch/powerpc/kvm/e500_mmu.c |4 ++--
 1 files changed, 2 insertions(+), 2 deletions(-)
 
 diff --git a/arch/powerpc/kvm/e500_mmu.c b/arch/powerpc/kvm/e500_mmu.c
 index 5c44759..bb1b2b0 100644
 --- a/arch/powerpc/kvm/e500_mmu.c
 +++ b/arch/powerpc/kvm/e500_mmu.c
 @@ -692,8 +692,6 @@ int kvm_vcpu_ioctl_config_tlb(struct kvm_vcpu *vcpu,
   vcpu_e500->gtlb_offset[0] = 0;
   vcpu_e500->gtlb_offset[1] = params.tlb_sizes[0];
 
 - vcpu->arch.mmucfg = mfspr(SPRN_MMUCFG) & ~MMUCFG_LPIDSIZE;
 -
   vcpu->arch.tlbcfg[0] &= ~(TLBnCFG_N_ENTRY | TLBnCFG_ASSOC);
   if (params.tlb_sizes[0] <= 2048)
   vcpu->arch.tlbcfg[0] |= params.tlb_sizes[0];
 @@ -781,6 +779,8 @@ int kvmppc_e500_tlb_init(struct kvmppc_vcpu_e500 
 *vcpu_e500)
   if (!vcpu_e500->g2h_tlb1_map)
   goto err;
 
 + vcpu->arch.mmucfg = mfspr(SPRN_MMUCFG) & ~MMUCFG_LPIDSIZE;
 +
   /* Init TLB configuration register */
   vcpu->arch.tlbcfg[0] = mfspr(SPRN_TLB0CFG) &
~(TLBnCFG_N_ENTRY | TLBnCFG_ASSOC);
 -- 
 1.7.4.1
 
 
 --
 To unsubscribe from this list: send the line unsubscribe kvm-ppc in
 the body of a message to majord...@vger.kernel.org
 More majordomo info at  http://vger.kernel.org/majordomo-info.html

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH 2/5] KVM: PPC: e500: Emulate TLBnPS registers

2013-01-31 Thread Alexander Graf


On 30.01.2013, at 14:29, Mihai Caraman wrote:

 Emulate TLBnPS registers which are available in MMU Architecture Version
 (MAV) 2.0.
 
 Signed-off-by: Mihai Caraman mihai.cara...@freescale.com
 ---
 arch/powerpc/include/asm/kvm_host.h |1 +
 arch/powerpc/kvm/e500.h |5 +
 arch/powerpc/kvm/e500_emulate.c |   10 ++
 arch/powerpc/kvm/e500_mmu.c |5 +
 4 files changed, 21 insertions(+), 0 deletions(-)
 
 diff --git a/arch/powerpc/include/asm/kvm_host.h 
 b/arch/powerpc/include/asm/kvm_host.h
 index 8a72d59..88fcfe6 100644
 --- a/arch/powerpc/include/asm/kvm_host.h
 +++ b/arch/powerpc/include/asm/kvm_host.h
 @@ -501,6 +501,7 @@ struct kvm_vcpu_arch {
   spinlock_t wdt_lock;
   struct timer_list wdt_timer;
   u32 tlbcfg[4];
 + u32 tlbps[4];
   u32 mmucfg;
   u32 epr;
   struct kvmppc_booke_debug_reg dbg_reg;
 diff --git a/arch/powerpc/kvm/e500.h b/arch/powerpc/kvm/e500.h
 index 41cefd4..b9f76d8 100644
 --- a/arch/powerpc/kvm/e500.h
 +++ b/arch/powerpc/kvm/e500.h
 @@ -303,4 +303,9 @@ static inline unsigned int get_tlbmiss_tid(struct 
 kvm_vcpu *vcpu)
 #define get_tlb_sts(gtlbe)  (MAS1_TS)
 #endif /* !BOOKE_HV */
 
 +static inline unsigned int has_mmu_v2(const struct kvm_vcpu *vcpu)

bool. Also rename it to is_... then.

 +{
 + return ((vcpu->arch.mmucfg & MMUCFG_MAVN) == MMUCFG_MAVN_V2);
 +}
 +
 #endif /* KVM_E500_H */
 diff --git a/arch/powerpc/kvm/e500_emulate.c b/arch/powerpc/kvm/e500_emulate.c
 index e78f353..5515dc5 100644
 --- a/arch/powerpc/kvm/e500_emulate.c
 +++ b/arch/powerpc/kvm/e500_emulate.c
 @@ -329,6 +329,16 @@ int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int 
 sprn, ulong *spr_val)
   *spr_val = vcpu-arch.ivor[BOOKE_IRQPRIO_DBELL_CRIT];
   break;
 #endif
 + case SPRN_TLB0PS:
 + if (!has_mmu_v2(vcpu))
 + return EMULATE_FAIL;
 + *spr_val = vcpu-arch.tlbps[0];
 + break;
 + case SPRN_TLB1PS:
 + if (!has_mmu_v2(vcpu))
 + return EMULATE_FAIL;
 + *spr_val = vcpu-arch.tlbps[1];
 + break;
   default:
   emulated = kvmppc_booke_emulate_mfspr(vcpu, sprn, spr_val);
   }
 diff --git a/arch/powerpc/kvm/e500_mmu.c b/arch/powerpc/kvm/e500_mmu.c
 index bb1b2b0..129299a 100644
 --- a/arch/powerpc/kvm/e500_mmu.c
 +++ b/arch/powerpc/kvm/e500_mmu.c
 @@ -794,6 +794,11 @@ int kvmppc_e500_tlb_init(struct kvmppc_vcpu_e500 
 *vcpu_e500)
   vcpu-arch.tlbcfg[1] |=
   vcpu_e500-gtlb_params[1].ways  TLBnCFG_ASSOC_SHIFT;
 
 + if (has_mmu_v2(vcpu)) {
 + vcpu-arch.tlbps[0] = mfspr(SPRN_TLB0PS);
 + vcpu-arch.tlbps[1] = mfspr(SPRN_TLB1PS);

So I suppose that means that user space doesn't tell us the possible TLB entry 
sizes through the SW_TLB config? Then we should add them there.

To not break untested code paths, we can still compare if the values user space 
asks for are identical to what physical hardware does. But eventually we 
shouldn't care.


Alex

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH 3/5] KVM: PPC: e500: Remove E.PT category from VCPUs

2013-01-31 Thread Alexander Graf


On 30.01.2013, at 14:29, Mihai Caraman wrote:

 Embedded.Page Table (E.PT) category in VMs requires indirect tlb entries
 emulation which is not supported yet. Configure TLBnCFG to remove E.PT
 category from VCPUs.
 
 Signed-off-by: Mihai Caraman mihai.cara...@freescale.com

Please do this in a separate function that you call from these locations. That 
way the code is self-documenting on what it actually does.

Also add a comment to this one function, which removes E.PT-related bits from 
TLBCFG, saying that our _guest_ MMU emulation currently doesn't handle E.PT.


Alex

 ---
 arch/powerpc/kvm/e500_mmu.c |   10 ++
 1 files changed, 6 insertions(+), 4 deletions(-)
 
 diff --git a/arch/powerpc/kvm/e500_mmu.c b/arch/powerpc/kvm/e500_mmu.c
 index 129299a..9a1f7b7 100644
 --- a/arch/powerpc/kvm/e500_mmu.c
 +++ b/arch/powerpc/kvm/e500_mmu.c
 @@ -692,12 +692,14 @@ int kvm_vcpu_ioctl_config_tlb(struct kvm_vcpu *vcpu,
   vcpu_e500-gtlb_offset[0] = 0;
   vcpu_e500-gtlb_offset[1] = params.tlb_sizes[0];
 
 - vcpu-arch.tlbcfg[0] = ~(TLBnCFG_N_ENTRY | TLBnCFG_ASSOC);
 + vcpu-arch.tlbcfg[0] =
 +   ~(TLBnCFG_N_ENTRY | TLBnCFG_ASSOC | TLBnCFG_IND);
   if (params.tlb_sizes[0] = 2048)
   vcpu-arch.tlbcfg[0] |= params.tlb_sizes[0];
   vcpu-arch.tlbcfg[0] |= params.tlb_ways[0]  TLBnCFG_ASSOC_SHIFT;
 
 - vcpu-arch.tlbcfg[1] = ~(TLBnCFG_N_ENTRY | TLBnCFG_ASSOC);
 + vcpu-arch.tlbcfg[1] =
 +   ~(TLBnCFG_N_ENTRY | TLBnCFG_ASSOC | TLBnCFG_IND);
   vcpu-arch.tlbcfg[1] |= params.tlb_sizes[1];
   vcpu-arch.tlbcfg[1] |= params.tlb_ways[1]  TLBnCFG_ASSOC_SHIFT;
 
 @@ -783,13 +785,13 @@ int kvmppc_e500_tlb_init(struct kvmppc_vcpu_e500 
 *vcpu_e500)
 
   /* Init TLB configuration register */
   vcpu-arch.tlbcfg[0] = mfspr(SPRN_TLB0CFG) 
 -  ~(TLBnCFG_N_ENTRY | TLBnCFG_ASSOC);
 +  ~(TLBnCFG_N_ENTRY | TLBnCFG_ASSOC | TLBnCFG_IND);
   vcpu-arch.tlbcfg[0] |= vcpu_e500-gtlb_params[0].entries;
   vcpu-arch.tlbcfg[0] |=
   vcpu_e500-gtlb_params[0].ways  TLBnCFG_ASSOC_SHIFT;
 
   vcpu-arch.tlbcfg[1] = mfspr(SPRN_TLB1CFG) 
 -  ~(TLBnCFG_N_ENTRY | TLBnCFG_ASSOC);
 +  ~(TLBnCFG_N_ENTRY | TLBnCFG_ASSOC | TLBnCFG_IND);
   vcpu-arch.tlbcfg[1] |= vcpu_e500-gtlb_params[1].entries;
   vcpu-arch.tlbcfg[1] |=
   vcpu_e500-gtlb_params[1].ways  TLBnCFG_ASSOC_SHIFT;
 -- 
 1.7.4.1
 
 
 --
 To unsubscribe from this list: send the line unsubscribe kvm-ppc in
 the body of a message to majord...@vger.kernel.org
 More majordomo info at  http://vger.kernel.org/majordomo-info.html

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH 4/5] KVM: PPC: e500: Emulate EPTCFG register

2013-01-31 Thread Alexander Graf


On 30.01.2013, at 14:29, Mihai Caraman wrote:

 EPTCFG register defined by E.PT is accessed unconditionally by Linux guests
 in the presence of MAV 2.0. Emulate EPTCFG register now.
 
 Signed-off-by: Mihai Caraman mihai.cara...@freescale.com
 ---
 arch/powerpc/include/asm/kvm_host.h |1 +
 arch/powerpc/kvm/e500.h |6 ++
 arch/powerpc/kvm/e500_emulate.c |9 +
 arch/powerpc/kvm/e500_mmu.c |5 +
 4 files changed, 21 insertions(+), 0 deletions(-)
 
 diff --git a/arch/powerpc/include/asm/kvm_host.h 
 b/arch/powerpc/include/asm/kvm_host.h
 index 88fcfe6..f480b20 100644
 --- a/arch/powerpc/include/asm/kvm_host.h
 +++ b/arch/powerpc/include/asm/kvm_host.h
 @@ -503,6 +503,7 @@ struct kvm_vcpu_arch {
   u32 tlbcfg[4];
   u32 tlbps[4];
   u32 mmucfg;
 + u32 eptcfg;

This too needs to be settable through SW_TLB.

   u32 epr;
   struct kvmppc_booke_debug_reg dbg_reg;
 #endif
 diff --git a/arch/powerpc/kvm/e500.h b/arch/powerpc/kvm/e500.h
 index b9f76d8..983eb95 100644
 --- a/arch/powerpc/kvm/e500.h
 +++ b/arch/powerpc/kvm/e500.h
 @@ -308,4 +308,10 @@ static inline unsigned int has_mmu_v2(const struct 
 kvm_vcpu *vcpu)
   return ((vcpu-arch.mmucfg  MMUCFG_MAVN) == MMUCFG_MAVN_V2);
 }
 
 +static inline unsigned int supports_page_tables(const struct kvm_vcpu *vcpu)

bool again. Can we generalize this a bit more? How about a small framework that 
allows us to differentiate across e.XX features?

if (has_feature(vcpu, FEATURE_E_PT))
   ...


 +{
 + return ((vcpu->arch.tlbcfg[0] & TLBnCFG_IND)
 + || (vcpu->arch.tlbcfg[1] & TLBnCFG_IND));
 +}
 +
 #endif /* KVM_E500_H */
 diff --git a/arch/powerpc/kvm/e500_emulate.c b/arch/powerpc/kvm/e500_emulate.c
 index 5515dc5..493e231 100644
 --- a/arch/powerpc/kvm/e500_emulate.c
 +++ b/arch/powerpc/kvm/e500_emulate.c
 @@ -339,6 +339,15 @@ int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int 
 sprn, ulong *spr_val)
   return EMULATE_FAIL;
   *spr_val = vcpu-arch.tlbps[1];
   break;
 + case SPRN_EPTCFG:
 + if (!has_mmu_v2(vcpu))
 + return EMULATE_FAIL;
 + /*
 +  * Legacy Linux guests access EPTCFG register even if the E.PT
 +  * category is disabled in the VM. Give them a chance to live.
 +  */
 + *spr_val = vcpu-arch.eptcfg;
 + break;
   default:
   emulated = kvmppc_booke_emulate_mfspr(vcpu, sprn, spr_val);
   }
 diff --git a/arch/powerpc/kvm/e500_mmu.c b/arch/powerpc/kvm/e500_mmu.c
 index 9a1f7b7..199c11e 100644
 --- a/arch/powerpc/kvm/e500_mmu.c
 +++ b/arch/powerpc/kvm/e500_mmu.c
 @@ -799,6 +799,11 @@ int kvmppc_e500_tlb_init(struct kvmppc_vcpu_e500 
 *vcpu_e500)
   if (has_mmu_v2(vcpu)) {
   vcpu-arch.tlbps[0] = mfspr(SPRN_TLB0PS);
   vcpu-arch.tlbps[1] = mfspr(SPRN_TLB1PS);
 +
 + if (supports_page_tables(vcpu))
 + vcpu-arch.eptcfg = mfspr(SPRN_EPTCFG);

Please don't introduce new mfspr()s here :). Just have user space set it.


Alex

 + else
 + vcpu-arch.eptcfg = 0;
   }
 
   kvmppc_recalc_tlb1map_range(vcpu_e500);
 -- 
 1.7.4.1
 
 
 --
 To unsubscribe from this list: send the line unsubscribe kvm-ppc in
 the body of a message to majord...@vger.kernel.org
 More majordomo info at  http://vger.kernel.org/majordomo-info.html

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH 5/5] KVM: PPC: e500mc: Enable e6500 cores

2013-01-31 Thread Alexander Graf


On 30.01.2013, at 14:29, Mihai Caraman wrote:

 Extend processor compatibility names to e6500 cores.
 
 Signed-off-by: Mihai Caraman mihai.cara...@freescale.com

Looks good to me.

Reviewed-by: Alexander Graf ag...@suse.de


Alex

 ---
 arch/powerpc/kvm/e500mc.c |2 ++
 1 files changed, 2 insertions(+), 0 deletions(-)
 
 diff --git a/arch/powerpc/kvm/e500mc.c b/arch/powerpc/kvm/e500mc.c
 index 1f89d26..6c87299 100644
 --- a/arch/powerpc/kvm/e500mc.c
 +++ b/arch/powerpc/kvm/e500mc.c
 @@ -172,6 +172,8 @@ int kvmppc_core_check_processor_compat(void)
   r = 0;
   else if (strcmp(cur_cpu_spec->cpu_name, "e5500") == 0)
   r = 0;
 + else if (strcmp(cur_cpu_spec->cpu_name, "e6500") == 0)
 + r = 0;
   else
   r = -ENOTSUPP;
 
 -- 
 1.7.4.1
 
 
 --
 To unsubscribe from this list: send the line unsubscribe kvm-ppc in
 the body of a message to majord...@vger.kernel.org
 More majordomo info at  http://vger.kernel.org/majordomo-info.html

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH 2/5] KVM: PPC: e500: Emulate TLBnPS registers

2013-01-31 Thread Alexander Graf


On 31.01.2013, at 14:24, Alexander Graf wrote:

 
 On 30.01.2013, at 14:29, Mihai Caraman wrote:
 
 Emulate TLBnPS registers which are available in MMU Architecture Version
 (MAV) 2.0.
 
 Signed-off-by: Mihai Caraman mihai.cara...@freescale.com
 ---
 arch/powerpc/include/asm/kvm_host.h |1 +
 arch/powerpc/kvm/e500.h |5 +
 arch/powerpc/kvm/e500_emulate.c |   10 ++
 arch/powerpc/kvm/e500_mmu.c |5 +
 4 files changed, 21 insertions(+), 0 deletions(-)
 
 diff --git a/arch/powerpc/include/asm/kvm_host.h 
 b/arch/powerpc/include/asm/kvm_host.h
 index 8a72d59..88fcfe6 100644
 --- a/arch/powerpc/include/asm/kvm_host.h
 +++ b/arch/powerpc/include/asm/kvm_host.h
 @@ -501,6 +501,7 @@ struct kvm_vcpu_arch {
  spinlock_t wdt_lock;
  struct timer_list wdt_timer;
  u32 tlbcfg[4];
 +u32 tlbps[4];
  u32 mmucfg;
  u32 epr;
  struct kvmppc_booke_debug_reg dbg_reg;
 diff --git a/arch/powerpc/kvm/e500.h b/arch/powerpc/kvm/e500.h
 index 41cefd4..b9f76d8 100644
 --- a/arch/powerpc/kvm/e500.h
 +++ b/arch/powerpc/kvm/e500.h
 @@ -303,4 +303,9 @@ static inline unsigned int get_tlbmiss_tid(struct 
 kvm_vcpu *vcpu)
 #define get_tlb_sts(gtlbe)  (MAS1_TS)
 #endif /* !BOOKE_HV */
 
 +static inline unsigned int has_mmu_v2(const struct kvm_vcpu *vcpu)
 
 bool. Also rename it to is_... then.

In light of the comment I made on a later patch, this too could be converted to 
feature flags.


Alex

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH 2/2] x86, apicv: Add Posted Interrupt supporting

2013-01-31 Thread Marcelo Tosatti

On Thu, Jan 31, 2013 at 11:43:48AM +0200, Gleb Natapov wrote:
 On Wed, Jan 30, 2013 at 09:03:11PM -0200, Marcelo Tosatti wrote:
  Posted interrupt patch:
  2) Must move IN_GUEST_MODE assignment after local_irq_disable, in
  vcpu_enter_guest function. Otherwise:
  
  cpu0vcpu1-cpu1
  
  vcpu-mode = IN_GUEST_MODE
  
  if IN_GUEST_MODE == true
  send IPI
  local_irq_disable
  
  PIR not transferred to VIRR, misses interrupt.
  
 cpu0 will set KVM_REQ_EVENT, so vmentry will be aborted after
 local_irq_disable() by -requests check.

Yes, but you don't want KVM_REQ_EVENT+kick. It defeats the purpose 
of posted interrupts. You want

if vcpu in guest mode
send posted interrupt IPI
else
KVM_REQ_EVENT+kick

  3) Must check outstanding PIR notification bit unconditionally on
  every VM-entry, because:
  
  1. local_irq_disable
  2. vcpu-mode = IN_GUEST_MODE
  3. vmenter
  4. vmexit
  5. vcpu-mode = OUTSIDE_GUEST_MODE
  
  If PIR-IPI-interrupt is sent between an event which triggers VM-exit
  (for example, an external interrupt due to a device), and step 5
  (assignment of vcpu-mode), the PIR-VIRR transfer before vmentry must
  be made.
 Not sure I understand, but I think KVM_REQ_EVENT will cover that too.

See above.

  
  4) Today, an interrupt notification is cached on IRR until it's delivered - 
  further
  interrupt injection is not generating further interrupt notification
  bits. With PIR, behaviour changes: It's possible to have one bit in PIR and 
  another 
  on IRR APIC page (if timing is right). Is this harmless? Why?
  
  
 
 --
   Gleb.
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH 2/2] x86, apicv: Add Posted Interrupt supporting

2013-01-31 Thread Gleb Natapov

On Thu, Jan 31, 2013 at 11:32:45AM -0200, Marcelo Tosatti wrote:
 On Thu, Jan 31, 2013 at 11:43:48AM +0200, Gleb Natapov wrote:
  On Wed, Jan 30, 2013 at 09:03:11PM -0200, Marcelo Tosatti wrote:
   Posted interrupt patch:
   2) Must move IN_GUEST_MODE assignment after local_irq_disable, in
   vcpu_enter_guest function. Otherwise:
   
   cpu0  vcpu1-cpu1
   
 vcpu-mode = IN_GUEST_MODE
   
   if IN_GUEST_MODE == true
 send IPI
 local_irq_disable
   
   PIR not transferred to VIRR, misses interrupt.
   
  cpu0 will set KVM_REQ_EVENT, so vmentry will be aborted after
  local_irq_disable() by -requests check.
 
 Yes, but you don't want KVM_REQ_EVENT+kick. It defeats the purpose 
 of posted interrupts. You want
 
 if vcpu in guest mode
   send posted interrupt IPI
 else
   KVM_REQ_EVENT+kick
 
I am thinking:

 set KVM_REQ_EVENT
 if pi is enabled
 send posted interrupt IPI
 else
 kick

   3) Must check outstanding PIR notification bit unconditionally on
   every VM-entry, because:
   
 1. local_irq_disable
 2. vcpu-mode = IN_GUEST_MODE
 3. vmenter
 4. vmexit
 5. vcpu-mode = OUTSIDE_GUEST_MODE
   
   If PIR-IPI-interrupt is sent between an event which triggers VM-exit
   (for example, an external interrupt due to a device), and step 5
   (assignment of vcpu-mode), the PIR-VIRR transfer before vmentry must
   be made.
  Not sure I understand, but I think KVM_REQ_EVENT will cover that too.
 
 See above.
 
   
   4) Today, an interrupt notification is cached on IRR until its delivered 
   - further
   interrupt injection is not generating further interrupt notification
   bits. With PIR, behaviour changes: Its possible to have one bit in PIR 
   and another 
   on IRR APIC page (if timing is right). Is this harmless? Why?
   
   
  
  --
  Gleb.

--
Gleb.
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH 2/2] x86, apicv: Add Posted Interrupt supporting

2013-01-31 Thread Marcelo Tosatti

On Thu, Jan 31, 2013 at 03:38:37PM +0200, Gleb Natapov wrote:
 On Thu, Jan 31, 2013 at 11:32:45AM -0200, Marcelo Tosatti wrote:
  On Thu, Jan 31, 2013 at 11:43:48AM +0200, Gleb Natapov wrote:
   On Wed, Jan 30, 2013 at 09:03:11PM -0200, Marcelo Tosatti wrote:
Posted interrupt patch:
2) Must move IN_GUEST_MODE assignment after local_irq_disable, in
vcpu_enter_guest function. Otherwise:

cpu0vcpu1-cpu1

vcpu-mode = IN_GUEST_MODE

if IN_GUEST_MODE == true
send IPI
local_irq_disable

PIR not transferred to VIRR, misses interrupt.

   cpu0 will set KVM_REQ_EVENT, so vmentry will be aborted after
   local_irq_disable() by -requests check.
  
  Yes, but you don't want KVM_REQ_EVENT+kick. It defeats the purpose 
  of posted interrupts. You want
  
  if vcpu in guest mode
  send posted interrupt IPI
  else
  KVM_REQ_EVENT+kick
  
 I am thinking:
 
  set KVM_REQ_EVENT
  if pi is enabled
  send posted interrupt IPI
  else
  kick

KVM_REQ_EVENT must be after sending posted interrupt IPI. Otherwise on
the vcpu entry side

test_and_clear(KVM_REQ_EVENT) {
No bits set in PIR
   }

What about item 4 below?

3) Must check outstanding PIR notification bit unconditionally on
every VM-entry, because:

1. local_irq_disable
2. vcpu-mode = IN_GUEST_MODE
3. vmenter
4. vmexit
5. vcpu-mode = OUTSIDE_GUEST_MODE

If PIR-IPI-interrupt is sent between an event which triggers VM-exit
(for example, an external interrupt due to a device), and step 5
(assignment of vcpu-mode), the PIR-VIRR transfer before vmentry must
be made.
   Not sure I understand, but I think KVM_REQ_EVENT will cover that too.
  
  See above.
  

4) Today, an interrupt notification is cached on IRR until its 
delivered - further
interrupt injection is not generating further interrupt notification
bits. With PIR, behaviour changes: Its possible to have one bit in PIR 
and another 
on IRR APIC page (if timing is right). Is this harmless? Why?


   
   --
 Gleb.
 
 --
   Gleb.
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [Qemu-devel] [PATCH V4 00/22] Multiqueue virtio-net

2013-01-31 Thread Eric Blake

On 01/31/2013 12:00 AM, Jason Wang wrote:
 On 01/31/2013 02:29 AM, Eric Blake wrote:
 On 01/30/2013 04:12 AM, Jason Wang wrote:

 With these changes, user could start a multiqueue virtio-net device through

 ./qemu -netdev tap,id=hn0,queues=2,vhost=on -device 
 virtio-net-pci,netdev=hn0

 Management tools such as libvirt can pass multiple pre-created fds/vhostfds 
 through

 ./qemu -netdev tap,id=hn0,fds=X:Y,vhostfds=M:N -device 
 virtio-net-pci,netdev=hn0
 Do we really need specific fds= parsing, or can we reuse the existing
 -add-fd command line option to our advantage?  I guess what I'm asking
 is how hotplug will work; and if hotplug takes a file name, shouldn't
 the command line also take a name; and if the command line takes a name,
 what's wrong with:

 ./qemu -add-fd fdset=1,fd=X -add-fd fdset=2,fd=Y -add-fd fdset=3,fd=M
 -add-fd fdset=4,fd=N -netdev
 tap,id=hn0,fds=/dev/fdset/1:/dev/fdset/2,vhostfds=/dev/fdset/3:/dev/fdset/4
 -device virtio-net-pci,netdev=hn0

 
 AFAIK, tap does not support fdset now, so this requirement is beyond the
 scope of multiqueue itself. We can do this in the future. Btw does
 libvirt support add-fd now?

Anything that uses qemu_open() supports fdset now.  The question I'm
asking is whether the command line has a way to pass /path/to/name
(which can be presented as /dev/fdset/nnn for add-fd usage) now, or
whether it only supports fds=integers.

 
 For hotplug, it just works if you pass multiple file descriptors one by
 one through getfd and then use fds=X:Y,vhostfds=M:N.

For hotplug, you can't pass integers; you have to name the fds either
way.  Either you name it with getfd, or you name it with add-fd.  But
getfd is not as nice as add-fd when it comes to ensuring that fds are
not leaked in qemu, even when the management app such as libvirt
restarts.  Furthermore, if it is possible to specify taps by pathname
instead of by fd inheritance, then using getfd means you have to support
two different approaches in QMP to distinguish which string is being
supplied, while supporting add-fd means you only have to support
qemu_open() which handles both direct names and fd passing in a single
string interface.

As for libvirt support of add-fd, I'm currently working with Stefan
Berger to get patches applied; the goal is that libvirt 1.0.3 (end of
February) will support add-fd.

-- 
Eric Blake   eblake redhat com+1-919-301-3266
Libvirt virtualization library http://libvirt.org



signature.asc
Description: OpenPGP digital signature

Re: [Qemu-devel] [PATCH V4 00/22] Multiqueue virtio-net

2013-01-31 Thread Michael S. Tsirkin

On Thu, Jan 31, 2013 at 06:44:49AM -0700, Eric Blake wrote:
 On 01/31/2013 12:00 AM, Jason Wang wrote:
  On 01/31/2013 02:29 AM, Eric Blake wrote:
  On 01/30/2013 04:12 AM, Jason Wang wrote:
 
  With this changes, user could start a multiqueue virtio-net device through
 
  ./qemu -netdev tap,id=hn0,queues=2,vhost=on -device 
  virtio-net-pci,netdev=hn0
 
  Management tools such as libvirt can pass multiple pre-created 
  fds/vhostfds through
 
  ./qemu -netdev tap,id=hn0,fds=X:Y,vhostfds=M:N -device 
  virtio-net-pci,netdev=hn0
  Do we really need specific fds= parsing, or can we reuse the existing
  -add-fd command line option to our advantage?  I guess what I'm asking
  is how hotplug will work; and if hotplug takes a file name, shouldn't
  the command line also take a name; and if the command line takes a name,
  what's wrong with:
 
  ./qemu -add-fd fdset=1,fd=X -add-fd fdset=2,fd=Y -add-fd fdset=3,fd=M
  -add-fd fdset=4,fd=N -netdev
  tap,id=hn0,fds=/dev/fdset/1:/dev/fdset/2,vhostfds=/dev/fdset/3:/dev/fdset/4
  -device virtio-net-pci,netdev=hn0
 
  
  AFAIK, tap does not support fdset now, so this requirement is beyond the
  scope of multiqueue itself. We can do this in the future. Btw does
  libvirt support add-fd now?
 
 Anything that uses qemu_open() supports fdset now.  The question I'm
 asking is whether the command line has a way to pass /path/to/name
 (which can be presented as /dev/fdset/nnn for add-fd usage) now, or
 whether it only supports fds=integers.
 
  
  For hotplug, it just work if you pass multiple file descriptors one by
  one through getfd and then use fds=X:Y,vhostfds=M:N.
 
 For hotplug, you can't pass integers; you have to name the fds either
 way.  Either you name it with getfd, or you name it with add-fd.  But
 getfd is not as nice as add-fd when it comes to ensuring that fds are
 not leaked in qemu, even when the management app such as libvirt
 restarts.  Furthermore, if it is possible to specify taps by pathname
 instead of by fd inheritance,

I don't think there's a way to specify taps by pathname.

 then using getfd means you have to support
 two different approaches in QMP to distinguish which string is being
 supplied, while supporting add-fd means you only have to support
 qemu_open() which handles both direct names and fd passing in a single
 string interface.
 
 As for libvirt support of add-fd, I'm currently working with Stefan
 Berger to get patches applied; the goal is tha libvirt 1.0.3 (end of
 February) will support add-fd.
 
 -- 
 Eric Blake   eblake redhat com+1-919-301-3266
 Libvirt virtualization library http://libvirt.org
 


--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH 2/2] x86, apicv: Add Posted Interrupt supporting

2013-01-31 Thread Gleb Natapov

On Thu, Jan 31, 2013 at 11:44:43AM -0200, Marcelo Tosatti wrote:
 On Thu, Jan 31, 2013 at 03:38:37PM +0200, Gleb Natapov wrote:
  On Thu, Jan 31, 2013 at 11:32:45AM -0200, Marcelo Tosatti wrote:
   On Thu, Jan 31, 2013 at 11:43:48AM +0200, Gleb Natapov wrote:
On Wed, Jan 30, 2013 at 09:03:11PM -0200, Marcelo Tosatti wrote:
 Posted interrupt patch:
 2) Must move IN_GUEST_MODE assignment after local_irq_disable, in
 vcpu_enter_guest function. Otherwise:
 
 cpu0  vcpu1-cpu1
 
   vcpu-mode = IN_GUEST_MODE
 
 if IN_GUEST_MODE == true
   send IPI
   local_irq_disable
 
 PIR not transferred to VIRR, misses interrupt.
 
cpu0 will set KVM_REQ_EVENT, so vmentry will be aborted after
local_irq_disable() by -requests check.
   
   Yes, but you don't want KVM_REQ_EVENT+kick. It defeats the purpose 
   of posted interrupts. You want
   
   if vcpu in guest mode
 send posted interrupt IPI
   else
 KVM_REQ_EVENT+kick
   
  I am thinking:
  
   set KVM_REQ_EVENT
   if pi is enabled
   send posted interrupt IPI
   else
   kick
 
 KVM_REQ_EVENT must be after sending posted interrupt IPI. Otherwise on
 the vcpu entry side
 
 test_and_clear(KVM_REQ_EVENT) {
   No bits set in PIR
}
 
It should be after updating PIR, but before sending posted interrupt
IPI. Otherwise:

 cpu0 cpu1/vcpu

  KVM_REQ_EVENT is not set
set pir 
send IPI
  irq_disable()
  -request is empty.
set KVM_REQ_EVENT

That's the same sequence as with IRR update, KVM_REQ_EVENT and kick
today.

 What about item 4 below?
 
That's for Yang to answer :)

 3) Must check outstanding PIR notification bit unconditionally on
 every VM-entry, because:
 
   1. local_irq_disable
   2. vcpu-mode = IN_GUEST_MODE
   3. vmenter
   4. vmexit
   5. vcpu-mode = OUTSIDE_GUEST_MODE
 
 If PIR-IPI-interrupt is sent between an event which triggers VM-exit
 (for example, an external interrupt due to a device), and step 5
 (assignment of vcpu->mode), the PIR->VIRR transfer before vmentry must
 be made.
Not sure I understand, but I think KVM_REQ_EVENT will cover that too.
   
   See above.
   
 
 4) Today, an interrupt notification is cached on IRR until it's 
 delivered - further
 interrupt injection is not generating further interrupt notification
 bits. With PIR, behaviour changes: It's possible to have one bit in 
 PIR and another 
 on IRR APIC page (if timing is right). Is this harmless? Why?
 
 

--
Gleb.
  
  --
  Gleb.

--
Gleb.
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

RE: [PATCH 5/8] KVM: PPC: debug stub interface parameter defined

2013-01-31 Thread Bhushan Bharat-R65777

 -Original Message-
 From: kvm-ppc-ow...@vger.kernel.org [mailto:kvm-ppc-ow...@vger.kernel.org] On
 Behalf Of Alexander Graf
 Sent: Thursday, January 31, 2013 6:31 PM
 To: Bhushan Bharat-R65777
 Cc: Paul Mackerras; kvm-...@vger.kernel.org; kvm@vger.kernel.org
 Subject: Re: [PATCH 5/8] KVM: PPC: debug stub interface parameter defined

 On 30.01.2013, at 15:15, Bhushan Bharat-R65777 wrote:

  -Original Message-
  From: Alexander Graf [mailto:ag...@suse.de]
  Sent: Friday, January 25, 2013 5:24 PM
  To: Bhushan Bharat-R65777
  Cc: Paul Mackerras; kvm-...@vger.kernel.org; kvm@vger.kernel.org
  Subject: Re: [PATCH 5/8] KVM: PPC: debug stub interface parameter
  defined

  On 17.01.2013, at 12:11, Bhushan Bharat-R65777 wrote:

  -Original Message-
  From: Paul Mackerras [mailto:pau...@samba.org]
  Sent: Thursday, January 17, 2013 12:53 PM
  To: Bhushan Bharat-R65777
  Cc: kvm-...@vger.kernel.org; kvm@vger.kernel.org; ag...@suse.de;
  Bhushan Bharat-
  R65777
  Subject: Re: [PATCH 5/8] KVM: PPC: debug stub interface parameter
  defined

  On Wed, Jan 16, 2013 at 01:54:42PM +0530, Bharat Bhushan wrote:
  This patch defines the interface parameter for KVM_SET_GUEST_DEBUG
  ioctl support. Follow up patches will use this for setting up
  hardware breakpoints, watchpoints and software breakpoints.

  [snip]

  diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
  index 453a10f..7d5a51c 100644
  --- a/arch/powerpc/kvm/booke.c
  +++ b/arch/powerpc/kvm/booke.c
  @@ -1483,6 +1483,12 @@ int kvm_vcpu_ioctl_set_one_reg(struct
  kvm_vcpu *vcpu,
  struct kvm_one_reg *reg)
  return r;
  }

  +int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
  +struct kvm_guest_debug *dbg) {
  +   return -EINVAL;
  +}
  +
  int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct
  kvm_fpu
  *fpu)  {
  return -ENOTSUPP;
  diff --git a/arch/powerpc/kvm/powerpc.c
  b/arch/powerpc/kvm/powerpc.c index 934413c..4c94ca9 100644
  --- a/arch/powerpc/kvm/powerpc.c
  +++ b/arch/powerpc/kvm/powerpc.c
  @@ -532,12 +532,6 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
  #endif  }

  -int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
  -struct kvm_guest_debug *dbg)
  -{
  -   return -EINVAL;
  -}
  -

  This will break the build for non-book E machines, since
  kvm_arch_vcpu_ioctl_set_guest_debug() is referenced from generic code.
  You need to add it to arch/powerpc/kvm/book3s.c as well.

  right,  I will correct this.

  Would the implementation actually be different on booke vs book3s? My
  feeling is that powerpc.c is actually the right place for this.

  I am not sure there will be anything common between book3s and booke. Should
 we define the cpu specific function something like
 kvm_ppc_vcpu_ioctl_set_guest_debug() for booke and book3s and call this new
 defined function from kvm_arch_vcpu_ioctl_set_guest_debug() in powerpc.c ?

 No, just put it into the subarch directories then :). No need to overengineer
 anything for now.

What do you mean by subarch? Above you mentioned that powerpc.c is the right place.
Isn't this patch partially doing that? :)

Thanks
-Bharat

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH V4 00/22] Multiqueue virtio-net

2013-01-31 Thread Michael S. Tsirkin

On Wed, Jan 30, 2013 at 07:12:19PM +0800, Jason Wang wrote:
 Hello all:
 
 This series is an update of last version of multiqueue virtio-net support.
 
 This series tries to brings multiqueue support to virtio-net through a
 multiqueue support tap backend and multiple vhost threads.
 
 Patch 1 converts bitfield in TAPState to bool. Patch 2 replace assert(0) with
 abort() in tap.
 
 To support this, multiqueue nic support were added to qemu. This is done by
 introducing an array of NetClientStates in NICState, and make each pair of 
 peers
 to be an queue of the nic. This is done in patch 3-9.
 
 Tap were also converted to be able to create a multiple queue
 backend. Currently, only linux support this by issuing TUNSETIFF N times with
 the same device name to create N queues. Each fd returned by TUNSETIFF were a
 queue supported by kernel. Three new command lines were introduced, queues
 were used to tell how many queues will be created by qemu; fds were used to
 pass multiple pre-created tap file descriptors to qemu; vhostfds were used 
 to
 pass multiple pre-created vhost descriptors to qemu. This is done in patch 
 10-15.
 
 A method of deleting a queue and queue_index were also introduce for virtio,
 this is done in patch 16-17.
 
 Vhost were also changed to support multiqueue by introducing a start vq index
 which tracks the first virtqueue that will be used by vhost instead of the
 assumption that the vhost always use virtqueue from index 0. This is done in
 patch 18.
 
 The last part is the multiqueue userspace changes, this is done in patch 
 19-22.
 
 With this changes, user could start a multiqueue virtio-net device through
 
 ./qemu -netdev tap,id=hn0,queues=2,vhost=on -device virtio-net-pci,netdev=hn0
 
 Management tools such as libvirt can pass multiple pre-created fds/vhostfds 
 through
 
 ./qemu -netdev tap,id=hn0,fds=X:Y,vhostfds=M:N -device 
 virtio-net-pci,netdev=hn0
 
 For the one who wants to try, a git tree is available at:
 git://github.com/jasowang/qemu.git
 
 Changes from V3:
 - convert bitfield to bool in TAPState (Blue)
 - use abort() instead of assert(0) in tap code (Blue)
 - rebase to the latest
 - fix a bug that breaks the non-tap network

This conflicts with the pull request I sent, in particular this adds a
layout assumption.  In the hope this will accelerate things, I did a
rebase and a trivial test with single queue only and it seems ok:

git://github.com/mstsirkin/qemu.git pci

There were some warnings about whitespace at EOF but
otherwise seems ok.

-- 
MST
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH 5/8] KVM: PPC: debug stub interface parameter defined

2013-01-31 Thread Alexander Graf

On 31.01.2013, at 15:05, Bhushan Bharat-R65777 wrote:

 -Original Message-
 From: kvm-ppc-ow...@vger.kernel.org [mailto:kvm-ppc-ow...@vger.kernel.org] On
 Behalf Of Alexander Graf
 Sent: Thursday, January 31, 2013 6:31 PM
 To: Bhushan Bharat-R65777
 Cc: Paul Mackerras; kvm-...@vger.kernel.org; kvm@vger.kernel.org
 Subject: Re: [PATCH 5/8] KVM: PPC: debug stub interface parameter defined

 On 30.01.2013, at 15:15, Bhushan Bharat-R65777 wrote:

 -Original Message-
 From: Alexander Graf [mailto:ag...@suse.de]
 Sent: Friday, January 25, 2013 5:24 PM
 To: Bhushan Bharat-R65777
 Cc: Paul Mackerras; kvm-...@vger.kernel.org; kvm@vger.kernel.org
 Subject: Re: [PATCH 5/8] KVM: PPC: debug stub interface parameter
 defined

 On 17.01.2013, at 12:11, Bhushan Bharat-R65777 wrote:

 -Original Message-
 From: Paul Mackerras [mailto:pau...@samba.org]
 Sent: Thursday, January 17, 2013 12:53 PM
 To: Bhushan Bharat-R65777
 Cc: kvm-...@vger.kernel.org; kvm@vger.kernel.org; ag...@suse.de;
 Bhushan Bharat-
 R65777
 Subject: Re: [PATCH 5/8] KVM: PPC: debug stub interface parameter
 defined

 On Wed, Jan 16, 2013 at 01:54:42PM +0530, Bharat Bhushan wrote:
 This patch defines the interface parameter for KVM_SET_GUEST_DEBUG
 ioctl support. Follow up patches will use this for setting up
 hardware breakpoints, watchpoints and software breakpoints.

 [snip]

 diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
 index 453a10f..7d5a51c 100644
 --- a/arch/powerpc/kvm/booke.c
 +++ b/arch/powerpc/kvm/booke.c
 @@ -1483,6 +1483,12 @@ int kvm_vcpu_ioctl_set_one_reg(struct
 kvm_vcpu *vcpu,
 struct kvm_one_reg *reg)
 return r;
 }

 +int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
 +struct kvm_guest_debug *dbg) {
 +   return -EINVAL;
 +}
 +
 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct
 kvm_fpu
 *fpu)  {
 return -ENOTSUPP;
 diff --git a/arch/powerpc/kvm/powerpc.c
 b/arch/powerpc/kvm/powerpc.c index 934413c..4c94ca9 100644
 --- a/arch/powerpc/kvm/powerpc.c
 +++ b/arch/powerpc/kvm/powerpc.c
 @@ -532,12 +532,6 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
 #endif  }

 -int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
 -struct kvm_guest_debug *dbg)
 -{
 -   return -EINVAL;
 -}
 -

 This will break the build for non-book E machines, since
 kvm_arch_vcpu_ioctl_set_guest_debug() is referenced from generic code.
 You need to add it to arch/powerpc/kvm/book3s.c as well.

 right,  I will correct this.

 Would the implementation actually be different on booke vs book3s? My
 feeling is that powerpc.c is actually the right place for this.

 I am not sure there will be anything common between book3s and booke. Should
 we define the cpu specific function something like
 kvm_ppc_vcpu_ioctl_set_guest_debug() for booke and book3s and call this new
 defined function from kvm_arch_vcpu_ioctl_set_guest_debug() in powerpc.c ?

 No, just put it into the subarch directories then :). No need to overengineer
 anything for now.

 What you mean by subarch?  Above you mentioned that powerpc.c is right place? 
 Is not this patch is doing partially :)

If the code in powerpc.c only says

void kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, struct 
kvm_guest_debug *dbg) {
kvmppc_core_set_guest_debug(vcpu, dbg);
}

then doing it in powerpc.c is obviously moot. Since there is no other debug 
implementation, it's ok if we try and find (and create) commonalities later. So 
yes, it's ok if you put it into booke.c or even e500.c. Just make sure to not 
break any other archs (440, book3s_pr, book3s_hv).

Alex

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH V4 00/22] Multiqueue virtio-net

2013-01-31 Thread Michael S. Tsirkin

On Thu, Jan 31, 2013 at 04:21:49PM +0200, Michael S. Tsirkin wrote:
 On Wed, Jan 30, 2013 at 07:12:19PM +0800, Jason Wang wrote:
  Hello all:
  
  This series is an update of last version of multiqueue virtio-net support.
  
  This series tries to brings multiqueue support to virtio-net through a
  multiqueue support tap backend and multiple vhost threads.
  
  Patch 1 converts bitfield in TAPState to bool. Patch 2 replace assert(0) 
  with
  abort() in tap.
  
  To support this, multiqueue nic support were added to qemu. This is done by
  introducing an array of NetClientStates in NICState, and make each pair of 
  peers
  to be an queue of the nic. This is done in patch 3-9.
  
  Tap were also converted to be able to create a multiple queue
  backend. Currently, only linux support this by issuing TUNSETIFF N times 
  with
  the same device name to create N queues. Each fd returned by TUNSETIFF were 
  a
  queue supported by kernel. Three new command lines were introduced, queues
  were used to tell how many queues will be created by qemu; fds were used 
  to
  pass multiple pre-created tap file descriptors to qemu; vhostfds were 
  used to
  pass multiple pre-created vhost descriptors to qemu. This is done in patch 
  10-15.
  
  A method of deleting a queue and queue_index were also introduce for virtio,
  this is done in patch 16-17.
  
  Vhost were also changed to support multiqueue by introducing a start vq 
  index
  which tracks the first virtqueue that will be used by vhost instead of the
  assumption that the vhost always use virtqueue from index 0. This is done in
  patch 18.
  
  The last part is the multiqueue userspace changes, this is done in patch 
  19-22.
  
  With this changes, user could start a multiqueue virtio-net device through
  
  ./qemu -netdev tap,id=hn0,queues=2,vhost=on -device 
  virtio-net-pci,netdev=hn0
  
  Management tools such as libvirt can pass multiple pre-created fds/vhostfds 
  through
  
  ./qemu -netdev tap,id=hn0,fds=X:Y,vhostfds=M:N -device 
  virtio-net-pci,netdev=hn0
  
  For the one who wants to try, a git tree is available at:
  git://github.com/jasowang/qemu.git
  
  Changes from V3:
  - convert bitfield to bool in TAPState (Blue)
  - use abort() instead of assert(0) in tap code (Blue)
  - rebase to the latest
  - fix a bug that breaks the non-tap network
 
 This conflicts with the pull request I sent, in particular this adds a
 layout assumption.  In the hope this will accelerate things, I did a
 rebase and a trivial test with single queue only and it seems ok:
 
 git://github.com/mstsirkin/qemu.git pci
 
 There were some warnings about whitespace at EOF but
 otherwise seems ok.

Pushed to my pci branch on kernel.org too.

 -- 
 MST
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

RE: [PATCH 5/8] KVM: PPC: debug stub interface parameter defined

2013-01-31 Thread Bhushan Bharat-R65777

 -Original Message-
 From: kvm-ow...@vger.kernel.org [mailto:kvm-ow...@vger.kernel.org] On Behalf 
 Of
 Alexander Graf
 Sent: Thursday, January 31, 2013 7:58 PM
 To: Bhushan Bharat-R65777
 Cc: Paul Mackerras; kvm-...@vger.kernel.org; kvm@vger.kernel.org
 Subject: Re: [PATCH 5/8] KVM: PPC: debug stub interface parameter defined

 On 31.01.2013, at 15:05, Bhushan Bharat-R65777 wrote:

  -Original Message-
  From: kvm-ppc-ow...@vger.kernel.org
  [mailto:kvm-ppc-ow...@vger.kernel.org] On Behalf Of Alexander Graf
  Sent: Thursday, January 31, 2013 6:31 PM
  To: Bhushan Bharat-R65777
  Cc: Paul Mackerras; kvm-...@vger.kernel.org; kvm@vger.kernel.org
  Subject: Re: [PATCH 5/8] KVM: PPC: debug stub interface parameter
  defined

  On 30.01.2013, at 15:15, Bhushan Bharat-R65777 wrote:

  -Original Message-
  From: Alexander Graf [mailto:ag...@suse.de]
  Sent: Friday, January 25, 2013 5:24 PM
  To: Bhushan Bharat-R65777
  Cc: Paul Mackerras; kvm-...@vger.kernel.org; kvm@vger.kernel.org
  Subject: Re: [PATCH 5/8] KVM: PPC: debug stub interface parameter
  defined

  On 17.01.2013, at 12:11, Bhushan Bharat-R65777 wrote:

  -Original Message-
  From: Paul Mackerras [mailto:pau...@samba.org]
  Sent: Thursday, January 17, 2013 12:53 PM
  To: Bhushan Bharat-R65777
  Cc: kvm-...@vger.kernel.org; kvm@vger.kernel.org; ag...@suse.de;
  Bhushan Bharat-
  R65777
  Subject: Re: [PATCH 5/8] KVM: PPC: debug stub interface parameter
  defined

  On Wed, Jan 16, 2013 at 01:54:42PM +0530, Bharat Bhushan wrote:
  This patch defines the interface parameter for
  KVM_SET_GUEST_DEBUG ioctl support. Follow up patches will use
  this for setting up hardware breakpoints, watchpoints and software
 breakpoints.

  [snip]

  diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
  index 453a10f..7d5a51c 100644
  --- a/arch/powerpc/kvm/booke.c
  +++ b/arch/powerpc/kvm/booke.c
  @@ -1483,6 +1483,12 @@ int kvm_vcpu_ioctl_set_one_reg(struct
  kvm_vcpu *vcpu,
  struct kvm_one_reg *reg)
return r;
  }

  +int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
  +  struct kvm_guest_debug *dbg) {
  + return -EINVAL;
  +}
  +
  int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct
  kvm_fpu
  *fpu)  {
return -ENOTSUPP;
  diff --git a/arch/powerpc/kvm/powerpc.c
  b/arch/powerpc/kvm/powerpc.c index 934413c..4c94ca9 100644
  --- a/arch/powerpc/kvm/powerpc.c
  +++ b/arch/powerpc/kvm/powerpc.c
  @@ -532,12 +532,6 @@ void kvm_arch_vcpu_put(struct kvm_vcpu
  *vcpu) #endif  }

  -int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
  -struct kvm_guest_debug *dbg)
  -{
  - return -EINVAL;
  -}
  -

  This will break the build for non-book E machines, since
  kvm_arch_vcpu_ioctl_set_guest_debug() is referenced from generic code.
  You need to add it to arch/powerpc/kvm/book3s.c as well.

  right,  I will correct this.

  Would the implementation actually be different on booke vs book3s?
  My feeling is that powerpc.c is actually the right place for this.

  I am not sure there will be anything common between book3s and
  booke. Should
  we define the cpu specific function something like
  kvm_ppc_vcpu_ioctl_set_guest_debug() for booke and book3s and call
  this new defined function from kvm_arch_vcpu_ioctl_set_guest_debug() in
 powerpc.c ?

  No, just put it into the subarch directories then :). No need to
  overengineer anything for now.

  What you mean by subarch?  Above you mentioned that powerpc.c is right 
  place?
  Is not this patch is doing partially :)

 If the code in powerpc.c only says

 void kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, struct
 kvm_guest_debug *dbg) {
 kvmppc_core_set_guest_debug(vcpu, dbg); }

 then doing it in powerpc.c is obviously moot. Since there is no other debug
 implementation, it's ok if we try and find (and create) commonalities later.
 So
 yes, it's ok if you put it into booke.c or even e500.c. Just make sure to not
 break any other archs (440, book3s_pr, book3s_hv).

Right, yes, I will correct that so it compiles for all archs.

Thanks.
-Bharat

 Alex

 --
 To unsubscribe from this list: send the line unsubscribe kvm in the body of 
 a
 message to majord...@vger.kernel.org More majordomo info at
 http://vger.kernel.org/majordomo-info.html

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

RE: [PATCH 1/5] KVM: PPC: e500: Move VCPU's MMUCFG register initialization earlier

2013-01-31 Thread Caraman Mihai Claudiu-B02008

 -Original Message-
 From: Alexander Graf [mailto:ag...@suse.de]
 Sent: Thursday, January 31, 2013 3:21 PM
 To: Caraman Mihai Claudiu-B02008
 Cc: kvm-...@vger.kernel.org; kvm@vger.kernel.org; linuxppc-
 d...@lists.ozlabs.org
 Subject: Re: [PATCH 1/5] KVM: PPC: e500: Move VCPU's MMUCFG register
 initialization earlier

 On 30.01.2013, at 14:29, Mihai Caraman wrote:

  VCPU's MMUCFG register initialization should not depend on
 KVM_CAP_SW_TLB
  ioctl call. Move it earlier into tlb initialization phase.

 Quite the contrary. The fact that there is an mfspr() in e500_mmu.c
 already tells us that the code is broken. The TLB guest code should only
 depend on input from the SW_TLB configuration. It's completely orthogonal
 to the host capabilities.

Then we have the same issue for TLBnCFG registers which need to be configured
via SW_TLB ioctl. What is the purpose of guest tlb initialization in e500_mmu.c
if we rely on SW_TLB?

-Mike

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH 1/5] KVM: PPC: e500: Move VCPU's MMUCFG register initialization earlier

2013-01-31 Thread Alexander Graf

On 31.01.2013, at 15:56, Caraman Mihai Claudiu-B02008 wrote:

 -Original Message-
 From: Alexander Graf [mailto:ag...@suse.de]
 Sent: Thursday, January 31, 2013 3:21 PM
 To: Caraman Mihai Claudiu-B02008
 Cc: kvm-...@vger.kernel.org; kvm@vger.kernel.org; linuxppc-
 d...@lists.ozlabs.org
 Subject: Re: [PATCH 1/5] KVM: PPC: e500: Move VCPU's MMUCFG register
 initialization earlier

 On 30.01.2013, at 14:29, Mihai Caraman wrote:

 VCPU's MMUCFG register initialization should not depend on
 KVM_CAP_SW_TLB
 ioctl call. Move it earlier into tlb initialization phase.

 Quite the contrary. The fact that there is an mfspr() in e500_mmu.c
 already tells us that the code is broken. The TLB guest code should only
 depend on input from the SW_TLB configuration. It's completely orthogonal
 to the host capabilities.

 Then we have the same issue for TLBnCFG registers which need to be configured
 via SW_TLB ioctl. What is the purpose of guest tlb initialization in e500_mmu.c
 if we rely on SW_TLB?

It's to provide a fallback to user space that doesn't implement SW_TLB 
configuration yet.

Alex

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

RE: [PATCH 4/5] KVM: PPC: e500: Emulate EPTCFG register

2013-01-31 Thread Caraman Mihai Claudiu-B02008

 -Original Message-
 From: Alexander Graf [mailto:ag...@suse.de]
 Sent: Thursday, January 31, 2013 3:31 PM
 To: Caraman Mihai Claudiu-B02008
 Cc: kvm-...@vger.kernel.org; kvm@vger.kernel.org; linuxppc-
 d...@lists.ozlabs.org
 Subject: Re: [PATCH 4/5] KVM: PPC: e500: Emulate EPTCFG register
 
 
 On 30.01.2013, at 14:29, Mihai Caraman wrote:
 
  EPTCFG register defined by E.PT is accessed unconditionally by Linux
 guests
  in the presence of MAV 2.0. Emulate EPTCFG register now.
 
  Signed-off-by: Mihai Caraman mihai.cara...@freescale.com
  ---
  arch/powerpc/include/asm/kvm_host.h |1 +
  arch/powerpc/kvm/e500.h |6 ++
  arch/powerpc/kvm/e500_emulate.c |9 +
  arch/powerpc/kvm/e500_mmu.c |5 +
  4 files changed, 21 insertions(+), 0 deletions(-)
 
  diff --git a/arch/powerpc/include/asm/kvm_host.h
 b/arch/powerpc/include/asm/kvm_host.h
  index 88fcfe6..f480b20 100644
  --- a/arch/powerpc/include/asm/kvm_host.h
  +++ b/arch/powerpc/include/asm/kvm_host.h
  @@ -503,6 +503,7 @@ struct kvm_vcpu_arch {
  u32 tlbcfg[4];
  u32 tlbps[4];
  u32 mmucfg;
  +   u32 eptcfg;
 
 This too needs to be settable through SW_TLB.
 
  u32 epr;
  struct kvmppc_booke_debug_reg dbg_reg;
  #endif
  diff --git a/arch/powerpc/kvm/e500.h b/arch/powerpc/kvm/e500.h
  index b9f76d8..983eb95 100644
  --- a/arch/powerpc/kvm/e500.h
  +++ b/arch/powerpc/kvm/e500.h
  @@ -308,4 +308,10 @@ static inline unsigned int has_mmu_v2(const struct
 kvm_vcpu *vcpu)
  return ((vcpu->arch.mmucfg & MMUCFG_MAVN) == MMUCFG_MAVN_V2);
  }
 
  +static inline unsigned int supports_page_tables(const struct kvm_vcpu
 *vcpu)
 
 bool again. Can we generalize this a bit more? How about a small
 framework that allows us to differentiate across e.XX features? 

I thought you would ask for it :)

-Mike

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [Qemu-devel] [PATCH V4 00/22] Multiqueue virtio-net

2013-01-31 Thread Jason Wang

On 01/31/2013 09:44 PM, Eric Blake wrote:
 On 01/31/2013 12:00 AM, Jason Wang wrote:
 On 01/31/2013 02:29 AM, Eric Blake wrote:
 On 01/30/2013 04:12 AM, Jason Wang wrote:

 With this changes, user could start a multiqueue virtio-net device through

 ./qemu -netdev tap,id=hn0,queues=2,vhost=on -device 
 virtio-net-pci,netdev=hn0

 Management tools such as libvirt can pass multiple pre-created 
 fds/vhostfds through

 ./qemu -netdev tap,id=hn0,fds=X:Y,vhostfds=M:N -device 
 virtio-net-pci,netdev=hn0
 Do we really need specific fds= parsing, or can we reuse the existing
 -add-fd command line option to our advantage?  I guess what I'm asking
 is how hotplug will work; and if hotplug takes a file name, shouldn't
 the command line also take a name; and if the command line takes a name,
 what's wrong with:

 ./qemu -add-fd fdset=1,fd=X -add-fd fdset=2,fd=Y -add-fd fdset=3,fd=M
 -add-fd fdset=4,fd=N -netdev
 tap,id=hn0,fds=/dev/fdset/1:/dev/fdset/2,vhostfds=/dev/fdset/3:/dev/fdset/4
 -device virtio-net-pci,netdev=hn0

 AFAIK, tap does not support fdset now, so this requirement is beyond the
 scope of multiqueue itself. We can do this in the future. Btw does
 libvirt support add-fd now?
 Anything that uses qemu_open() supports fdset now.  The question I'm
 asking is whether the command line has a way to pass /path/to/name
 (which can be presented as /dev/fdset/nnn for add-fd usage) now, or
 whether it only supports fds=integers.

There is nothing special about 'fds' and 'vhostfds': the code just splits the params by
':' and passes them one by one through monitor_handle_fd_param(), just like
fd and vhostfd. So if 'fd' and 'vhostfd' support /path/to/name, so
do 'fds' and 'vhostfds'.

So on the command line, you can pass /path/to/name to fd/vhostfd, but it
won't work since monitor_handle_fd_param() cannot handle it because 1)
it's not an integer and 2) it was not named before. But for hotplug,
non-integers work since they have already been named by getfd; the same
holds for fds and vhostfds.

For management tools such as libvirt, all that's needed is to join the
fd names with ':'.

 For hotplug, it just work if you pass multiple file descriptors one by
 one through getfd and then use fds=X:Y,vhostfds=M:N.
 For hotplug, you can't pass integers; you have to name the fds either
 way.  Either you name it with getfd, or you name it with add-fd.  But
 getfd is not as nice as add-fd when it comes to ensuring that fds are
 not leaked in qemu, even when the management app such as libvirt
 restarts.  Furthermore, if it is possible to specify taps by pathname
 instead of by fd inheritance, then using getfd means you have to support
 two different approaches in QMP to distinguish which string is being
 supplied, while supporting add-fd means you only have to support
 qemu_open() which handles both direct names and fd passing in a single
 string interface.

 As for libvirt support of add-fd, I'm currently working with Stefan
 Berger to get patches applied; the goal is tha libvirt 1.0.3 (end of
 February) will support add-fd.


Thanks, I know there are advantages of add-fd, but current tap does not
use qemu_open() which means it can't support fdset. We can add this in
the future.



--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH V4 00/22] Multiqueue virtio-net

2013-01-31 Thread Jason Wang

On 01/31/2013 10:36 PM, Michael S. Tsirkin wrote:
 On Thu, Jan 31, 2013 at 04:21:49PM +0200, Michael S. Tsirkin wrote:
 On Wed, Jan 30, 2013 at 07:12:19PM +0800, Jason Wang wrote:
 Hello all:

 This series is an update of the last version of multiqueue virtio-net support.

 This series tries to bring multiqueue support to virtio-net through a
 multiqueue support tap backend and multiple vhost threads.

 Patch 1 converts bitfield in TAPState to bool. Patch 2 replace assert(0) 
 with
 abort() in tap.

 To support this, multiqueue nic support were added to qemu. This is done by
 introducing an array of NetClientStates in NICState, and make each pair of 
 peers
 to be an queue of the nic. This is done in patch 3-9.

 Tap were also converted to be able to create a multiple queue
 backend. Currently, only linux support this by issuing TUNSETIFF N times 
 with
 the same device name to create N queues. Each fd returned by TUNSETIFF were 
 a
 queue supported by kernel. Three new command lines were introduced, queues
 were used to tell how many queues will be created by qemu; fds were used 
 to
 pass multiple pre-created tap file descriptors to qemu; vhostfds were 
 used to
 pass multiple pre-created vhost descriptors to qemu. This is done in patch 
 10-15.

 A method of deleting a queue and queue_index were also introduce for virtio,
 this is done in patch 16-17.

 Vhost were also changed to support multiqueue by introducing a start vq 
 index
 which tracks the first virtqueue that will be used by vhost instead of the
 assumption that the vhost always use virtqueue from index 0. This is done in
 patch 18.

 The last part is the multiqueue userspace changes, this is done in patch 
 19-22.

 With this changes, user could start a multiqueue virtio-net device through

 ./qemu -netdev tap,id=hn0,queues=2,vhost=on -device 
 virtio-net-pci,netdev=hn0

 Management tools such as libvirt can pass multiple pre-created fds/vhostfds 
 through

 ./qemu -netdev tap,id=hn0,fds=X:Y,vhostfds=M:N -device 
 virtio-net-pci,netdev=hn0

 For the one who wants to try, a git tree is available at:
 git://github.com/jasowang/qemu.git

 Changes from V3:
 - convert bitfield to bool in TAPState (Blue)
 - use abort() instead of assert(0) in tap code (Blue)
 - rebase to the latest
 - fix a bug that breaks the non-tap network
 This conflicts with the pull request I sent, in particular this adds a
 layout assumption.  In the hope this will accelerate things, I did a
 rebase and a trivial test with single queue only and it seems ok:

 git://github.com/mstsirkin/qemu.git pci

 There were some warnings about whitespace at EOF but
 otherwise seems ok.
 Pushed to my pci branch on kernel.org too.

Tested with mq, it works well.

Thanks.

 -- 
 MST

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [Qemu-devel] [PATCH V4 00/22] Multiqueue virtio-net

2013-01-31 Thread Eric Blake

On 01/31/2013 06:58 AM, Michael S. Tsirkin wrote:

 For hotplug, it just work if you pass multiple file descriptors one by
 one through getfd and then use fds=X:Y,vhostfds=M:N.

 For hotplug, you can't pass integers; you have to name the fds either
 way.  Either you name it with getfd, or you name it with add-fd.  But
 getfd is not as nice as add-fd when it comes to ensuring that fds are
 not leaked in qemu, even when the management app such as libvirt
 restarts.  Furthermore, if it is possible to specify taps by pathname
 instead of by fd inheritance,
 
 I don't think there's a way to specify taps by pathname.

Then using fds=integer:integer on the command line makes the most sense,
and QMP uses fds=name:name where name was specified by 'getfd', and
there is no way to wire up qemu_open() nor any need to use 'add-fd'.
Okay, my question has been answered, your approach looks right now that
I know more about how -netdev works to begin with.

-- 
Eric Blake   eblake redhat com+1-919-301-3266
Libvirt virtualization library http://libvirt.org



signature.asc
Description: OpenPGP digital signature

RE: [PATCH 1/5] KVM: PPC: e500: Move VCPU's MMUCFG register initialization earlier

2013-01-31 Thread Caraman Mihai Claudiu-B02008

 -Original Message-
 From: Alexander Graf [mailto:ag...@suse.de]
 Sent: Thursday, January 31, 2013 4:58 PM
 To: Caraman Mihai Claudiu-B02008
 Cc: kvm-...@vger.kernel.org; kvm@vger.kernel.org; linuxppc-
 d...@lists.ozlabs.org
 Subject: Re: [PATCH 1/5] KVM: PPC: e500: Move VCPU's MMUCFG register
 initialization earlier

 On 31.01.2013, at 15:56, Caraman Mihai Claudiu-B02008 wrote:

  -Original Message-
  From: Alexander Graf [mailto:ag...@suse.de]
  Sent: Thursday, January 31, 2013 3:21 PM
  To: Caraman Mihai Claudiu-B02008
  Cc: kvm-...@vger.kernel.org; kvm@vger.kernel.org; linuxppc-
  d...@lists.ozlabs.org
  Subject: Re: [PATCH 1/5] KVM: PPC: e500: Move VCPU's MMUCFG register
  initialization earlier

  On 30.01.2013, at 14:29, Mihai Caraman wrote:

  VCPU's MMUCFG register initialization should not depend on
  KVM_CAP_SW_TLB
  ioctl call. Move it earlier into tlb initialization phase.

  Quite the contrary. The fact that there is an mfspr() in e500_mmu.c
  already tells us that the code is broken. The TLB guest code should
 only
  depend on input from the SW_TLB configuration. It's completely
 orthogonal
  to the host capabilities.

  Then we have the same issue for TLBnCFG registers which need to be
 configured
  via SW_TLB ioctl. What is the purpose of guest tlb initalization in
 e500_mmu.c
  if we rely on SW_TLB?

 It's to provide a fallback to user space that doesn't implement SW_TLB
 configuration yet.

Do we have such a case now or is it just hypothetical? For the fallback we
need to initialize the MMUCFG register which I intended to say in the commit
message.

 Alex

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Listing on your website

2013-01-31 Thread Maryam Khan


Hi

Could you please let me know whether it is possible to list us on your 
website?


I recently sent an e-mail to yourselves to query whether it would be 
possible to do this but have not heard anything back yet?


We are Sirius Corporation - an Open Source services provider.

More details on us can be found on our website: 
http://www.siriusopensource.com/about


Please let me know and I can send you our logo and brief description.

Kind regards,
Maz

--
Maz Khan, Marketing and Sales Coordinator
Sirius - stress free technology
www.siriusopensource.com
Tel: +44 870 608 0063
Twitter: @SiriusCorp

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [Qemu-devel] KVM call minutes 2013-01-29 - Port I/O

2013-01-31 Thread Alex Williamson


On Thu, 2013-01-31 at 12:49 +0200, Michael S. Tsirkin wrote:
 On Wed, Jan 30, 2013 at 04:28:30PM -0700, Alex Williamson wrote:
  On Thu, 2013-01-31 at 10:02 +1100, Benjamin Herrenschmidt wrote:
   On Thu, 2013-01-31 at 00:49 +0200, Michael S. Tsirkin wrote:
 In practice they do (VGA at least)
 
 From a SW modelling standpoint, I don't think it's worth
differentiating
 PCI and PCIE.
 
 Cheers,
 Ben.

Interesting.
Do you have such hardware? Could you please dump
the output of lspci -vv?
   
   Any ATI or nVidia card still supports hard decoding of VGA regions for
   the sake of legacy operating systems and BIOSes :-) I don't know about
   Intel but I suppose it's the same.
  
  For example:
  
  -[:00]-+-00.0  Advanced Micro Devices [AMD] nee ATI RD890 PCI to PCI 
  bridge (external gfx0 p
 +-04.0-[02]--+-00.0  Advanced Micro Devices [AMD] nee ATI Cedar 
  PRO [Radeon HD 5450/6350]
  
  00:04.0 PCI bridge: Advanced Micro Devices [AMD] nee ATI RD890 PCI to PCI 
  bridge (PCI express gpp port D) (prog-if 00 [Normal decode])
  Control: I/O+ Mem+ BusMaster+ SpecCycle- MemWINV- VGASnoop- ParErr- 
  Stepping- SERR- FastB2B- DisINTx-
  Status: Cap+ 66MHz- UDF- FastB2B- ParErr- DEVSEL=fast TAbort- TAbort- 
  MAbort- SERR- PERR- INTx-
  Latency: 0, Cache Line Size: 64 bytes
  Bus: primary=00, secondary=02, subordinate=02, sec-latency=0
  I/O behind bridge: c000-cfff
  Memory behind bridge: fd10-fd1f
  Prefetchable memory behind bridge: d000-dfff
  Secondary status: 66MHz- FastB2B- ParErr- DEVSEL=fast TAbort- TAbort- 
  MAbort+ SERR- PERR-
  BridgeCtl: Parity- SERR- NoISA- VGA+ MAbort- Reset- FastB2B-

  VGA+ (VGA Enable) indicates positive decode of 0x3b0 - 0x3bb, 0x3c0 -
  0x3df, and 0xa0000 - 0xbffff.  Device 2:00.0 of course doesn't report
  these ISA ranges as they're implicit in the VGA class code.
 
 OK but this appears behind a bridge.  So the bridge configuration tells
 the root complex where to send accesses to the VGA.
 
 But qemu currently puts devices directly on root bus.
 
 And as far as I can tell when we present devices directly on bus 0, we
 pretend these are integrated in the root complex. The spec seems to
 say explicitly that root complex integrated devices should not use legacy
 addresses or support hotplug. So I would be surprised if such one
 appears in real world.
 
 Luckily guests do not seem to be worried as long as we use ACPI.

Yes, in fact I just figured out last night that Windows is unhappy with
assigned PCI devices on bus 0 that claim to be an endpoint in their PCIe
capability rather than an integrated endpoint.  We'll need to do extra
mangling of the PCIe capability to massage it into the guest visible
topology.

Section 1.3.2.3 of the 3.0 spec says integrated endpoints must not
require I/O resources claimed through BAR(s).  VGA skirts around this by
not having the legacy resources claimed by BARs, but instead being
implicit.  Are there other sections restricting legacy I/O?

It's common that a plugin VGA card sits behind a root port where the
bridge registers tell us about VGA routing, but integrated VGA devices
are often on bus 0 though, here's an example:

-[:00]-+-00.0  Intel Corporation 2nd Generation Core Processor Family DRAM 
Controller
   +-02.0  Intel Corporation 2nd Generation Core Processor Family 
Integrated Graphics Controller

Often these systems will disable the integrated graphics when a plugin
graphics is installed below a root port.  I'm not sure how the system
knows to route VGA to the integrated device vs the root port otherwise.

Here's a more interesting example:

-+-[:01]-+-00.0  NVIDIA Corporation GT218 [GeForce G210M]
 |   \-00.1  NVIDIA Corporation High Definition Audio Controller
 \-[:00]-+-00.0  Intel Corporation Mobile 4 Series Chipset Memory 
Controller Hub
 +-01.0  Intel Corporation Mobile 4 Series Chipset PCI Express 
Graphics Port

This system seems to have two host bridges with VGA behind each of them.
There's no bridge to control VGA routing, so I don't know how the
selection is done.  It's possible the g210m never sees legacy VGA
accesses in this mode.  This bios has another mode which makes the g210m
the primary graphics and hides the integrated graphics, essentially the
same as I mention above with hiding integrated endpoint graphics when
plugin graphics are used.  Thanks,

Alex

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH 1/5] KVM: PPC: e500: Move VCPU's MMUCFG register initialization earlier

2013-01-31 Thread Scott Wood

On 01/31/2013 09:26:20 AM, Caraman Mihai Claudiu-B02008 wrote:

 -Original Message-
 From: Alexander Graf [mailto:ag...@suse.de]
 Sent: Thursday, January 31, 2013 4:58 PM
 To: Caraman Mihai Claudiu-B02008
 Cc: kvm-...@vger.kernel.org; kvm@vger.kernel.org; linuxppc-
 d...@lists.ozlabs.org
 Subject: Re: [PATCH 1/5] KVM: PPC: e500: Move VCPU's MMUCFG register
 initialization earlier

 On 31.01.2013, at 15:56, Caraman Mihai Claudiu-B02008 wrote:

  -Original Message-
  From: Alexander Graf [mailto:ag...@suse.de]
  Sent: Thursday, January 31, 2013 3:21 PM
  To: Caraman Mihai Claudiu-B02008
  Cc: kvm-...@vger.kernel.org; kvm@vger.kernel.org; linuxppc-
  d...@lists.ozlabs.org
  Subject: Re: [PATCH 1/5] KVM: PPC: e500: Move VCPU's MMUCFG  
register

  initialization earlier

  On 30.01.2013, at 14:29, Mihai Caraman wrote:

  VCPU's MMUCFG register initialization should not depend on
  KVM_CAP_SW_TLB
  ioctl call. Move it earlier into tlb initialization phase.

  Quite the contrary. The fact that there is an mfspr() in  
e500_mmu.c
  already tells us that the code is broken. The TLB guest code  
should

 only
  depend on input from the SW_TLB configuration. It's completely
 orthogonal
  to the host capabilities.

  Then we have the same issue for TLBnCFG registers which need to be
 configured
  via SW_TLB ioctl. What is the purpose of guest tlb initalization  
in

 e500_mmu.c
  if we rely on SW_TLB?

 It's to provide a fallback to user space that doesn't implement  
SW_TLB

 configuration yet.

Do we have such a case now or is it just hypothetical? For the  
fallback we
need to initialize the MMUCFG register which I intended to say in the  
commit

message.

I don't think we need to support a fallback for e6500, since there's  
nothing to be backwards compatible with.

As for use case, I don't see us ever supporting the guest being a  
different CPU than the host.  Page sizes probably aren't a problem, but  
there are other barriers.

The main reasons that TLBnCFG are settable through SW_TLB are:
1. The guest TLB can be enlarged as a performance hack (like in Topaz,  
though QEMU doesn't currently do this),
2. The legacy default in KVM is based on the e500v1 TLB0 size, which is  
half of what e500v2/e500mc have, and
3. QEMU needs to know the exact geometry of the TLB so that it can  
interpret the shared data properly.

#3 seems like a compelling reason here, to avoid silent weirdness if  
there's a slight mismatch between what QEMU thinks it's modelling and  
what we're actually running on.

-Scott
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

RE: [PATCH 3/8] KVM: PPC: booke: Added debug handler

2013-01-31 Thread Bhushan Bharat-R65777



 -Original Message-
 From: Alexander Graf [mailto:ag...@suse.de]
 Sent: Thursday, January 31, 2013 5:47 PM
 To: Bhushan Bharat-R65777
 Cc: kvm-...@vger.kernel.org; kvm@vger.kernel.org
 Subject: Re: [PATCH 3/8] KVM: PPC: booke: Added debug handler
 
 
 On 30.01.2013, at 12:30, Bhushan Bharat-R65777 wrote:
 
 
 
  -Original Message-
  From: Alexander Graf [mailto:ag...@suse.de]
  Sent: Friday, January 25, 2013 5:13 PM
  To: Bhushan Bharat-R65777
  Cc: kvm-...@vger.kernel.org; kvm@vger.kernel.org; Bhushan
  Bharat-R65777
  Subject: Re: [PATCH 3/8] KVM: PPC: booke: Added debug handler
 
 
  On 16.01.2013, at 09:24, Bharat Bhushan wrote:
 
  From: Bharat Bhushan bharat.bhus...@freescale.com
 
  Installed debug handler will be used for guest debug support and
  debug facility emulation features (patches for these features will
  follow this patch).
 
  Signed-off-by: Liu Yu yu@freescale.com
  [bharat.bhus...@freescale.com: Substantial changes]
  Signed-off-by: Bharat Bhushan bharat.bhus...@freescale.com
  ---
  arch/powerpc/include/asm/kvm_host.h |1 +
  arch/powerpc/kernel/asm-offsets.c   |1 +
  arch/powerpc/kvm/booke_interrupts.S |   49 
  ++---
 --
  3 files changed, 44 insertions(+), 7 deletions(-)
 
  diff --git a/arch/powerpc/include/asm/kvm_host.h
  b/arch/powerpc/include/asm/kvm_host.h
  index 8a72d59..f4ba881 100644
  --- a/arch/powerpc/include/asm/kvm_host.h
  +++ b/arch/powerpc/include/asm/kvm_host.h
  @@ -503,6 +503,7 @@ struct kvm_vcpu_arch {
u32 tlbcfg[4];
u32 mmucfg;
u32 epr;
  + u32 crit_save;
struct kvmppc_booke_debug_reg dbg_reg; #endif
gpa_t paddr_accessed;
  diff --git a/arch/powerpc/kernel/asm-offsets.c
  b/arch/powerpc/kernel/asm-offsets.c
  index 46f6afd..02048f3 100644
  --- a/arch/powerpc/kernel/asm-offsets.c
  +++ b/arch/powerpc/kernel/asm-offsets.c
  @@ -562,6 +562,7 @@ int main(void)
DEFINE(VCPU_LAST_INST, offsetof(struct kvm_vcpu, arch.last_inst));
DEFINE(VCPU_FAULT_DEAR, offsetof(struct kvm_vcpu, arch.fault_dear));
DEFINE(VCPU_FAULT_ESR, offsetof(struct kvm_vcpu, arch.fault_esr));
  + DEFINE(VCPU_CRIT_SAVE, offsetof(struct kvm_vcpu, arch.crit_save));
  #endif /* CONFIG_PPC_BOOK3S */
  #endif /* CONFIG_KVM */
 
  diff --git a/arch/powerpc/kvm/booke_interrupts.S
  b/arch/powerpc/kvm/booke_interrupts.S
  index eae8483..dd9c5d4 100644
  --- a/arch/powerpc/kvm/booke_interrupts.S
  +++ b/arch/powerpc/kvm/booke_interrupts.S
  @@ -52,12 +52,7 @@
(1BOOKE_INTERRUPT_PROGRAM) | \
(1BOOKE_INTERRUPT_DTLB_MISS))
 
  -.macro KVM_HANDLER ivor_nr scratch srr0
  -_GLOBAL(kvmppc_handler_\ivor_nr)
  - /* Get pointer to vcpu and record exit number. */
  - mtspr   \scratch , r4
  - mfspr   r4, SPRN_SPRG_THREAD
  - lwz r4, THREAD_KVM_VCPU(r4)
  +.macro __KVM_HANDLER ivor_nr scratch srr0
stw r3, VCPU_GPR(R3)(r4)
stw r5, VCPU_GPR(R5)(r4)
stw r6, VCPU_GPR(R6)(r4)
  @@ -74,6 +69,46 @@ _GLOBAL(kvmppc_handler_\ivor_nr)
bctr
  .endm
 
  +.macro KVM_HANDLER ivor_nr scratch srr0
  +_GLOBAL(kvmppc_handler_\ivor_nr)
  + /* Get pointer to vcpu and record exit number. */
  + mtspr   \scratch , r4
  + mfspr   r4, SPRN_SPRG_THREAD
  + lwz r4, THREAD_KVM_VCPU(r4)
  + __KVM_HANDLER \ivor_nr \scratch \srr0 .endm
  +
  +.macro KVM_DBG_HANDLER ivor_nr scratch srr0
  +_GLOBAL(kvmppc_handler_\ivor_nr)
  + mtspr   \scratch, r4
  + mfspr   r4, SPRN_SPRG_THREAD
  + lwz r4, THREAD_KVM_VCPU(r4)
  + stw r3, VCPU_CRIT_SAVE(r4)
  + mfcrr3
  + mfspr   r4, SPRN_CSRR1
  + andi.   r4, r4, MSR_PR
  + bne 1f
 
 
  + /* debug interrupt happened in enter/exit path */
  + mfspr   r4, SPRN_CSRR1
  + rlwinm  r4, r4, 0, ~MSR_DE
  + mtspr   SPRN_CSRR1, r4
  + lis r4, 0x
  + ori r4, r4, 0x
  + mtspr   SPRN_DBSR, r4
  + mfspr   r4, SPRN_SPRG_THREAD
  + lwz r4, THREAD_KVM_VCPU(r4)
  + mtcrr3
  + lwz r3, VCPU_CRIT_SAVE(r4)
  + mfspr   r4, \scratch
  + rfci
 
  What is this part doing? Try to ignore the debug exit?
 
  As BOOKE doesn't have hardware support for virtualization, hardware never 
  know
 current pc is in guest or in host.
  So when enable hardware single step for guest, it cannot be disabled at the
 time guest exit. Thus, we'll see that an single step interrupt happens at the
 beginning of guest exit path.
 
  With the above code we recognize this kind of single step interrupt disable
 single step and rfci.
 
  Why would we have MSR_DE
  enabled in the first place when we can't handle it?
 
  When QEMU is using hardware debug resource then we always set MSR_DE during
 guest is running.
 
 Right, but why is MSR_DE enabled during the exit path? If MSR_DE wasn't set, 
 you
 wouldn't get a single step exit.

We always set MSR_DE in hw MSR when qemu using the debug resource.

 During the exit code path, you could then swap
 DBSR back to what the host expects (which means no single step). Only after 
 that
 enable MSR_DE again.

We

Re: [PATCH 3/8] KVM: PPC: booke: Added debug handler

2013-01-31 Thread Alexander Graf


On 31.01.2013, at 17:58, Bhushan Bharat-R65777 wrote:

 
 
 -Original Message-
 From: Alexander Graf [mailto:ag...@suse.de]
 Sent: Thursday, January 31, 2013 5:47 PM
 To: Bhushan Bharat-R65777
 Cc: kvm-...@vger.kernel.org; kvm@vger.kernel.org
 Subject: Re: [PATCH 3/8] KVM: PPC: booke: Added debug handler
 
 
 On 30.01.2013, at 12:30, Bhushan Bharat-R65777 wrote:
 
 
 
 -Original Message-
 From: Alexander Graf [mailto:ag...@suse.de]
 Sent: Friday, January 25, 2013 5:13 PM
 To: Bhushan Bharat-R65777
 Cc: kvm-...@vger.kernel.org; kvm@vger.kernel.org; Bhushan
 Bharat-R65777
 Subject: Re: [PATCH 3/8] KVM: PPC: booke: Added debug handler
 
 
 On 16.01.2013, at 09:24, Bharat Bhushan wrote:
 
 From: Bharat Bhushan bharat.bhus...@freescale.com
 
 Installed debug handler will be used for guest debug support and
 debug facility emulation features (patches for these features will
 follow this patch).
 
 Signed-off-by: Liu Yu yu@freescale.com
 [bharat.bhus...@freescale.com: Substantial changes]
 Signed-off-by: Bharat Bhushan bharat.bhus...@freescale.com
 ---
 arch/powerpc/include/asm/kvm_host.h |1 +
 arch/powerpc/kernel/asm-offsets.c   |1 +
 arch/powerpc/kvm/booke_interrupts.S |   49 
 ++---
 --
 3 files changed, 44 insertions(+), 7 deletions(-)
 
 diff --git a/arch/powerpc/include/asm/kvm_host.h
 b/arch/powerpc/include/asm/kvm_host.h
 index 8a72d59..f4ba881 100644
 --- a/arch/powerpc/include/asm/kvm_host.h
 +++ b/arch/powerpc/include/asm/kvm_host.h
 @@ -503,6 +503,7 @@ struct kvm_vcpu_arch {
   u32 tlbcfg[4];
   u32 mmucfg;
   u32 epr;
 + u32 crit_save;
   struct kvmppc_booke_debug_reg dbg_reg; #endif
   gpa_t paddr_accessed;
 diff --git a/arch/powerpc/kernel/asm-offsets.c
 b/arch/powerpc/kernel/asm-offsets.c
 index 46f6afd..02048f3 100644
 --- a/arch/powerpc/kernel/asm-offsets.c
 +++ b/arch/powerpc/kernel/asm-offsets.c
 @@ -562,6 +562,7 @@ int main(void)
   DEFINE(VCPU_LAST_INST, offsetof(struct kvm_vcpu, arch.last_inst));
   DEFINE(VCPU_FAULT_DEAR, offsetof(struct kvm_vcpu, arch.fault_dear));
   DEFINE(VCPU_FAULT_ESR, offsetof(struct kvm_vcpu, arch.fault_esr));
 + DEFINE(VCPU_CRIT_SAVE, offsetof(struct kvm_vcpu, arch.crit_save));
 #endif /* CONFIG_PPC_BOOK3S */
 #endif /* CONFIG_KVM */
 
 diff --git a/arch/powerpc/kvm/booke_interrupts.S
 b/arch/powerpc/kvm/booke_interrupts.S
 index eae8483..dd9c5d4 100644
 --- a/arch/powerpc/kvm/booke_interrupts.S
 +++ b/arch/powerpc/kvm/booke_interrupts.S
 @@ -52,12 +52,7 @@
  (1BOOKE_INTERRUPT_PROGRAM) | \
  (1BOOKE_INTERRUPT_DTLB_MISS))
 
 -.macro KVM_HANDLER ivor_nr scratch srr0
 -_GLOBAL(kvmppc_handler_\ivor_nr)
 - /* Get pointer to vcpu and record exit number. */
 - mtspr   \scratch , r4
 - mfspr   r4, SPRN_SPRG_THREAD
 - lwz r4, THREAD_KVM_VCPU(r4)
 +.macro __KVM_HANDLER ivor_nr scratch srr0
   stw r3, VCPU_GPR(R3)(r4)
   stw r5, VCPU_GPR(R5)(r4)
   stw r6, VCPU_GPR(R6)(r4)
 @@ -74,6 +69,46 @@ _GLOBAL(kvmppc_handler_\ivor_nr)
   bctr
 .endm
 
 +.macro KVM_HANDLER ivor_nr scratch srr0
 +_GLOBAL(kvmppc_handler_\ivor_nr)
 + /* Get pointer to vcpu and record exit number. */
 + mtspr   \scratch , r4
 + mfspr   r4, SPRN_SPRG_THREAD
 + lwz r4, THREAD_KVM_VCPU(r4)
 + __KVM_HANDLER \ivor_nr \scratch \srr0 .endm
 +
 +.macro KVM_DBG_HANDLER ivor_nr scratch srr0
 +_GLOBAL(kvmppc_handler_\ivor_nr)
 + mtspr   \scratch, r4
 + mfspr   r4, SPRN_SPRG_THREAD
 + lwz r4, THREAD_KVM_VCPU(r4)
 + stw r3, VCPU_CRIT_SAVE(r4)
 + mfcrr3
 + mfspr   r4, SPRN_CSRR1
 + andi.   r4, r4, MSR_PR
 + bne 1f
 
 
 + /* debug interrupt happened in enter/exit path */
 + mfspr   r4, SPRN_CSRR1
 + rlwinm  r4, r4, 0, ~MSR_DE
 + mtspr   SPRN_CSRR1, r4
 + lis r4, 0x
 + ori r4, r4, 0x
 + mtspr   SPRN_DBSR, r4
 + mfspr   r4, SPRN_SPRG_THREAD
 + lwz r4, THREAD_KVM_VCPU(r4)
 + mtcrr3
 + lwz r3, VCPU_CRIT_SAVE(r4)
 + mfspr   r4, \scratch
 + rfci
 
 What is this part doing? Try to ignore the debug exit?
 
 As BOOKE doesn't have hardware support for virtualization, hardware never 
 know
 current pc is in guest or in host.
 So when enable hardware single step for guest, it cannot be disabled at the
 time guest exit. Thus, we'll see that an single step interrupt happens at the
 beginning of guest exit path.
 
 With the above code we recognize this kind of single step interrupt disable
 single step and rfci.
 
 Why would we have MSR_DE
 enabled in the first place when we can't handle it?
 
 When QEMU is using hardware debug resource then we always set MSR_DE during
 guest is running.
 
 Right, but why is MSR_DE enabled during the exit path? If MSR_DE wasn't set, 
 you
 wouldn't get a single step exit.
 
 We always set MSR_DE in hw MSR when qemu using the debug resource.

In the _guest_ MSR, yes. But once we exit the guest, it shouldn't be set 
anymore, because we're in an interrupt handler, no? Or is MSR_DE kept alive on 
interrupts?

 
 During the exit code path, you

Re: [PATCH 3/8] KVM: PPC: booke: Added debug handler

2013-01-31 Thread Alexander Graf


On 31.01.2013, at 18:08, Alexander Graf wrote:

 
 On 31.01.2013, at 17:58, Bhushan Bharat-R65777 wrote:
 
 
 
 -Original Message-
 From: Alexander Graf [mailto:ag...@suse.de]
 Sent: Thursday, January 31, 2013 5:47 PM
 To: Bhushan Bharat-R65777
 Cc: kvm-...@vger.kernel.org; kvm@vger.kernel.org
 Subject: Re: [PATCH 3/8] KVM: PPC: booke: Added debug handler
 
 
 On 30.01.2013, at 12:30, Bhushan Bharat-R65777 wrote:
 
 
 
 -Original Message-
 From: Alexander Graf [mailto:ag...@suse.de]
 Sent: Friday, January 25, 2013 5:13 PM
 To: Bhushan Bharat-R65777
 Cc: kvm-...@vger.kernel.org; kvm@vger.kernel.org; Bhushan
 Bharat-R65777
 Subject: Re: [PATCH 3/8] KVM: PPC: booke: Added debug handler
 
 
 On 16.01.2013, at 09:24, Bharat Bhushan wrote:
 
 From: Bharat Bhushan bharat.bhus...@freescale.com
 
 Installed debug handler will be used for guest debug support and
 debug facility emulation features (patches for these features will
 follow this patch).
 
 Signed-off-by: Liu Yu yu@freescale.com
 [bharat.bhus...@freescale.com: Substantial changes]
 Signed-off-by: Bharat Bhushan bharat.bhus...@freescale.com
 ---
 arch/powerpc/include/asm/kvm_host.h |1 +
 arch/powerpc/kernel/asm-offsets.c   |1 +
 arch/powerpc/kvm/booke_interrupts.S |   49 
 ++---
 --
 3 files changed, 44 insertions(+), 7 deletions(-)
 
 diff --git a/arch/powerpc/include/asm/kvm_host.h
 b/arch/powerpc/include/asm/kvm_host.h
 index 8a72d59..f4ba881 100644
 --- a/arch/powerpc/include/asm/kvm_host.h
 +++ b/arch/powerpc/include/asm/kvm_host.h
 @@ -503,6 +503,7 @@ struct kvm_vcpu_arch {
  u32 tlbcfg[4];
  u32 mmucfg;
  u32 epr;
 +u32 crit_save;
  struct kvmppc_booke_debug_reg dbg_reg; #endif
  gpa_t paddr_accessed;
 diff --git a/arch/powerpc/kernel/asm-offsets.c
 b/arch/powerpc/kernel/asm-offsets.c
 index 46f6afd..02048f3 100644
 --- a/arch/powerpc/kernel/asm-offsets.c
 +++ b/arch/powerpc/kernel/asm-offsets.c
 @@ -562,6 +562,7 @@ int main(void)
  DEFINE(VCPU_LAST_INST, offsetof(struct kvm_vcpu, arch.last_inst));
  DEFINE(VCPU_FAULT_DEAR, offsetof(struct kvm_vcpu, arch.fault_dear));
  DEFINE(VCPU_FAULT_ESR, offsetof(struct kvm_vcpu, arch.fault_esr));
 +DEFINE(VCPU_CRIT_SAVE, offsetof(struct kvm_vcpu, 
 arch.crit_save));
 #endif /* CONFIG_PPC_BOOK3S */
 #endif /* CONFIG_KVM */
 
 diff --git a/arch/powerpc/kvm/booke_interrupts.S
 b/arch/powerpc/kvm/booke_interrupts.S
 index eae8483..dd9c5d4 100644
 --- a/arch/powerpc/kvm/booke_interrupts.S
 +++ b/arch/powerpc/kvm/booke_interrupts.S
 @@ -52,12 +52,7 @@
 (1BOOKE_INTERRUPT_PROGRAM) | \
 (1BOOKE_INTERRUPT_DTLB_MISS))
 
 -.macro KVM_HANDLER ivor_nr scratch srr0
 -_GLOBAL(kvmppc_handler_\ivor_nr)
 -/* Get pointer to vcpu and record exit number. */
 -mtspr   \scratch , r4
 -mfspr   r4, SPRN_SPRG_THREAD
 -lwz r4, THREAD_KVM_VCPU(r4)
 +.macro __KVM_HANDLER ivor_nr scratch srr0
  stw r3, VCPU_GPR(R3)(r4)
  stw r5, VCPU_GPR(R5)(r4)
  stw r6, VCPU_GPR(R6)(r4)
 @@ -74,6 +69,46 @@ _GLOBAL(kvmppc_handler_\ivor_nr)
  bctr
 .endm
 
 +.macro KVM_HANDLER ivor_nr scratch srr0
 +_GLOBAL(kvmppc_handler_\ivor_nr)
 +/* Get pointer to vcpu and record exit number. */
 +mtspr   \scratch , r4
 +mfspr   r4, SPRN_SPRG_THREAD
 +lwz r4, THREAD_KVM_VCPU(r4)
 +__KVM_HANDLER \ivor_nr \scratch \srr0 .endm
 +
 +.macro KVM_DBG_HANDLER ivor_nr scratch srr0
 +_GLOBAL(kvmppc_handler_\ivor_nr)
 +mtspr   \scratch, r4
 +mfspr   r4, SPRN_SPRG_THREAD
 +lwz r4, THREAD_KVM_VCPU(r4)
 +stw r3, VCPU_CRIT_SAVE(r4)
 +mfcrr3
 +mfspr   r4, SPRN_CSRR1
 +andi.   r4, r4, MSR_PR
 +bne 1f
 
 
 +/* debug interrupt happened in enter/exit path */
 +mfspr   r4, SPRN_CSRR1
 +rlwinm  r4, r4, 0, ~MSR_DE
 +mtspr   SPRN_CSRR1, r4
 +lis r4, 0x
 +ori r4, r4, 0x
 +mtspr   SPRN_DBSR, r4
 +mfspr   r4, SPRN_SPRG_THREAD
 +lwz r4, THREAD_KVM_VCPU(r4)
 +mtcrr3
 +lwz r3, VCPU_CRIT_SAVE(r4)
 +mfspr   r4, \scratch
 +rfci
 
 What is this part doing? Try to ignore the debug exit?
 
 As BOOKE doesn't have hardware support for virtualization, hardware never 
 know
 current pc is in guest or in host.
 So when enable hardware single step for guest, it cannot be disabled at the
 time guest exit. Thus, we'll see that an single step interrupt happens at 
 the
 beginning of guest exit path.
 
 With the above code we recognize this kind of single step interrupt disable
 single step and rfci.
 
 Why would we have MSR_DE
 enabled in the first place when we can't handle it?
 
 When QEMU is using hardware debug resource then we always set MSR_DE during
 guest is running.
 
 Right, but why is MSR_DE enabled during the exit path? If MSR_DE wasn't 
 set, you
 wouldn't get a single step exit.
 
 We

Re: windows 2008 guest causing rcu_shed to emit NMI

2013-01-31 Thread Andrey Korolyov

On Thu, Jan 31, 2013 at 12:11 AM, Marcelo Tosatti mtosa...@redhat.com wrote:
 On Wed, Jan 30, 2013 at 11:21:08AM +0300, Andrey Korolyov wrote:
 On Wed, Jan 30, 2013 at 3:15 AM, Marcelo Tosatti mtosa...@redhat.com wrote:
  On Tue, Jan 29, 2013 at 02:35:02AM +0300, Andrey Korolyov wrote:
  On Mon, Jan 28, 2013 at 5:56 PM, Andrey Korolyov and...@xdel.ru wrote:
   On Mon, Jan 28, 2013 at 3:14 AM, Marcelo Tosatti mtosa...@redhat.com 
   wrote:
   On Mon, Jan 28, 2013 at 12:04:50AM +0300, Andrey Korolyov wrote:
   On Sat, Jan 26, 2013 at 12:49 AM, Marcelo Tosatti 
   mtosa...@redhat.com wrote:
On Fri, Jan 25, 2013 at 10:45:02AM +0300, Andrey Korolyov wrote:
On Thu, Jan 24, 2013 at 4:20 PM, Marcelo Tosatti 
mtosa...@redhat.com wrote:
 On Thu, Jan 24, 2013 at 01:54:03PM +0300, Andrey Korolyov wrote:
 Thank you Marcelo,

 Host node locking up sometimes later than yesterday, but 
 problem still
 here, please see attached dmesg. Stuck process looks like
 root 19251  0.0  0.0 228476 12488 ?D14:42   0:00
 /usr/bin/kvm -no-user-config -device ? -device pci-assign,? 
 -device
 virtio-blk-pci,? -device

 on fourth vm by count.

 Should I try upstream kernel instead of applying patch to the 
 latest
 3.4 or it is useless?

 If you can upgrade to an upstream kernel, please do that.

   
With vanilla 3.7.4 there is almost no changes, and NMI started 
firing
again. External symptoms looks like following: starting from some
count, may be third or sixth vm, qemu-kvm process allocating its
memory very slowly and by jumps, 20M-200M-700M-1.6G in minutes. 
Patch
helps, of course - on both patched 3.4 and vanilla 3.7 I`m able to
kill stuck kvm processes and node returned back to the normal, 
when on
3.2 sending SIGKILL to the process causing zombies and hanged 
``ps''
output (problem and workaround when no scheduler involved described
here http://www.spinics.net/lists/kvm/msg84799.html).
   
Try disabling pause loop exiting with ple_gap=0 kvm-intel.ko module 
parameter.
   
  
   Hi Marcelo,
  
   thanks, this parameter helped to increase number of working VMs in a
   half of order of magnitude, from 3-4 to 10-15. Very high SY load, 10
   to 15 percents, persists on such numbers for a long time, where linux
   guests in same configuration do not jump over one percent even under
   stress bench. After I disabled HT, crash happens only in long runs and
   now it is kernel panic :)
   Stair-like memory allocation behaviour disappeared, but other symptom
   leading to the crash which I have not counted previously, persists: if
   VM count is ``enough'' for crash, some qemu processes starting to eat
   one core, and they`ll panic system after run in tens of minutes in
   such state or if I try to attach debugger to one of them. If needed, I
   can log entire crash output via netconsole, now I have some tail,
   almost the same every time:
   http://xdel.ru/downloads/btwin.png
  
   Yes, please log entire crash output, thanks.
  
  
   Here please, 3.7.4-vanilla, 16 vms, ple_gap=0:
  
   http://xdel.ru/downloads/oops-default-kvmintel.txt
 
  Just an update: I was able to reproduce that on pure linux VMs using
  qemu-1.3.0 and ``stress'' benchmark running on them - panic occurs at
  start of vm(with count ten working machines at the moment). Qemu-1.1.2
  generally is not able to reproduce that, but host node with older
  version crashing on less amount of Windows VMs(three to six instead
  ten to fifteen) than with 1.3, please see trace below:
 
  http://xdel.ru/downloads/oops-old-qemu.txt
 
  Single bit memory error, apparently. Try:
 
  1. memtest86.
  2. Boot with slub_debug=ZFPU kernel parameter.
  3. Reproduce on different machine
 
 

 Hi Marcelo,

 I always follow the rule - if some weird bug exists, check it on
 ECC-enabled machine and check IPMI logs too before start complaining
 :) I have finally managed to ``fix'' the problem, but my solution
 seems a bit strange:
 - I have noticed that if virtual machines started without any cgroup
 setting they will not cause this bug under any conditions,
 - I have thought, very wrong in my mind, that the
 CONFIG_SCHED_AUTOGROUP should regroup the tasks without any cgroup and
 should not touch tasks already inside any existing cpu cgroup. First
 sight on the 200-line patch shows that the autogrouping always applies
 to all tasks, so I tried to disable it,
 - wild magic appears - VMs did not crash the host any more; even with a count
 of 30+ they work fine.
 I still don`t know what exactly triggered that and will I face it
 again under different conditions, so my solution more likely to be a
 patch of mud in wall of the dam, instead of proper fixing.

 There seems to be two possible origins of such error - a very very
 hideous race condition involving cgroups and processes like qemu-kvm
 causing frequent context switches and simple

RE: [PATCH 4/8] Added ONE_REG interface for debug instruction

2013-01-31 Thread Bhushan Bharat-R65777



 -Original Message-
 From: Alexander Graf [mailto:ag...@suse.de]
 Sent: Friday, January 25, 2013 5:18 PM
 To: Bhushan Bharat-R65777
 Cc: kvm-...@vger.kernel.org; kvm@vger.kernel.org; Bhushan Bharat-R65777
 Subject: Re: [PATCH 4/8] Added ONE_REG interface for debug instruction
 
 
 On 16.01.2013, at 09:24, Bharat Bhushan wrote:
 
  This patch adds the one_reg interface to get the special instruction
  to be used for setting software breakpoint from userspace.
 
  Signed-off-by: Bharat Bhushan bharat.bhus...@freescale.com
  ---
  Documentation/virtual/kvm/api.txt   |1 +
  arch/powerpc/include/asm/kvm_ppc.h  |1 +
  arch/powerpc/include/uapi/asm/kvm.h |3 +++
  arch/powerpc/kvm/44x.c  |5 +
  arch/powerpc/kvm/booke.c|   10 ++
  arch/powerpc/kvm/e500.c |5 +
  arch/powerpc/kvm/e500.h |9 +
  arch/powerpc/kvm/e500mc.c   |5 +
  8 files changed, 39 insertions(+), 0 deletions(-)
 
  diff --git a/Documentation/virtual/kvm/api.txt
  b/Documentation/virtual/kvm/api.txt
  index 09905cb..7e8be9e 100644
  --- a/Documentation/virtual/kvm/api.txt
  +++ b/Documentation/virtual/kvm/api.txt
  @@ -1775,6 +1775,7 @@ registers, find a list below:
PPC   | KVM_REG_PPC_VPA_DTL   | 128
PPC   | KVM_REG_PPC_EPCR  | 32
PPC   | KVM_REG_PPC_EPR   | 32
  +  PPC   | KVM_REG_PPC_DEBUG_INST| 32
 
  4.69 KVM_GET_ONE_REG
 
  diff --git a/arch/powerpc/include/asm/kvm_ppc.h
  b/arch/powerpc/include/asm/kvm_ppc.h
  index 44a657a..b3c481e 100644
  --- a/arch/powerpc/include/asm/kvm_ppc.h
  +++ b/arch/powerpc/include/asm/kvm_ppc.h
  @@ -235,6 +235,7 @@ union kvmppc_one_reg {
 
  void kvmppc_core_get_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs
  *sregs); int kvmppc_core_set_sregs(struct kvm_vcpu *vcpu, struct
  kvm_sregs *sregs);
  +u32 kvmppc_core_debug_inst_op(void);
 
  void kvmppc_get_sregs_ivor(struct kvm_vcpu *vcpu, struct kvm_sregs
  *sregs); int kvmppc_set_sregs_ivor(struct kvm_vcpu *vcpu, struct
  kvm_sregs *sregs); diff --git a/arch/powerpc/include/uapi/asm/kvm.h
  b/arch/powerpc/include/uapi/asm/kvm.h
  index 16064d0..e81ae5b 100644
  --- a/arch/powerpc/include/uapi/asm/kvm.h
  +++ b/arch/powerpc/include/uapi/asm/kvm.h
  @@ -417,4 +417,7 @@ struct kvm_get_htab_header {
  #define KVM_REG_PPC_EPCR(KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x85)
  #define KVM_REG_PPC_EPR (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x86)
 
  +/* Debugging: Special instruction for software breakpoint */ #define
  +KVM_REG_PPC_DEBUG_INST (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x87)
  +
  #endif /* __LINUX_KVM_POWERPC_H */
  diff --git a/arch/powerpc/kvm/44x.c b/arch/powerpc/kvm/44x.c index
  3d7fd21..41501be 100644
  --- a/arch/powerpc/kvm/44x.c
  +++ b/arch/powerpc/kvm/44x.c
  @@ -114,6 +114,11 @@ int kvmppc_core_vcpu_translate(struct kvm_vcpu *vcpu,
  return 0;
  }
 
  +u32 kvmppc_core_debug_inst_op(void)
  +{
  +   return -1;
  +}
  +
  void kvmppc_core_get_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs
  *sregs) {
  kvmppc_get_sregs_ivor(vcpu, sregs);
  diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index
  d2f502d..453a10f 100644
  --- a/arch/powerpc/kvm/booke.c
  +++ b/arch/powerpc/kvm/booke.c
 
 Please provide the DEBUG_INST on a more global level - across all ppc 
 subarchs.

Do you mean defining in powerpc.c ?

We are using one_reg for DEBUG_INST, and the one_reg ioctl is defined in the respective 
subarchs (booke and book3s have their separate handlers). So how do you want this to 
be defined in a more common way for all subarchs?

Thanks
-Bharat

 
  @@ -1424,6 +1424,12 @@ int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
 struct kvm_one_reg *reg)
  r = put_user(vcpu-arch.epcr, (u32 __user *)(long)reg-addr);
  break;
  #endif
  +   case KVM_REG_PPC_DEBUG_INST: {
  +   u32 opcode = kvmppc_core_debug_inst_op();
  +   r = copy_to_user((u32 __user *)(long)reg-addr,
  +opcode, sizeof(u32));
  +   break;
  +   }
  default:
  break;
  }
  @@ -1467,6 +1473,10 @@ int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
 struct kvm_one_reg *reg)
  break;
  }
  #endif
  +   case KVM_REG_PPC_DEBUG_INST:
  +   /* This is read only, so write to this is nop*/
  +   r = 0;
  +   break;
 
 Just don't support set_one_reg on this reg.
 
  default:
  break;
  }
  diff --git a/arch/powerpc/kvm/e500.c b/arch/powerpc/kvm/e500.c index
  6dd4de7..d8a5e8e 100644
  --- a/arch/powerpc/kvm/e500.c
  +++ b/arch/powerpc/kvm/e500.c
  @@ -367,6 +367,11 @@ int kvmppc_core_vcpu_setup(struct kvm_vcpu *vcpu)
  return 0;
  }
 
  +u32 kvmppc_core_debug_inst_op(void)
  +{
  +   return KVMPPC_INST_GUEST_GDB;
  +}
  +
  void kvmppc_core_get_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs
  *sregs) {
  struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); diff --git
  a/arch/powerpc/kvm/e500.h

Re: [PATCH 4/8] Added ONE_REG interface for debug instruction

2013-01-31 Thread Alexander Graf


On 31.01.2013, at 18:44, Bhushan Bharat-R65777 wrote:

 
 
 -Original Message-
 From: Alexander Graf [mailto:ag...@suse.de]
 Sent: Friday, January 25, 2013 5:18 PM
 To: Bhushan Bharat-R65777
 Cc: kvm-...@vger.kernel.org; kvm@vger.kernel.org; Bhushan Bharat-R65777
 Subject: Re: [PATCH 4/8] Added ONE_REG interface for debug instruction
 
 
 On 16.01.2013, at 09:24, Bharat Bhushan wrote:
 
 This patch adds the one_reg interface to get the special instruction
 to be used for setting software breakpoint from userspace.
 
 Signed-off-by: Bharat Bhushan bharat.bhus...@freescale.com
 ---
 Documentation/virtual/kvm/api.txt   |1 +
 arch/powerpc/include/asm/kvm_ppc.h  |1 +
 arch/powerpc/include/uapi/asm/kvm.h |3 +++
 arch/powerpc/kvm/44x.c  |5 +
 arch/powerpc/kvm/booke.c|   10 ++
 arch/powerpc/kvm/e500.c |5 +
 arch/powerpc/kvm/e500.h |9 +
 arch/powerpc/kvm/e500mc.c   |5 +
 8 files changed, 39 insertions(+), 0 deletions(-)
 
 diff --git a/Documentation/virtual/kvm/api.txt
 b/Documentation/virtual/kvm/api.txt
 index 09905cb..7e8be9e 100644
 --- a/Documentation/virtual/kvm/api.txt
 +++ b/Documentation/virtual/kvm/api.txt
 @@ -1775,6 +1775,7 @@ registers, find a list below:
  PPC   | KVM_REG_PPC_VPA_DTL   | 128
  PPC   | KVM_REG_PPC_EPCR   | 32
  PPC   | KVM_REG_PPC_EPR| 32
 +  PPC   | KVM_REG_PPC_DEBUG_INST| 32
 
 4.69 KVM_GET_ONE_REG
 
 diff --git a/arch/powerpc/include/asm/kvm_ppc.h
 b/arch/powerpc/include/asm/kvm_ppc.h
 index 44a657a..b3c481e 100644
 --- a/arch/powerpc/include/asm/kvm_ppc.h
 +++ b/arch/powerpc/include/asm/kvm_ppc.h
 @@ -235,6 +235,7 @@ union kvmppc_one_reg {
 
 void kvmppc_core_get_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs
 *sregs); int kvmppc_core_set_sregs(struct kvm_vcpu *vcpu, struct
 kvm_sregs *sregs);
 +u32 kvmppc_core_debug_inst_op(void);
 
 void kvmppc_get_sregs_ivor(struct kvm_vcpu *vcpu, struct kvm_sregs
 *sregs); int kvmppc_set_sregs_ivor(struct kvm_vcpu *vcpu, struct
 kvm_sregs *sregs); diff --git a/arch/powerpc/include/uapi/asm/kvm.h
 b/arch/powerpc/include/uapi/asm/kvm.h
 index 16064d0..e81ae5b 100644
 --- a/arch/powerpc/include/uapi/asm/kvm.h
 +++ b/arch/powerpc/include/uapi/asm/kvm.h
 @@ -417,4 +417,7 @@ struct kvm_get_htab_header {
 #define KVM_REG_PPC_EPCR(KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x85)
 #define KVM_REG_PPC_EPR (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x86)
 
 +/* Debugging: Special instruction for software breakpoint */ #define
 +KVM_REG_PPC_DEBUG_INST (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x87)
 +
 #endif /* __LINUX_KVM_POWERPC_H */
 diff --git a/arch/powerpc/kvm/44x.c b/arch/powerpc/kvm/44x.c index
 3d7fd21..41501be 100644
 --- a/arch/powerpc/kvm/44x.c
 +++ b/arch/powerpc/kvm/44x.c
 @@ -114,6 +114,11 @@ int kvmppc_core_vcpu_translate(struct kvm_vcpu *vcpu,
 return 0;
 }
 
 +u32 kvmppc_core_debug_inst_op(void)
 +{
 +   return -1;

The way you handle it here this needs to be an  int 
kvmppc_core_debug_inst_op(u32 *inst) so you can return an error for 440. I 
don't think it's worth to worry about a case where we don't know about the inst 
though. Just return the same as what we use on e500v2 here.

 +}
 +
 void kvmppc_core_get_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs
 *sregs) {
 kvmppc_get_sregs_ivor(vcpu, sregs);
 diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index
 d2f502d..453a10f 100644
 --- a/arch/powerpc/kvm/booke.c
 +++ b/arch/powerpc/kvm/booke.c
 
 Please provide the DEBUG_INST on a more global level - across all ppc 
 subarchs.
 
 Do you mean defining in powerpc.c ?
 
 We are using one_reg for DEBUG_INST and one_reg_ioctl and defined in 
 respective subarchs (booke and books have their separate handler). So how you 
 want this to be defined in more common way for all subarchs?

Just add it to all subarch's one_reg handlers.


Alex

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

RE: [PATCH 4/8] Added ONE_REG interface for debug instruction

2013-01-31 Thread Bhushan Bharat-R65777



 -Original Message-
 From: Alexander Graf [mailto:ag...@suse.de]
 Sent: Thursday, January 31, 2013 11:23 PM
 To: Bhushan Bharat-R65777
 Cc: kvm-...@vger.kernel.org; kvm@vger.kernel.org
 Subject: Re: [PATCH 4/8] Added ONE_REG interface for debug instruction
 
 
 On 31.01.2013, at 18:44, Bhushan Bharat-R65777 wrote:
 
 
 
  -Original Message-
  From: Alexander Graf [mailto:ag...@suse.de]
  Sent: Friday, January 25, 2013 5:18 PM
  To: Bhushan Bharat-R65777
  Cc: kvm-...@vger.kernel.org; kvm@vger.kernel.org; Bhushan
  Bharat-R65777
  Subject: Re: [PATCH 4/8] Added ONE_REG interface for debug
  instruction
 
 
  On 16.01.2013, at 09:24, Bharat Bhushan wrote:
 
  This patch adds the one_reg interface to get the special instruction
  to be used for setting software breakpoint from userspace.
 
  Signed-off-by: Bharat Bhushan bharat.bhus...@freescale.com
  ---
  Documentation/virtual/kvm/api.txt   |1 +
  arch/powerpc/include/asm/kvm_ppc.h  |1 +
  arch/powerpc/include/uapi/asm/kvm.h |3 +++
  arch/powerpc/kvm/44x.c  |5 +
  arch/powerpc/kvm/booke.c|   10 ++
  arch/powerpc/kvm/e500.c |5 +
  arch/powerpc/kvm/e500.h |9 +
  arch/powerpc/kvm/e500mc.c   |5 +
  8 files changed, 39 insertions(+), 0 deletions(-)
 
  diff --git a/Documentation/virtual/kvm/api.txt
  b/Documentation/virtual/kvm/api.txt
  index 09905cb..7e8be9e 100644
  --- a/Documentation/virtual/kvm/api.txt
  +++ b/Documentation/virtual/kvm/api.txt
  @@ -1775,6 +1775,7 @@ registers, find a list below:
   PPC   | KVM_REG_PPC_VPA_DTL   | 128
   PPC   | KVM_REG_PPC_EPCR | 32
   PPC   | KVM_REG_PPC_EPR  | 32
  +  PPC   | KVM_REG_PPC_DEBUG_INST| 32
 
  4.69 KVM_GET_ONE_REG
 
  diff --git a/arch/powerpc/include/asm/kvm_ppc.h
  b/arch/powerpc/include/asm/kvm_ppc.h
  index 44a657a..b3c481e 100644
  --- a/arch/powerpc/include/asm/kvm_ppc.h
  +++ b/arch/powerpc/include/asm/kvm_ppc.h
  @@ -235,6 +235,7 @@ union kvmppc_one_reg {
 
  void kvmppc_core_get_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs
  *sregs); int kvmppc_core_set_sregs(struct kvm_vcpu *vcpu, struct
  kvm_sregs *sregs);
  +u32 kvmppc_core_debug_inst_op(void);
 
  void kvmppc_get_sregs_ivor(struct kvm_vcpu *vcpu, struct kvm_sregs
  *sregs); int kvmppc_set_sregs_ivor(struct kvm_vcpu *vcpu, struct
  kvm_sregs *sregs); diff --git a/arch/powerpc/include/uapi/asm/kvm.h
  b/arch/powerpc/include/uapi/asm/kvm.h
  index 16064d0..e81ae5b 100644
  --- a/arch/powerpc/include/uapi/asm/kvm.h
  +++ b/arch/powerpc/include/uapi/asm/kvm.h
  @@ -417,4 +417,7 @@ struct kvm_get_htab_header {
  #define KVM_REG_PPC_EPCR  (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x85)
  #define KVM_REG_PPC_EPR   (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x86)
 
  +/* Debugging: Special instruction for software breakpoint */
  +#define KVM_REG_PPC_DEBUG_INST (KVM_REG_PPC | KVM_REG_SIZE_U32 |
  +0x87)
  +
  #endif /* __LINUX_KVM_POWERPC_H */
  diff --git a/arch/powerpc/kvm/44x.c b/arch/powerpc/kvm/44x.c index
  3d7fd21..41501be 100644
  --- a/arch/powerpc/kvm/44x.c
  +++ b/arch/powerpc/kvm/44x.c
  @@ -114,6 +114,11 @@ int kvmppc_core_vcpu_translate(struct kvm_vcpu *vcpu,
return 0;
  }
 
  +u32 kvmppc_core_debug_inst_op(void) {
  + return -1;
 
 The way you handle it here this needs to be an  int
 kvmppc_core_debug_inst_op(u32 *inst) so you can return an error for 440. I 
 don't
 think it's worth to worry about a case where we don't know about the inst
 though. Just return the same as what we use on e500v2 here.
 
  +}
  +
  void kvmppc_core_get_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs
  *sregs) {
kvmppc_get_sregs_ivor(vcpu, sregs); diff --git
  a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index
  d2f502d..453a10f 100644
  --- a/arch/powerpc/kvm/booke.c
  +++ b/arch/powerpc/kvm/booke.c
 
  Please provide the DEBUG_INST on a more global level - across all ppc
 subarchs.
 
  Do you mean defining in powerpc.c ?
 
  We are using one_reg for DEBUG_INST and one_reg_ioctl and defined in
 respective subarchs (booke and books have their separate handler). So how you
 want this to be defined in more common way for all subarchs?
 
 Just add it to all subarch's one_reg handlers.

And what should book3s etc. return?

-1 ? 

Thanks
-Bharat

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

RE: [PATCH 8/8] KVM:PPC:booke: Allow debug interrupt injection to guest

2013-01-31 Thread Bhushan Bharat-R65777



 -Original Message-
 From: kvm-ow...@vger.kernel.org [mailto:kvm-ow...@vger.kernel.org] On Behalf 
 Of
 Alexander Graf
 Sent: Thursday, January 31, 2013 5:34 PM
 To: Bhushan Bharat-R65777
 Cc: kvm-...@vger.kernel.org; kvm@vger.kernel.org
 Subject: Re: [PATCH 8/8] KVM:PPC:booke: Allow debug interrupt injection to 
 guest
 
 
 On 30.01.2013, at 12:12, Bhushan Bharat-R65777 wrote:
 
 
 
  -Original Message-
  From: kvm-ppc-ow...@vger.kernel.org
  [mailto:kvm-ppc-ow...@vger.kernel.org] On Behalf Of Alexander Graf
  Sent: Friday, January 25, 2013 5:44 PM
  To: Bhushan Bharat-R65777
  Cc: kvm-...@vger.kernel.org; kvm@vger.kernel.org; Bhushan
  Bharat-R65777
  Subject: Re: [PATCH 8/8] KVM:PPC:booke: Allow debug interrupt
  injection to guest
 
 
  On 16.01.2013, at 09:24, Bharat Bhushan wrote:
 
  Allow userspace to inject debug interrupt to guest. QEMU can
 
  s/QEMU/user space.
 
  inject the debug interrupt to guest if it is not able to handle the
  debug interrupt.
 
  Signed-off-by: Bharat Bhushan bharat.bhus...@freescale.com
  ---
  arch/powerpc/kvm/booke.c  |   32 +++-
  arch/powerpc/kvm/e500mc.c |   10 +-
  2 files changed, 40 insertions(+), 2 deletions(-)
 
  diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
  index faa0a0b..547797f 100644
  --- a/arch/powerpc/kvm/booke.c
  +++ b/arch/powerpc/kvm/booke.c
  @@ -133,6 +133,13 @@ static void kvmppc_vcpu_sync_fpu(struct
  kvm_vcpu
  *vcpu) #endif }
 
  +#ifdef CONFIG_KVM_BOOKE_HV
  +static int kvmppc_core_pending_debug(struct kvm_vcpu *vcpu) {
  + return test_bit(BOOKE_IRQPRIO_DEBUG,
  +vcpu-arch.pending_exceptions); } #endif
  +
  /*
  * Helper function for full MSR writes.  No need to call this if
  only
  * EE/CE/ME/DE/RI are changing.
  @@ -144,7 +151,11 @@ void kvmppc_set_msr(struct kvm_vcpu *vcpu, u32
  new_msr) #ifdef CONFIG_KVM_BOOKE_HV
new_msr |= MSR_GS;
 
  - if (vcpu-guest_debug)
  + /*
  +  * Set MSR_DE if the hardware debug resources are owned by user-space
  +  * and there is no debug interrupt pending for guest to handle.
 
  Why?
 
  QEMU is using the IAC/DAC registers to set hardware breakpoint/watchpoints 
  via
 debug ioctls. As debug events are enabled/gated by MSR_DE so somehow we need 
 to
 set MSR_DE on hardware MSR when guest is running in this case.
 
 Reading this 5 times I still have no idea what you're really checking for 
 here.
 Maybe the naming for kvmppc_core_pending_debug is just unnatural? What does 
 that
 function do really?
 
 
  On bookehv this is how I am controlling the MSR_DE in hardware MSR.
 
  And why is this whole thing only executed on HV?
 
  On e500v2 we always enable MSR_DE using vcpu-arch.shadow_msr in
  e500.c #ifndef CONFIG_KVM_BOOKE_HV
  -   vcpu-arch.shadow_msr = MSR_USER | MSR_IS | MSR_DS;
  +   vcpu-arch.shadow_msr = MSR_USER | MSR_DE | MSR_IS | MSR_DS;


diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
index b340a62..1e2d663 100644
--- a/arch/powerpc/kvm/booke.c
+++ b/arch/powerpc/kvm/booke.c
@@ -151,10 +151,14 @@ void kvmppc_set_msr(struct kvm_vcpu *vcpu, u32 new_msr)

/*
 * Set MSR_DE if the hardware debug resources are owned by user-space
-* and there is no debug interrupt pending for guest to handle.
 */
-   if (vcpu->guest_debug && !kvmppc_core_pending_debug(vcpu))
+   if (vcpu->guest_debug)
new_msr |= MSR_DE;
+#else
+   if (vcpu->guest_debug)
+   vcpu->arch.shadow_msr |= MSR_DE;
#endif

But I do not know when I should clear it.

 
 Why? How is e500v2 any different wrt debug? And why wouldn't that work for
 e500mc?
 
 
 Alex
 
 --
 To unsubscribe from this list: send the line unsubscribe kvm in the body of 
 a
 message to majord...@vger.kernel.org More majordomo info at
 http://vger.kernel.org/majordomo-info.html


--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH 8/8] KVM:PPC:booke: Allow debug interrupt injection to guest

2013-01-31 Thread Scott Wood


On 01/31/2013 06:04:29 AM, Alexander Graf wrote:


On 30.01.2013, at 12:12, Bhushan Bharat-R65777 wrote:

 On bookehv this is how I am controlling the MSR_DE in hardware MSR.

 And why is this whole thing only executed on HV?

 On e500v2 we always enable MSR_DE using vcpu-arch.shadow_msr in  
e500.c

 #ifndef CONFIG_KVM_BOOKE_HV
 -   vcpu-arch.shadow_msr = MSR_USER | MSR_IS | MSR_DS;
 +   vcpu-arch.shadow_msr = MSR_USER | MSR_DE | MSR_IS | MSR_DS;

Why? How is e500v2 any different wrt debug? And why wouldn't that  
work for e500mc?


shadow_msr isn't used at all on bookehv.

-Scott
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH 8/8] KVM:PPC:booke: Allow debug interrupt injection to guest

2013-01-31 Thread Alexander Graf


On 31.01.2013, at 18:59, Bhushan Bharat-R65777 wrote:

 
 
 -Original Message-
 From: kvm-ow...@vger.kernel.org [mailto:kvm-ow...@vger.kernel.org] On Behalf 
 Of
 Alexander Graf
 Sent: Thursday, January 31, 2013 5:34 PM
 To: Bhushan Bharat-R65777
 Cc: kvm-...@vger.kernel.org; kvm@vger.kernel.org
 Subject: Re: [PATCH 8/8] KVM:PPC:booke: Allow debug interrupt injection to 
 guest
 
 
 On 30.01.2013, at 12:12, Bhushan Bharat-R65777 wrote:
 
 
 
 -Original Message-
 From: kvm-ppc-ow...@vger.kernel.org
 [mailto:kvm-ppc-ow...@vger.kernel.org] On Behalf Of Alexander Graf
 Sent: Friday, January 25, 2013 5:44 PM
 To: Bhushan Bharat-R65777
 Cc: kvm-...@vger.kernel.org; kvm@vger.kernel.org; Bhushan
 Bharat-R65777
 Subject: Re: [PATCH 8/8] KVM:PPC:booke: Allow debug interrupt
 injection to guest
 
 
 On 16.01.2013, at 09:24, Bharat Bhushan wrote:
 
 Allow userspace to inject debug interrupt to guest. QEMU can
 
 s/QEMU/user space.
 
 inject the debug interrupt to guest if it is not able to handle the
 debug interrupt.
 
 Signed-off-by: Bharat Bhushan bharat.bhus...@freescale.com
 ---
 arch/powerpc/kvm/booke.c  |   32 +++-
 arch/powerpc/kvm/e500mc.c |   10 +-
 2 files changed, 40 insertions(+), 2 deletions(-)
 
 diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
 index faa0a0b..547797f 100644
 --- a/arch/powerpc/kvm/booke.c
 +++ b/arch/powerpc/kvm/booke.c
 @@ -133,6 +133,13 @@ static void kvmppc_vcpu_sync_fpu(struct
 kvm_vcpu
 *vcpu) #endif }
 
 +#ifdef CONFIG_KVM_BOOKE_HV
 +static int kvmppc_core_pending_debug(struct kvm_vcpu *vcpu) {
 + return test_bit(BOOKE_IRQPRIO_DEBUG,
 +vcpu-arch.pending_exceptions); } #endif
 +
 /*
 * Helper function for full MSR writes.  No need to call this if
 only
 * EE/CE/ME/DE/RI are changing.
 @@ -144,7 +151,11 @@ void kvmppc_set_msr(struct kvm_vcpu *vcpu, u32
 new_msr) #ifdef CONFIG_KVM_BOOKE_HV
   new_msr |= MSR_GS;
 
 - if (vcpu-guest_debug)
 + /*
 +  * Set MSR_DE if the hardware debug resources are owned by user-space
 +  * and there is no debug interrupt pending for guest to handle.
 
 Why?
 
 QEMU is using the IAC/DAC registers to set hardware breakpoint/watchpoints 
 via
 debug ioctls. As debug events are enabled/gated by MSR_DE so somehow we need 
 to
 set MSR_DE on hardware MSR when guest is running in this case.
 
 Reading this 5 times I still have no idea what you're really checking for 
 here.
 Maybe the naming for kvmppc_core_pending_debug is just unnatural? What does 
 that
 function do really?
 
 
 On bookehv this is how I am controlling the MSR_DE in hardware MSR.
 
 And why is this whole thing only executed on HV?
 
 On e500v2 we always enable MSR_DE using vcpu-arch.shadow_msr in
 e500.c #ifndef CONFIG_KVM_BOOKE_HV
 -   vcpu-arch.shadow_msr = MSR_USER | MSR_IS | MSR_DS;
 +   vcpu-arch.shadow_msr = MSR_USER | MSR_DE | MSR_IS | MSR_DS;
 
 
 diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
 index b340a62..1e2d663 100644
 --- a/arch/powerpc/kvm/booke.c
 +++ b/arch/powerpc/kvm/booke.c
 @@ -151,10 +151,14 @@ void kvmppc_set_msr(struct kvm_vcpu *vcpu, u32 new_msr)
 
/*
 * Set MSR_DE if the hardware debug resources are owned by user-space
 -* and there is no debug interrupt pending for guest to handle.
 */
 -   if (vcpu-guest_debug  !kvmppc_core_pending_debug(vcpu))
 +   if (vcpu-guest_debug)
new_msr |= MSR_DE;
 +#else
 +   if (vcpu-guest_debug)
 +   vcpu-arch.shadow_msr |= MSR_DE;
 #endif
 
 But do not when I should clear?

How about something like this? Then both targets at least suck as much :).

Thanks to e500mc's awful hardware design, we don't know who sets the MSR_DE 
bit. Once we forced it onto the guest, we have no chance to know whether the 
guest also set it or not. We could only guess.

So I would assume it's for the best to just treat both the same: always expose 
MSR_DE into guest visibility.

This will break when the guest disables MSR_DE. But I have no good idea on how 
to solve this properly - except for hypercalls to tell us that MSR_DE is set or 
not.

Scott, do you have an idea?


Alex


diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
index 38a62ef..3f8cbbd 100644
--- a/arch/powerpc/kvm/booke.c
+++ b/arch/powerpc/kvm/booke.c
@@ -133,6 +133,19 @@ static void kvmppc_vcpu_sync_fpu(struct kvm_vcpu *vcpu)
 #endif
 }
 
+static void kvmppc_vcpu_sync_debug(struct kvm_vcpu *vcpu)
+{
+   /* Force debug to on in guest space when user space wants to debug */
+   if (vcpu->guest_debug)
+   vcpu->arch.shared->msr |= MSR_DE;
+
+#if !defined(CONFIG_KVM_BOOKE_HV)
+   /* Synchronize MSR_DE into shadow MSR */
+   vcpu->arch.shadow_msr &= ~MSR_DE;
+   vcpu->arch.shadow_msr |= vcpu->arch.shared->msr & MSR_DE;
+#endif
+}
+
 /*
  * Helper function for full MSR writes.  No need to call this if only
  * EE/CE/ME/DE/RI are changing.
@@ -150,6 +163,7 @@ void kvmppc_set_msr(struct

Re: [PATCH 4/8] Added ONE_REG interface for debug instruction

2013-01-31 Thread Alexander Graf


On 31.01.2013, at 18:58, Bhushan Bharat-R65777 wrote:

 
 
 -Original Message-
 From: Alexander Graf [mailto:ag...@suse.de]
 Sent: Thursday, January 31, 2013 11:23 PM
 To: Bhushan Bharat-R65777
 Cc: kvm-...@vger.kernel.org; kvm@vger.kernel.org
 Subject: Re: [PATCH 4/8] Added ONE_REG interface for debug instruction
 
 
 On 31.01.2013, at 18:44, Bhushan Bharat-R65777 wrote:
 
 
 
 -Original Message-
 From: Alexander Graf [mailto:ag...@suse.de]
 Sent: Friday, January 25, 2013 5:18 PM
 To: Bhushan Bharat-R65777
 Cc: kvm-...@vger.kernel.org; kvm@vger.kernel.org; Bhushan
 Bharat-R65777
 Subject: Re: [PATCH 4/8] Added ONE_REG interface for debug
 instruction
 
 
 On 16.01.2013, at 09:24, Bharat Bhushan wrote:
 
 This patch adds the one_reg interface to get the special instruction
 to be used for setting software breakpoint from userspace.
 
 Signed-off-by: Bharat Bhushan bharat.bhus...@freescale.com
 ---
 Documentation/virtual/kvm/api.txt   |1 +
 arch/powerpc/include/asm/kvm_ppc.h  |1 +
 arch/powerpc/include/uapi/asm/kvm.h |3 +++
 arch/powerpc/kvm/44x.c  |5 +
 arch/powerpc/kvm/booke.c|   10 ++
 arch/powerpc/kvm/e500.c |5 +
 arch/powerpc/kvm/e500.h |9 +
 arch/powerpc/kvm/e500mc.c   |5 +
 8 files changed, 39 insertions(+), 0 deletions(-)
 
 diff --git a/Documentation/virtual/kvm/api.txt
 b/Documentation/virtual/kvm/api.txt
 index 09905cb..7e8be9e 100644
 --- a/Documentation/virtual/kvm/api.txt
 +++ b/Documentation/virtual/kvm/api.txt
 @@ -1775,6 +1775,7 @@ registers, find a list below:
 PPC   | KVM_REG_PPC_VPA_DTL   | 128
 PPC   | KVM_REG_PPC_EPCR  | 32
 PPC   | KVM_REG_PPC_EPR   | 32
 +  PPC   | KVM_REG_PPC_DEBUG_INST| 32
 
 4.69 KVM_GET_ONE_REG
 
 diff --git a/arch/powerpc/include/asm/kvm_ppc.h
 b/arch/powerpc/include/asm/kvm_ppc.h
 index 44a657a..b3c481e 100644
 --- a/arch/powerpc/include/asm/kvm_ppc.h
 +++ b/arch/powerpc/include/asm/kvm_ppc.h
 @@ -235,6 +235,7 @@ union kvmppc_one_reg {
 
 void kvmppc_core_get_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs
 *sregs); int kvmppc_core_set_sregs(struct kvm_vcpu *vcpu, struct
 kvm_sregs *sregs);
 +u32 kvmppc_core_debug_inst_op(void);
 
 void kvmppc_get_sregs_ivor(struct kvm_vcpu *vcpu, struct kvm_sregs
 *sregs); int kvmppc_set_sregs_ivor(struct kvm_vcpu *vcpu, struct
 kvm_sregs *sregs); diff --git a/arch/powerpc/include/uapi/asm/kvm.h
 b/arch/powerpc/include/uapi/asm/kvm.h
 index 16064d0..e81ae5b 100644
 --- a/arch/powerpc/include/uapi/asm/kvm.h
 +++ b/arch/powerpc/include/uapi/asm/kvm.h
 @@ -417,4 +417,7 @@ struct kvm_get_htab_header {
 #define KVM_REG_PPC_EPCR  (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x85)
 #define KVM_REG_PPC_EPR   (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x86)
 
 +/* Debugging: Special instruction for software breakpoint */
 +#define KVM_REG_PPC_DEBUG_INST (KVM_REG_PPC | KVM_REG_SIZE_U32 |
 +0x87)
 +
 #endif /* __LINUX_KVM_POWERPC_H */
 diff --git a/arch/powerpc/kvm/44x.c b/arch/powerpc/kvm/44x.c index
 3d7fd21..41501be 100644
 --- a/arch/powerpc/kvm/44x.c
 +++ b/arch/powerpc/kvm/44x.c
 @@ -114,6 +114,11 @@ int kvmppc_core_vcpu_translate(struct kvm_vcpu *vcpu,
   return 0;
 }
 
 +u32 kvmppc_core_debug_inst_op(void) {
 + return -1;
 
 The way you handle it here this needs to be an  int
 kvmppc_core_debug_inst_op(u32 *inst) so you can return an error for 440. I 
 don't
 think it's worth to worry about a case where we don't know about the inst
 though. Just return the same as what we use on e500v2 here.
 
 +}
 +
 void kvmppc_core_get_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs
 *sregs) {
   kvmppc_get_sregs_ivor(vcpu, sregs); diff --git
 a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index
 d2f502d..453a10f 100644
 --- a/arch/powerpc/kvm/booke.c
 +++ b/arch/powerpc/kvm/booke.c
 
 Please provide the DEBUG_INST on a more global level - across all ppc
 subarchs.
 
 Do you mean defining in powerpc.c ?
 
 We are using one_reg for DEBUG_INST and one_reg_ioctl and defined in
 respective subarchs (booke and books have their separate handler). So how you
 want this to be defined in more common way for all subarchs?
 
 Just add it to all subarch's one_reg handlers.
 
 And what book3s etc should return?
 
 -1 ? 

trap maybe?


Alex

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH 8/8] KVM:PPC:booke: Allow debug interrupt injection to guest

2013-01-31 Thread Scott Wood


On 01/31/2013 12:21:07 PM, Alexander Graf wrote:
How about something like this? Then both targets at least suck as  
much :).


I'm not sure that should be the goal...

Thanks to e500mc's awful hardware design, we don't know who sets the  
MSR_DE bit. Once we forced it onto the guest, we have no change to  
know whether the guest also set it or not. We could only guess.


MSRP[DEP] can prevent the guest from modifying MSR[DE] -- but we still  
need to set it in the first place.


According to ISA V2.06B, the hypervisor should set DBCR0[EDM] to let  
the guest know that the debug resources are not available, and that  
the value of MSR[DE] is not specified and not modifiable.


-Scott
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [Qemu-devel] What to do about non-qdevified devices?

2013-01-31 Thread Markus Armbruster

Andreas Färber afaer...@suse.de writes:

 Am 30.01.2013 13:35, schrieb Markus Armbruster:
 Peter Maydell peter.mayd...@linaro.org writes:
 
 On 30 January 2013 07:02, Markus Armbruster arm...@redhat.com wrote:
 Anthony Liguori aligu...@us.ibm.com writes:

 [...]
 The problems I ran into were (1) this is a lot of work (2) it basically
 requires that all bus children have been qdev/QOM-ified.  Even with
 something like the ISA bus which is where I started, quite a few devices
 were not qdevified still.

 So what's the plan to complete the qdevification job?  Lay really low
 and quietly hope the problem goes away?  We've tried that for about
 three years, doesn't seem to work.

 Do we have a list of not-yet-qdevified devices? Maybe we need to
 start saying fix X Y and Z or platform P is dropped from the next
 release. (This would of course be easier if we had a way to let users
 know that platform P was in danger...)
 
 I think that's a good idea.  Only problem is identifying pre-qdev
 devices in the code requires code inspection (grep won't do, I'm
 afraid).

 +1 That would address my request as well.

 Having a list of low-hanging fruit on the Wiki might also give new
 contributors some ideas of where and how to start poking at the code.

 If we agree on a qdevify or else plan, I'd be prepared to help with
 the digging up of devices.

 I disagree on the or else part. I have been qdev'ifying and QOM'ifying
 devices in my maintenance area, and progress is slow. It gets even

Good work, much appreciated.

 slower if one leaves clearly maintained areas. I see no good reason to
 force a pistol on someone's breast, like you have done for IDE, unless
 there is a good reason to do so. Currently I don't see any.

There's the reason that made me hijack this thread.  Paraphrasing
Anthony: doing IRQs right involves Pin objects, and ultimately requires
all bus children have been qdevified.  Even for ISA, there are still
stragglers holding us back.

Is that sufficient reason to rip out devices *now*?  No, and I didn't
call for it.

Could it become sufficient reason in the not too distant future?
Possibly.  Should we plan ahead for such a contingency?  Probably.  But
I didn't call for that either.

What I actually wrote was 1. I think mapping the remaining qdevification
work is a good idea, and 2. if we commit to attempt doing that work in a
reasonable time frame, I'd be willing to help with the mapping.
Implying that without such a commitment, sorry, got more immediately
useful things to do.

And by the way, the kind of pistol I get to brandish in this group is
about as scary as a water pistol in the middle of the Gobi desert.

[...]
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH 8/8] KVM:PPC:booke: Allow debug interrupt injection to guest

2013-01-31 Thread Alexander Graf


On 31.01.2013, at 19:43, Scott Wood wrote:

 On 01/31/2013 12:21:07 PM, Alexander Graf wrote:
 How about something like this? Then both targets at least suck as much :).
 
 I'm not sure that should be the goal...
 
 Thanks to e500mc's awful hardware design, we don't know who sets the MSR_DE 
 bit. Once we forced it onto the guest, we have no change to know whether the 
 guest also set it or not. We could only guess.
 
 MSRP[DEP] can prevent the guest from modifying MSR[DE] -- but we still need 
 to set it in the first place.
 
 According to ISA V2.06B, the hypervisor should set DBCR0[EDM] to let the 
 guest know that the debug resources are not available, and that the value of 
 MSR[DE] is not specified and not modifiable.

So what would the guest do then to tell the hypervisor that it actually wants 
to know about debug events?


Alex

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH 8/8] KVM:PPC:booke: Allow debug interrupt injection to guest

2013-01-31 Thread Scott Wood


On 01/31/2013 12:52:41 PM, Alexander Graf wrote:


On 31.01.2013, at 19:43, Scott Wood wrote:

 On 01/31/2013 12:21:07 PM, Alexander Graf wrote:
 How about something like this? Then both targets at least suck as  
much :).


 I'm not sure that should be the goal...

 Thanks to e500mc's awful hardware design, we don't know who sets  
the MSR_DE bit. Once we forced it onto the guest, we have no change  
to know whether the guest also set it or not. We could only guess.


 MSRP[DEP] can prevent the guest from modifying MSR[DE] -- but we  
still need to set it in the first place.


 According to ISA V2.06B, the hypervisor should set DBCR0[EDM] to  
let the guest know that the debug resources are not available, and  
that the value of MSR[DE] is not specified and not modifiable.


So what would the guest do then to tell the hypervisor that it  
actually wants to know about debug events?


The guest is out of luck, just as if a JTAG were in use.

-Scott
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH 8/8] KVM:PPC:booke: Allow debug interrupt injection to guest

2013-01-31 Thread Alexander Graf


On 31.01.2013, at 19:54, Scott Wood wrote:

 On 01/31/2013 12:52:41 PM, Alexander Graf wrote:
 On 31.01.2013, at 19:43, Scott Wood wrote:
  On 01/31/2013 12:21:07 PM, Alexander Graf wrote:
  How about something like this? Then both targets at least suck as much :).
 
  I'm not sure that should be the goal...
 
  Thanks to e500mc's awful hardware design, we don't know who sets the 
  MSR_DE bit. Once we forced it onto the guest, we have no chance to know 
  whether the guest also set it or not. We could only guess.
 
  MSRP[DEP] can prevent the guest from modifying MSR[DE] -- but we still 
  need to set it in the first place.
 
  According to ISA V2.06B, the hypervisor should set DBCR0[EDM] to let the 
  guest know that the debug resources are not available, and that the value 
  of MSR[DE] is not specified and not modifiable.
 So what would the guest do then to tell the hypervisor that it actually 
 wants to know about debug events?
 
 The guest is out of luck, just as if a JTAG were in use.

Hrm.

Can we somehow generalize this out of luck behavior?

Every time we would set or clear an MSR bit in shadow_msr on e500v2, we would 
instead set or clear it in the real MSR. That way only e500mc is out of luck, 
but the code would still be shared.


Alex

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH 8/8] KVM:PPC:booke: Allow debug interrupt injection to guest

2013-01-31 Thread Alexander Graf


On 31.01.2013, at 20:05, Alexander Graf wrote:

 
 On 31.01.2013, at 19:54, Scott Wood wrote:
 
 On 01/31/2013 12:52:41 PM, Alexander Graf wrote:
 On 31.01.2013, at 19:43, Scott Wood wrote:
 On 01/31/2013 12:21:07 PM, Alexander Graf wrote:
 How about something like this? Then both targets at least suck as much :).
 
 I'm not sure that should be the goal...
 
 Thanks to e500mc's awful hardware design, we don't know who sets the 
 MSR_DE bit. Once we forced it onto the guest, we have no chance to know 
 whether the guest also set it or not. We could only guess.
 
 MSRP[DEP] can prevent the guest from modifying MSR[DE] -- but we still 
 need to set it in the first place.
 
 According to ISA V2.06B, the hypervisor should set DBCR0[EDM] to let the 
 guest know that the debug resources are not available, and that the value 
 of MSR[DE] is not specified and not modifiable.
 So what would the guest do then to tell the hypervisor that it actually 
 wants to know about debug events?
 
 The guest is out of luck, just as if a JTAG were in use.
 
 Hrm.
 
 Can we somehow generalize this out of luck behavior?
 
 Every time we would set or clear an MSR bit in shadow_msr on e500v2, we would 
 instead set or clear it in the real MSR. That way only e500mc is out of luck, 
 but the code would still be shared.


Something like this. We could also define a SHADOW_MSR(vcpu) macro to hide the 
glorious details, but I think this way it's easier to understand what's going 
on.


Alex

diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
index 38a62ef..9bdb845 100644
--- a/arch/powerpc/kvm/booke.c
+++ b/arch/powerpc/kvm/booke.c
@@ -133,6 +133,29 @@ static void kvmppc_vcpu_sync_fpu(struct kvm_vcpu *vcpu)
 #endif
 }
 
+static void kvmppc_vcpu_sync_debug(struct kvm_vcpu *vcpu)
+{
+   u32 is_debug = vcpu->arch.shared->msr & MSR_DE;
+
+   /* Force debug to on in guest space when user space wants to debug */
+   if (vcpu->guest_debug)
+   is_debug = MSR_DE;
+
+#ifdef CONFIG_KVM_BOOKE_HV
+   /*
+* Since there is no shadow MSR, sync MSR_DE into the guest
+* visible MSR.
+*/
+   vcpu->arch.shared->msr &= ~MSR_DE;
+   vcpu->arch.shared->msr |= is_debug;
+#endif
+
+#ifndef CONFIG_KVM_BOOKE_HV
+   vcpu->arch.shadow_msr &= ~MSR_DE;
+   vcpu->arch.shadow_msr |= is_debug;
+#endif
+#endif
+}
+
 /*
  * Helper function for full MSR writes.  No need to call this if only
  * EE/CE/ME/DE/RI are changing.
@@ -150,6 +173,7 @@ void kvmppc_set_msr(struct kvm_vcpu *vcpu, u32 new_msr)
kvmppc_mmu_msr_notify(vcpu, old_msr);
kvmppc_vcpu_sync_spe(vcpu);
kvmppc_vcpu_sync_fpu(vcpu);
+   kvmppc_vcpu_sync_debug(vcpu);
 }
 
 static void kvmppc_booke_queue_irqprio(struct kvm_vcpu *vcpu,--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH] KVM: Remove duplicate text in api.txt

2013-01-31 Thread Geoff Levand

Signed-off-by: Geoff Levand ge...@infradead.org
---

Saw this in v3.8-rc5, please apply.

 Documentation/virtual/kvm/api.txt |   13 -
 1 file changed, 13 deletions(-)

diff --git a/Documentation/virtual/kvm/api.txt 
b/Documentation/virtual/kvm/api.txt
index a4df553..a65a6b3 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -212,33 +212,20 @@ hardware requires all the hardware threads in a CPU core 
to be in the
 same partition.)  The KVM_CAP_PPC_SMT capability indicates the number
 of vcpus per virtual core (vcore).  The vcore id is obtained by
 dividing the vcpu id by the number of vcpus per vcore.  The vcpus in a
 given vcore will always be in the same physical core as each other
 (though that might be a different physical core from time to time).
 Userspace can control the threading (SMT) mode of the guest by its
 allocation of vcpu ids.  For example, if userspace wants
 single-threaded guest vcpus, it should make all vcpu ids be a multiple
 of the number of vcpus per vcore.
 
-On powerpc using book3s_hv mode, the vcpus are mapped onto virtual
-threads in one or more virtual CPU cores.  (This is because the
-hardware requires all the hardware threads in a CPU core to be in the
-same partition.)  The KVM_CAP_PPC_SMT capability indicates the number
-of vcpus per virtual core (vcore).  The vcore id is obtained by
-dividing the vcpu id by the number of vcpus per vcore.  The vcpus in a
-given vcore will always be in the same physical core as each other
-(though that might be a different physical core from time to time).
-Userspace can control the threading (SMT) mode of the guest by its
-allocation of vcpu ids.  For example, if userspace wants
-single-threaded guest vcpus, it should make all vcpu ids be a multiple
-of the number of vcpus per vcore.
-
 For virtual cpus that have been created with S390 user controlled virtual
 machines, the resulting vcpu fd can be memory mapped at page offset
 KVM_S390_SIE_PAGE_OFFSET in order to obtain a memory map of the virtual
 cpu's hardware control block.
 
 
 4.8 KVM_GET_DIRTY_LOG (vm ioctl)
 
 Capability: basic
 Architectures: x86


--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH v2 02/18] KVM/MIPS32: Arch specific KVM data structures.

2013-01-31 Thread David Daney


On 11/21/2012 06:34 PM, Sanjay Lal wrote:

Signed-off-by: Sanjay Lal sanj...@kymasys.com
---
  arch/mips/include/asm/kvm.h  |  55 


asm/kvm.h defines the user space ABI, and thus should be placed in 
arch/mips/include/uapi/asm instead.





  arch/mips/include/asm/kvm_host.h | 669 +++
  2 files changed, 724 insertions(+)
  create mode 100644 arch/mips/include/asm/kvm.h
  create mode 100644 arch/mips/include/asm/kvm_host.h

diff --git a/arch/mips/include/asm/kvm.h b/arch/mips/include/asm/kvm.h
new file mode 100644
index 000..85789ea
--- /dev/null
+++ b/arch/mips/include/asm/kvm.h
@@ -0,0 +1,55 @@
+/*
+* This file is subject to the terms and conditions of the GNU General Public
+* License.  See the file COPYING in the main directory of this archive
+* for more details.
+*
+* Copyright (C) 2012  MIPS Technologies, Inc.  All rights reserved.
+* Authors: Sanjay Lal sanj...@kymasys.com
+*/
+
+#ifndef __LINUX_KVM_MIPS_H
+#define __LINUX_KVM_MIPS_H
+
+#include linux/types.h
+
+#define __KVM_MIPS
+
+#define N_MIPS_COPROC_REGS  32
+#define N_MIPS_COPROC_SEL  8
+
+/* for KVM_GET_REGS and KVM_SET_REGS */
+struct kvm_regs {
+   __u32 gprs[32];


MIPS64 registers are 64 bits wide.  How is this going to work for MIPS64?

It seems a little important to answer this question as this is a 
userspace ABI that really cannot be changed once it is published.




+   __u32 hi;
+   __u32 lo;
+   __u32 pc;
+
+   __u32 cp0reg[N_MIPS_COPROC_REGS][N_MIPS_COPROC_SEL];


Do we really want CP0 regs in here?  Other architectures don't have 
things like this.  They use things like KVM_GET_MSRS and KVM_SET_MSRS 
for this.



+};
+
+/* for KVM_GET_SREGS and KVM_SET_SREGS */
+struct kvm_sregs {
+};
+
+/* for KVM_GET_FPU and KVM_SET_FPU */
+struct kvm_fpu {


This is a userspace ABI, and MIPS definitely has a FPU.  That means that 
we cannot change the definition after it is merged, but we know this 
must have the FPU registers in it.


So it cannot be both present and empty.



+};
+
+struct kvm_debug_exit_arch {
+};
+
+/* for KVM_SET_GUEST_DEBUG */
+struct kvm_guest_debug_arch {
+};
+
+struct kvm_mips_interrupt {
+   /* in */
+   __u32 cpu;
+   __u32 irq;
+};
+
+/* definition of registers in kvm_run */
+struct kvm_sync_regs {
+};
+
+#endif /* __LINUX_KVM_MIPS_H */

[...]
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH] tcm_vhost: Multi-target support

2013-01-31 Thread Nicholas A. Bellinger

On Thu, 2013-01-31 at 17:28 +0800, Asias He wrote:
 Hello Nicholas,
 
 On 01/31/2013 03:33 PM, Asias He wrote:
  In order to take advantage of Paolo's multi-queue virtio-scsi, we need
  multi-target support in tcm_vhost first. Otherwise all the requests go
  to one queue and other queues are idle.
  
  This patch makes:
  
  1. All the targets under the wwpn are seen and can be used by the guest.
  2. No need to pass the tpgt number in struct vhost_scsi_target to
 tcm_vhost.ko. Only wwpn is needed.
  3. We can always pass max_target = 255 to guest now, since we abort the
     request whose target id does not exist.
  
  Signed-off-by: Asias He as...@redhat.com
  ---
   drivers/vhost/tcm_vhost.c | 115 
  --
   drivers/vhost/tcm_vhost.h |   4 +-
   2 files changed, 74 insertions(+), 45 deletions(-)
  
  diff --git a/drivers/vhost/tcm_vhost.c b/drivers/vhost/tcm_vhost.c
  index 218deb6..d50cb95 100644
  --- a/drivers/vhost/tcm_vhost.c
  +++ b/drivers/vhost/tcm_vhost.c
  @@ -59,13 +59,18 @@ enum {
  VHOST_SCSI_VQ_IO = 2,
   };
   
  +#define VHOST_SCSI_MAX_TARGET 256
  +
   struct vhost_scsi {
  -   struct tcm_vhost_tpg *vs_tpg;   /* Protected by vhost_scsi-dev.mutex */
  +   /* Protected by vhost_scsi-dev.mutex */
  +   struct tcm_vhost_tpg *vs_tpg[VHOST_SCSI_MAX_TARGET];
  struct vhost_dev dev;
  struct vhost_virtqueue vqs[3];
   
  struct vhost_work vs_completion_work; /* cmd completion work item */
  struct llist_head vs_completion_list; /* cmd completion queue */
  +   char vs_vhost_wwpn[TRANSPORT_IQN_LEN];
  +   int vs_num_target;
   };
   
   /* Local pointer to allocated TCM configfs fabric module */
  @@ -564,13 +569,7 @@ static void vhost_scsi_handle_vq(struct vhost_scsi *vs)
  u32 exp_data_len, data_first, data_num, data_direction;
  unsigned out, in, i;
  int head, ret;
  -
  -   /* Must use ioctl VHOST_SCSI_SET_ENDPOINT */
  -   tv_tpg = vs-vs_tpg;
  -   if (unlikely(!tv_tpg)) {
  -   pr_err(%s endpoint not set\n, __func__);
  -   return;
  -   }
  +   u8 target;
   
  mutex_lock(vq-mutex);
  vhost_disable_notify(vs-dev, vq);
  @@ -637,6 +636,35 @@ static void vhost_scsi_handle_vq(struct vhost_scsi *vs)
  break;
  }
   
  +   /* Extract the tpgt */
  +   target = v_req.lun[1];
  +
  +   /* Target does not exist, fail the request */
  +   if (unlikely(target >= vs->vs_num_target)) {
  +   struct virtio_scsi_cmd_resp __user *resp;
  +   struct virtio_scsi_cmd_resp rsp;
  +
  +   memset(rsp, 0, sizeof(rsp));
  +   rsp.response = VIRTIO_SCSI_S_BAD_TARGET;
  +   resp = vq-iov[out].iov_base;
  +   ret = copy_to_user(resp, rsp, sizeof(rsp));
  +   if (!ret)
  +   vhost_add_used_and_signal(vs-dev,
  +   vs-vqs[2], head, 0);
  +   else
  +   pr_err(Faulted on virtio_scsi_cmd_resp\n);
  +
  +   continue;
  +   }
  +
  +   tv_tpg = vs-vs_tpg[target];
  +   if (unlikely(!tv_tpg)) {
  +   /* Must use ioctl VHOST_SCSI_SET_ENDPOINT */
  +   pr_err(endpoint not set, target = %d\n, target);
  +   vhost_discard_vq_desc(vq, 1);
  +   break;
  +   }
  +
  exp_data_len = 0;
  for (i = 0; i  data_num; i++)
  exp_data_len += vq-iov[data_first + i].iov_len;
  @@ -771,14 +799,11 @@ static int vhost_scsi_set_endpoint(
  }
  tv_tport = tv_tpg-tport;
   
  -   if (!strcmp(tv_tport-tport_name, t-vhost_wwpn) 
  -   (tv_tpg-tport_tpgt == t-vhost_tpgt)) {
  +   if (!strcmp(tv_tport-tport_name, t-vhost_wwpn)) {
  tv_tpg-tv_tpg_vhost_count++;
  -   mutex_unlock(tv_tpg-tv_tpg_mutex);
  -   mutex_unlock(tcm_vhost_mutex);
   
  mutex_lock(vs-dev.mutex);
  -   if (vs-vs_tpg) {
  +   if (vs-vs_tpg[tv_tpg-tport_tpgt - 1]) {
  mutex_unlock(vs-dev.mutex);
  mutex_lock(tv_tpg-tv_tpg_mutex);
  tv_tpg-tv_tpg_vhost_count--;
  @@ -786,15 +811,17 @@ static int vhost_scsi_set_endpoint(
  return -EEXIST;
  }
   
  -   vs-vs_tpg = tv_tpg;
  +   vs-vs_tpg[tv_tpg-tport_tpgt - 1] = tv_tpg;
 
 
 tv_tpg-tport_tpgt starts from 0, right? I thought it starts from 1,
 because I always got it starts from 1 in targetcli.
 
 o- vhost
o- naa.6001405bd4e8476d
   o- tpg1
  o- luns
 o- lun0
   o- tpg2
  o- luns
 o- lun0
   o- tpg3
  o- luns
 o- lun0

Re: [Qemu-devel] KVM call minutes 2013-01-29 - Port I/O

2013-01-31 Thread Michael S. Tsirkin

On Thu, Jan 31, 2013 at 09:34:03AM -0700, Alex Williamson wrote:
 
 On Thu, 2013-01-31 at 12:49 +0200, Michael S. Tsirkin wrote:
  On Wed, Jan 30, 2013 at 04:28:30PM -0700, Alex Williamson wrote:
   On Thu, 2013-01-31 at 10:02 +1100, Benjamin Herrenschmidt wrote:
On Thu, 2013-01-31 at 00:49 +0200, Michael S. Tsirkin wrote:
  In practice they do (VGA at least)
  
  From a SW modelling standpoint, I don't think it's worth
 differentiating
  PCI and PCIE.
  
  Cheers,
  Ben.
 
 Interesting.
 Do you have such hardware? Could you please dump
 the output of lspci -vv?

Any ATI or nVidia card still supports hard decoding of VGA regions for
the sake of legacy operating systems and BIOSes :-) I don't know about
Intel but I suppose it's the same.
   
   For example:
   
   -[:00]-+-00.0  Advanced Micro Devices [AMD] nee ATI RD890 PCI to PCI 
   bridge (external gfx0 p
  +-04.0-[02]--+-00.0  Advanced Micro Devices [AMD] nee ATI 
   Cedar PRO [Radeon HD 5450/6350]
   
   00:04.0 PCI bridge: Advanced Micro Devices [AMD] nee ATI RD890 PCI to PCI 
   bridge (PCI express gpp port D) (prog-if 00 [Normal decode])
 Control: I/O+ Mem+ BusMaster+ SpecCycle- MemWINV- VGASnoop- ParErr- 
   Stepping- SERR- FastB2B- DisINTx-
 Status: Cap+ 66MHz- UDF- FastB2B- ParErr- DEVSEL=fast TAbort- TAbort- 
   MAbort- SERR- PERR- INTx-
 Latency: 0, Cache Line Size: 64 bytes
 Bus: primary=00, secondary=02, subordinate=02, sec-latency=0
 I/O behind bridge: c000-cfff
 Memory behind bridge: fd10-fd1f
 Prefetchable memory behind bridge: d000-dfff
 Secondary status: 66MHz- FastB2B- ParErr- DEVSEL=fast TAbort- TAbort- 
   MAbort+ SERR- PERR-
 BridgeCtl: Parity- SERR- NoISA- VGA+ MAbort- Reset- FastB2B-
 
   VGA+ (VGA Enable) indicates positive decode of 0x3b0 - 0x3bb, 0x3c0 -
   0x3df, and 0xa0000 - 0xbffff.  Device 2:00.0 of course doesn't report
   these ISA ranges as they're implicit in the VGA class code.
  
  OK but this appears behind a bridge.  So the bridge configuration tells
  the root complex where to send accesses to the VGA.
  
  But qemu currently puts devices directly on root bus.
  
  And as far as I can tell when we present devices directly on bus 0, we
  pretend these are integrated in the root complex. The spec seems to
  say explicitly that root complex integrated devices should not use legacy
  addresses or support hotplug. So I would be surprised if such one
  appears in real world.
  
  Luckily guests do not seem to be worried as long as we use ACPI.
 
 Yes, in fact I just figured out last night that Windows is unhappy with
 assigned PCI devices on bus 0 that claim to be an endpoint in their PCIe
 capability rather than an integrated endpoint.  We'll need to do extra
 mangling of the PCIe capability to massage it into the guest visible
 topology.

For now, just put your device behind an express bridge. This breaks acpi
hotplug for now, but I'm looking into hotplug with bridges anyway.

If you really need it I can give you a hack for hotplug too.

Of course express  does not allow hotplug of root complex parts
but happens to work because we use ACPI.

 Section 1.3.2.3 of the 3.0 spec says integrated endpoints must not
 require I/O resources claimed through BAR(s).  VGA skirts around this by
 not having the legacy resources claimed by BARs, but instead being
 implicit.

Aha. I missed this point.

  Are there other sections restricting legacy I/O?

One other interesting thing is that the VGA enable bit (for bridge control
register) does not appear in express spec at all.

 It's common that a plugin VGA card sits behind a root port where the
 bridge registers tell us about VGA routing,
 but integrated VGA devices
 are often on bus 0 though, here's an example:
 
 -[:00]-+-00.0  Intel Corporation 2nd Generation Core Processor Family 
 DRAM Controller
+-02.0  Intel Corporation 2nd Generation Core Processor Family 
 Integrated Graphics Controller
 
 Often these systems will disable the integrated graphics when a plugin
 graphics is installed below a root port.  I'm not sure how the system
 knows to route VGA to the integrated device vs the root port otherwise.

I am guessing it disables the integrated graphics?

 Here's a more interesting example:
 
 -+-[:01]-+-00.0  NVIDIA Corporation GT218 [GeForce G210M]
  |   \-00.1  NVIDIA Corporation High Definition Audio Controller
  \-[:00]-+-00.0  Intel Corporation Mobile 4 Series Chipset Memory 
 Controller Hub
  +-01.0  Intel Corporation Mobile 4 Series Chipset PCI Express 
 Graphics Port
 
 This system seems to have two host bridges with VGA behind each of them.
 There's no bridge to control VGA routing, so I don't know how the
 selection is done.

Is IO space disabled for the inactive card? Maybe that is how.

  It's possible the g210m never sees

Re: [Qemu-devel] KVM call minutes 2013-01-29 - Port I/O

2013-01-31 Thread Alex Williamson

On Thu, 2013-01-31 at 23:11 +0200, Michael S. Tsirkin wrote:
 On Thu, Jan 31, 2013 at 09:34:03AM -0700, Alex Williamson wrote:
  
  On Thu, 2013-01-31 at 12:49 +0200, Michael S. Tsirkin wrote:
   On Wed, Jan 30, 2013 at 04:28:30PM -0700, Alex Williamson wrote:
On Thu, 2013-01-31 at 10:02 +1100, Benjamin Herrenschmidt wrote:
 On Thu, 2013-01-31 at 00:49 +0200, Michael S. Tsirkin wrote:
   In practice they do (VGA at least)
   
   From a SW modelling standpoint, I don't think it's worth
  differentiating
   PCI and PCIE.
   
   Cheers,
   Ben.
  
  Interesting.
  Do you have such hardware? Could you please dump
  the output of lspci -vv?
 
 Any ATI or nVidia card still supports hard decoding of VGA regions for
 the sake of legacy operating systems and BIOSes :-) I don't know about
 Intel but I suppose it's the same.

For example:

-[:00]-+-00.0  Advanced Micro Devices [AMD] nee ATI RD890 PCI to 
PCI bridge (external gfx0 p
   +-04.0-[02]--+-00.0  Advanced Micro Devices [AMD] nee ATI 
Cedar PRO [Radeon HD 5450/6350]

00:04.0 PCI bridge: Advanced Micro Devices [AMD] nee ATI RD890 PCI to 
PCI bridge (PCI express gpp port D) (prog-if 00 [Normal decode])
Control: I/O+ Mem+ BusMaster+ SpecCycle- MemWINV- VGASnoop- 
ParErr- Stepping- SERR- FastB2B- DisINTx-
Status: Cap+ 66MHz- UDF- FastB2B- ParErr- DEVSEL=fast TAbort- 
TAbort- MAbort- SERR- PERR- INTx-
Latency: 0, Cache Line Size: 64 bytes
Bus: primary=00, secondary=02, subordinate=02, sec-latency=0
I/O behind bridge: c000-cfff
Memory behind bridge: fd10-fd1f
Prefetchable memory behind bridge: 
d000-dfff
Secondary status: 66MHz- FastB2B- ParErr- DEVSEL=fast TAbort- 
TAbort- MAbort+ SERR- PERR-
BridgeCtl: Parity- SERR- NoISA- VGA+ MAbort- Reset- FastB2B-
  
VGA+ (VGA Enable) indicates positive decode of 0x3b0 - 0x3bb, 0x3c0 -
0x3df, and 0xa0000 - 0xbffff.  Device 2:00.0 of course doesn't report
these ISA ranges as they're implicit in the VGA class code.
   
   OK but this appears behind a bridge.  So the bridge configuration tells
   the root complex where to send accesses to the VGA.
   
   But qemu currently puts devices directly on root bus.
   
   And as far as I can tell when we present devices directly on bus 0, we
   pretend these are integrated in the root complex. The spec seems to
   say explicitly that root complex integrated devices should not use legacy
   addresses or support hotplug. So I would be surprised if such one
   appears in real world.
   
   Luckily guests do not seem to be worried as long as we use ACPI.
  
  Yes, in fact I just figured out last night that Windows is unhappy with
  assigned PCI devices on bus 0 that claim to be an endpoint in their PCIe
  capability rather than an integrated endpoint.  We'll need to do extra
  mangling of the PCIe capability to massage it into the guest visible
  topology.
 
 For now, just put your device behind an express bridge. This breaks acpi
 hotplug for now, but I'm looking into hotplug with bridges anyway.

We have the problem in both directions though, Endpoints that should be
Integrated Endpoints and Integrated Endpoints that should be Endpoints.
So I think we need to mangle the type.

 If you really need it I can give you a hack for hotplug too.
 
 Of course express  does not allow hotplug of root complex parts
 but happens to work because we use ACPI.

That's a little odd.

  Section 1.3.2.3 of the 3.0 spec says integrated endpoints must not
  require I/O resources claimed through BAR(s).  VGA skirts around this by
  not having the legacy resources claimed by BARs, but instead being
  implicit.
 
 Aha. I missed this point.
 
   Are there other sections restricting legacy I/O?
 
 One other interesting thing is that the VGA enable bit (for bridge control
 register) does not appear in express spec at all.

Yep, but it appears on hardware.

  It's common that a plugin VGA card sits behind a root port where the
  bridge registers tell us about VGA routing,
  but integrated VGA devices
  are often on bus 0 though, here's an example:
  
  -[:00]-+-00.0  Intel Corporation 2nd Generation Core Processor Family 
  DRAM Controller
 +-02.0  Intel Corporation 2nd Generation Core Processor Family 
  Integrated Graphics Controller
  
  Often these systems will disable the integrated graphics when a plugin
  graphics is installed below a root port.  I'm not sure how the system
  knows to route VGA to the integrated device vs the root port otherwise.
 
 I am guessing it disables the integrated graphics?
 
  Here's a more interesting example:
  
  -+-[:01]-+-00.0  NVIDIA Corporation GT218 [GeForce G210M]
   |   \-00.1  NVIDIA Corporation High Definition

Re: [Qemu-devel] KVM call minutes 2013-01-29 - Port I/O

2013-01-31 Thread Benjamin Herrenschmidt

On Thu, 2013-01-31 at 12:49 +0200, Michael S. Tsirkin wrote:

 OK but this appears behind a bridge.  So the bridge configuration tells
 the root complex where to send accesses to the VGA.

Sort-of, again the root complex isn't sending anything targeted here.
PCIe is point to point and any device is behind a bridge, real or
virtual.

 But qemu currently puts devices directly on root bus.

Sure, because qemu doesn't specifically model PCIe but something else

 And as far as I can tell when we present devices directly on bus 0, we
 pretend these are integrated in the root complex.

Right, it's a bit gross.

  The spec seems to
 say explicitly that root complex integrated devices should not use legacy
 addresses or support hotplug. So I would be surprised if such one
 appears in real world.

Sure but that doesn't change the fact that there's no point in treating
things differently between PCI and PCIe for the sake of address range
decoding. The high level model remains the same.

 Luckily guests do not seem to be worried as long as we use ACPI.

Right, it all just looks like PCI to the guest anyway and is mostly
treated as such for the sake of routing and decoding (until you turn on
ARI but that's a different can of worms).

  BTW, I've been working on vfio-pci support of VGA assignment which makes
  use of the VGA arbiter in the host to manipulate the VGA Enable control
  register, allowing us to select which device to access.  The qemu side
  is simply registering memory regions for the VGA areas and expecting to
  be used with -vga none, but I'll adopt whatever strategy we choose for
  hard coded address range support.  Current base patches at the links
  below.  Thanks,
  
  Alex
  
  https://github.com/awilliam/qemu-vfio/commit/ea2befa59010a429dcf13c10dbccdf8b64e82fbd
  https://github.com/awilliam/linux-vfio/commit/bae182d929229cbf1eaeb01e5fad4f77f81a4c61


--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [Qemu-devel] KVM call minutes 2013-01-29 - Port I/O

2013-01-31 Thread Benjamin Herrenschmidt

On Thu, 2013-01-31 at 09:34 -0700, Alex Williamson wrote:
  Luckily guests do not seem to be worried as long as we use ACPI.
 
 Yes, in fact I just figured out last night that Windows is unhappy with
 assigned PCI devices on bus 0 that claim to be an endpoint in their PCIe
 capability rather than an integrated endpoint.  We'll need to do extra
 mangling of the PCIe capability to massage it into the guest visible
 topology.

If you are on bus 0, you need to either not have the capability, or if
you do, have it be root complex or RC integrated endpoint. It's fair
game for any OS to assume that an endpoint will have a parent bridge
(either a RC or a downstream port) and to muck around with link control
etc...

Typically on my laptop with intel chipset, bus 0 has devices that just
don't have any PCIe capabilities.

 Section 1.3.2.3 of the 3.0 spec says integrated endpoints must not
 require I/O resources claimed through BAR(s).  VGA skirts around this by
 not having the legacy resources claimed by BARs, but instead being
 implicit.  Are there other sections restricting legacy I/O?

Right this is odd, I don't know why they put that in. Legacy endpoints
don't have that limitation and I doubt system software actually cares.

On the other hand, I suspect that doesn't apply if you simply don't
have the PCIe capability at all :-) IE, that's basically what my laptop
looks like here. The Intel graphics appears on bus 0 and has IO ports
mapped with a BAR and no PCIe cap.

Same with the on-chip SATA.

In fact they have a PCI Advanced features capability, but not PCIe.

Then they have a bunch of root complexes as siblings.

 It's common that a plugin VGA card sits behind a root port where the
 bridge registers tell us about VGA routing, but integrated VGA devices
 are often on bus 0 though, here's an example:
 
 -[:00]-+-00.0  Intel Corporation 2nd Generation Core Processor Family 
 DRAM Controller
+-02.0  Intel Corporation 2nd Generation Core Processor Family 
 Integrated Graphics Controller
 
 Often these systems will disable the integrated graphics when a plugin
 graphics is installed below a root port.  I'm not sure how the system
 knows to route VGA to the integrated device vs the root port otherwise.

It's a good question... I would say the cleanest way is to use the VGA
Enable bit of the root complex. If the RC is set to forward downstream,
then the plug-in card gets the VGA cycles, else, they go to the
integrated one (subtractive-decoding style).

However, the PCI-E spec has removed that bit from the bridge control
register definition :-)

So whatever mechanism those chipsets use has to be somewhat proprietary.

On the other hand, I don't see it hurting to make our own proprietary
mechanism consist of using ... the bridge control VGA enable bit. IE.
The bit is not used in the PCIe spec and probably never will be so we
can use it for its original purpose.

 Here's a more interesting example:
 
 -+-[:01]-+-00.0  NVIDIA Corporation GT218 [GeForce G210M]
  |   \-00.1  NVIDIA Corporation High Definition Audio Controller
  \-[:00]-+-00.0  Intel Corporation Mobile 4 Series Chipset Memory 
 Controller Hub
  +-01.0  Intel Corporation Mobile 4 Series Chipset PCI Express 
 Graphics Port
 
 This system seems to have two host bridges with VGA behind each of them.
 There's no bridge to control VGA routing, so I don't know how the
 selection is done.  It's possible the g210m never sees legacy VGA
 accesses in this mode.  This bios has another mode which makes the g210m
 the primary graphics and hides the integrated graphics, essentially the
 same as I mention above with hiding integrated endpoint graphics when
 plugin graphics are used.  Thanks,

Wait, those are two different busses ... and there's no bridge ? Is that
the funky x86 multi domain crackpot where you have multiple roots with
non overlapping bus numbers in the same domain ?

Ben.


--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [Qemu-devel] KVM call minutes 2013-01-29 - Port I/O

2013-01-31 Thread Michael S. Tsirkin

On Thu, Jan 31, 2013 at 02:21:50PM -0700, Alex Williamson wrote:
 On Thu, 2013-01-31 at 23:11 +0200, Michael S. Tsirkin wrote:
  On Thu, Jan 31, 2013 at 09:34:03AM -0700, Alex Williamson wrote:
   
   On Thu, 2013-01-31 at 12:49 +0200, Michael S. Tsirkin wrote:
On Wed, Jan 30, 2013 at 04:28:30PM -0700, Alex Williamson wrote:
 On Thu, 2013-01-31 at 10:02 +1100, Benjamin Herrenschmidt wrote:
  On Thu, 2013-01-31 at 00:49 +0200, Michael S. Tsirkin wrote:
In practice they do (VGA at least)

From a SW modelling standpoint, I don't think it's worth
   differentiating
PCI and PCIE.

Cheers,
Ben.
   
   Interesting.
   Do you have such hardware? Could you please dump
   the output of lspci -vv?
  
  Any ATI or nVidia card still supports hard decoding of VGA regions 
  for
  the sake of legacy operating systems and BIOSes :-) I don't know 
  about
  Intel but I suppose it's the same.
 
 For example:
 
 -[:00]-+-00.0  Advanced Micro Devices [AMD] nee ATI RD890 PCI to 
 PCI bridge (external gfx0 p
+-04.0-[02]--+-00.0  Advanced Micro Devices [AMD] nee ATI 
 Cedar PRO [Radeon HD 5450/6350]
 
 00:04.0 PCI bridge: Advanced Micro Devices [AMD] nee ATI RD890 PCI to 
 PCI bridge (PCI express gpp port D) (prog-if 00 [Normal decode])
   Control: I/O+ Mem+ BusMaster+ SpecCycle- MemWINV- VGASnoop- 
 ParErr- Stepping- SERR- FastB2B- DisINTx-
   Status: Cap+ 66MHz- UDF- FastB2B- ParErr- DEVSEL=fast TAbort- 
 TAbort- MAbort- SERR- PERR- INTx-
   Latency: 0, Cache Line Size: 64 bytes
   Bus: primary=00, secondary=02, subordinate=02, sec-latency=0
   I/O behind bridge: c000-cfff
   Memory behind bridge: fd10-fd1f
   Prefetchable memory behind bridge: 
 d000-dfff
   Secondary status: 66MHz- FastB2B- ParErr- DEVSEL=fast TAbort- 
 TAbort- MAbort+ SERR- PERR-
   BridgeCtl: Parity- SERR- NoISA- VGA+ MAbort- Reset- FastB2B-
   
 VGA+ (VGA Enable) indicates positive decode of 0x3b0 - 0x3bb, 0x3c0 -
 0x3df, and 0xa - 0xbfff.  Device 2:00.0 of course doesn't report
 these ISA ranges as they're implicit in the VGA class code.

OK but this appears behind a bridge.  So the bridge configuration tells
the root complex where to send accesses to the VGA.

But qemu currently puts devices directly on root bus.

And as far as I can tell when we present devices directly on bus 0, we
pretend these are integrated in the root complex. The spec seems to
say explicitly that root complex integrated devices should not use 
legacy
addresses or support hotplug. So I would be surprised if such one
appears in real world.

Luckily guests do not seem to be worried as long as we use ACPI.
   
   Yes, in fact I just figured out last night that Windows is unhappy with
   assigned PCI devices on bus 0 that claim to be an endpoint in their PCIe
   capability rather than an integrated endpoint.  We'll need to do extra
   mangling of the PCIe capability to massage it into the guest visible
   topology.
  
  For now, just put your device behind an express bridge. This breaks acpi
  hotplug for now, but I'm looking into hotplug with bridges anyway.
 
 We have the problem in both directions though, Endpoints that should be
 Integrated Endpoints and Integrated Endpoints that should be Endpoints.
 So I think we need to mangle the type.
 
  If you really need it I can give you a hack for hotplug too.
  
  Of course express  does not allow hotplug of root complex parts
  but happens to work because we use ACPI.
 
 That's a little odd.
 
   Section 1.3.2.3 of the 3.0 spec says integrated endpoints must not
   require I/O resources claimed through BAR(s).  VGA skirts around this by
   not having the legacy resources claimed by BARs, but instead being
   implicit.
  
  Aha. I missed this point.
  
Are there other sections restricting legacy I/O?
  
  One other interesting thing is that VGA enable bit (for bridge control
  register) does not appear in express spec at all.
 
 Yep, but it appears on hardware.
 
   It's common that a plugin VGA card sits behind a root port where the
   bridge registers tell us about VGA routing,
   but integrated VGA devices
   are often on bus 0 though, here's an example:
   
   -[:00]-+-00.0  Intel Corporation 2nd Generation Core Processor Family 
   DRAM Controller
  +-02.0  Intel Corporation 2nd Generation Core Processor Family 
   Integrated Graphics Controller
   
   Often these systems will disable the integrated graphics when a plugin
   graphics is installed below a root port.  I'm not sure how the system
   knows to route VGA to the integrated device vs the root port otherwise.
  
  I am guessing it disables the

Re: [Qemu-devel] KVM call minutes 2013-01-29 - Port I/O

2013-01-31 Thread Michael S. Tsirkin

On Fri, Feb 01, 2013 at 08:22:33AM +1100, Benjamin Herrenschmidt wrote:
 On Thu, 2013-01-31 at 12:49 +0200, Michael S. Tsirkin wrote:
 
  OK but this appears behind a bridge.  So the bridge configuration tells
  the root complex where to send accesses to the VGA.
 
 Sort-of, again the root complex isn't sending anything targeted here.
 PCIe is point to point and any device is behind a bridge, real or
 virtual.

I think we are arguing about terminology here. root complex
has a virtual bridge for each port, presumably it examines bridge control
for each port to know which link to use for a VGA access.
I say presumably because VGA enable bit in bridge control
is not listed in spec (but as Alex says some real
hardware has it implemented).

  But qemu currently puts devices directly on root bus.
 
 Sure, because qemu doesn't specifically model PCIe but something else
 
  And as far as I can tell when we present devices directly on bus 0, we
  pretend these are integrated in the root complex.
 
 Right, it's a bit gross.
 
   The spec seems to
  say explicitly that root complex integrated devices should not use legacy
  addresses or support hotplug. So I would be surprised if such one
  appears in real world.
 
 Sure but that doesn't change the fact that there's no point in treating
 things differently between PCI and PCIe for the sake of address range
 decoding. The high level model remains the same.

Yes, and it's not by chance.

  Luckily guests do not seem to be worried as long as we use ACPI.
 
 Right, it all just looks like PCI to the guest anyway and is mostly
 treated as such for the sake of routing and decoding (until you turn on
 ARI but that's a different can of worms).

Right, ARI only affects config cycles.

   BTW, I've been working on vfio-pci support of VGA assignment which makes
   use of the VGA arbiter in the host to manipulate the VGA Enable control
   register, allowing us to select which device to access.  The qemu side
   is simply registering memory regions for the VGA areas and expecting to
   be used with -vga none, but I'll adopt whatever strategy we choose for
   hard coded address range support.  Current base patches at the links
   below.  Thanks,
   
   Alex
   
   https://github.com/awilliam/qemu-vfio/commit/ea2befa59010a429dcf13c10dbccdf8b64e82fbd
   https://github.com/awilliam/linux-vfio/commit/bae182d929229cbf1eaeb01e5fad4f77f81a4c61
 
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [Qemu-devel] KVM call minutes 2013-01-29 - Port I/O

2013-01-31 Thread Michael S. Tsirkin

  Here's a more interesting example:
  
  -+-[:01]-+-00.0  NVIDIA Corporation GT218 [GeForce G210M]
   |   \-00.1  NVIDIA Corporation High Definition Audio Controller
   \-[:00]-+-00.0  Intel Corporation Mobile 4 Series Chipset Memory 
  Controller Hub
   +-01.0  Intel Corporation Mobile 4 Series Chipset PCI Express 
  Graphics Port
  
  This system seems to have two host bridges with VGA behind each of them.
  There's no bridge to control VGA routing, so I don't know how the
  selection is done.  It's possible the g210m never sees legacy VGA
  accesses in this mode.  This bios has another mode which makes the g210m
  the primary graphics and hides the integrated graphics, essentially the
  same as I mention above with hiding integrated endpoint graphics when
  plugin graphics are used.  Thanks,
 
 Wait, those are two different busses ... and there's no bridge ? Is that
 the funky x86 multi domain crackpot where you have multiple roots with
 non overlapping bus numbers in the same domain ?
 
 Ben.

Domain numbering on x86 comes from firmware and you know what Linus
said about firmware developers ...

-- 
MST
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH 8/8] KVM:PPC:booke: Allow debug interrupt injection to guest

2013-01-31 Thread Scott Wood


On 01/31/2013 01:20:39 PM, Alexander Graf wrote:


On 31.01.2013, at 20:05, Alexander Graf wrote:


 On 31.01.2013, at 19:54, Scott Wood wrote:

 On 01/31/2013 12:52:41 PM, Alexander Graf wrote:
 On 31.01.2013, at 19:43, Scott Wood wrote:
 On 01/31/2013 12:21:07 PM, Alexander Graf wrote:
 How about something like this? Then both targets at least suck  
as much :).


 I'm not sure that should be the goal...

 Thanks to e500mc's awful hardware design, we don't know who  
sets the MSR_DE bit. Once we forced it onto the guest, we have no  
chance to know whether the guest also set it or not. We could only  
guess.


 MSRP[DEP] can prevent the guest from modifying MSR[DE] -- but we  
still need to set it in the first place.


 According to ISA V2.06B, the hypervisor should set DBCR0[EDM] to  
let the guest know that the debug resources are not available, and  
that the value of MSR[DE] is not specified and not modifiable.
 So what would the guest do then to tell the hypervisor that it  
actually wants to know about debug events?


 The guest is out of luck, just as if a JTAG were in use.

 Hrm.

 Can we somehow generalize this out of luck behavior?

 Every time we would set or clear an MSR bit in shadow_msr on  
e500v2, we would instead set or clear it in the real MSR. That way  
only e500mc is out of luck, but the code would still be shared.


I don't follow.  e500v2 is just as out-of-luck.  The mechanism simply  
does not support sharing debug resources.


What do you mean by the real MSR?  The real MSR is shadow_msr, and  
MSR_DE must always be set there if the host is debugging the guest.  As  
for reflecting it into the guest MSR, we could, but I don't really see  
the point.  We're never going to actually send a debug exception to the  
guest when the host owns the debug resources.


Speaking of naming issues, guest_debug is very ambiguous...


diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
index 38a62ef..9bdb845 100644
--- a/arch/powerpc/kvm/booke.c
+++ b/arch/powerpc/kvm/booke.c
@@ -133,6 +133,29 @@ static void kvmppc_vcpu_sync_fpu(struct kvm_vcpu  
*vcpu)

 #endif
 }

+static void kvmppc_vcpu_sync_debug(struct kvm_vcpu *vcpu)
+{
+   u32 is_debug = vcpu->arch.shared->msr & MSR_DE;
+
+	/* Force debug to on in guest space when user space wants to  
debug */

+   if (vcpu->guest_debug)
+   is_debug = MSR_DE;
+
+#ifdef CONFIG_KVM_BOOKE_HV
+   /*
+* Since there is no shadow MSR, sync MSR_DE into the guest
+* visible MSR.
+*/
+   vcpu->arch.shared->msr &= ~MSR_DE;
+   vcpu->arch.shared->msr |= is_debug;
+#endif
+
+#ifndef CONFIG_KVM_BOOKE_HV
+   vcpu->arch.shadow_msr &= ~MSR_DE;
+   vcpu->arch.shadow_msr |= is_debug;
+#endif
+}


The &= ~MSR_DE line is pointless on bookehv, and makes it harder to  
read.  I had to stare at it a while before noticing that you initially  
set is_debug from the guest MSR and that you'd never really clear  
MSR_DE here on bookehv.


-Scott
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [Qemu-devel] KVM call minutes 2013-01-29 - Port I/O

2013-01-31 Thread Alex Williamson

On Fri, 2013-02-01 at 08:44 +1100, Benjamin Herrenschmidt wrote:
 On Thu, 2013-01-31 at 09:34 -0700, Alex Williamson wrote:
   Luckily guests do not seem to be worried as long as we use ACPI.
  
  Yes, in fact I just figured out last night that Windows is unhappy with
  assigned PCI devices on bus 0 that claim to be an endpoint in their PCIe
  capability rather than an integrated endpoint.  We'll need to do extra
  mangling of the PCIe capability to massage it into the guest visible
  topology.
 
 If you are on bus 0, you need to either not have the capability, or if
 you do, have it be root complex or RC integrated endpoint. It's fair
 game for any OS to assume that an endpoint will have a parent bridge
 (either a RC or a downstream port) and to muck around with link control
 etc...

Yep, converting Endpoint to Integrated Endpoint is just a matter of
changing the guest visible type and hiding all the link(2) cap, control,
and status.  Integrated Endpoint to Endpoint appears to require
inventing some link capabilities since it's a required field.  Legacy
Endpoint to Integrated Endpoint seems incompatible, but I don't think we
model anything at a level that would care.

We could also take the opportunity to remove the PCIe capability when
exposing devices on 440fx, but I'm nervous that would break drivers that
are dumb and look for it anyway.

 Typically on my laptop with intel chipset, bus 0 has devices that just
 don't have any PCIe capabilities.

Oddly the audio device seems to be the only one that consistently has
it.

  Section 1.3.2.3 of the 3.0 spec says integrated endpoints must not
  require I/O resources claimed through BAR(s).  VGA skirts around this by
  not having the legacy resources claimed by BARs, but instead being
  implicit.  Are there other sections restricting legacy I/O?
 
 Right this is odd, I don't know why they put that in. Legacy endpoints
 don't have that limitation and I doubt system software actually cares.
 
 On the other hand, I suspect that doesn't apply if you simply don't
 have the PCIe capability at all :-) IE, that's basically what my laptop
 looks like here. The Intel graphics appears on bus 0 and has IO ports
 mapped with a BAR and no PCIe cap.
 
 Same with the on-chip SATA.
 
 In fact they have a PCI Advanced features capability, but not PCIe.
 
 Then they have a bunch of root complexes as siblings.
 
  It's common that a plugin VGA card sits behind a root port where the
  bridge registers tell us about VGA routing, but integrated VGA devices
  are often on bus 0 though, here's an example:
  
  -[:00]-+-00.0  Intel Corporation 2nd Generation Core Processor Family 
  DRAM Controller
 +-02.0  Intel Corporation 2nd Generation Core Processor Family 
  Integrated Graphics Controller
  
  Often these systems will disable the integrated graphics when a plugin
  graphics is installed below a root port.  I'm not sure how the system
  knows to route VGA to the integrated device vs the root port otherwise.
 
 It's a good question... I would say the cleanest way is to use the VGA
 Enable bit of the root complex. If the RC is set to forward downstream,
 then the plug-in card gets the VGA cycles, else, they go to the
 integrated one (subtractive decoding -style).
 
 However, the PCI-E spec has removed that bit from the bridge control
 register definition :-)
 
 So whatever mechanism those chipsets use has to be somewhat proprietary.
 
 On the other hand, I don't see it hurting to make our own proprietary
 mechanism consist of using ... the bridge control VGA enable bit. IE.
 The bit is not used in the PCIe spec and probably never will be so we
 can use it for its original purpose.

Yes, our emulated root ports should include this, otherwise we have
little hope of properly supporting multiple assigned (or emulated)
graphics devices, each behind their own root port.  So we need the
ability for multiple devices to register VGA address (1 per bus?) and
change MemoryRegion routing just like hardware does.

  Here's a more interesting example:
  
  -+-[:01]-+-00.0  NVIDIA Corporation GT218 [GeForce G210M]
   |   \-00.1  NVIDIA Corporation High Definition Audio Controller
   \-[:00]-+-00.0  Intel Corporation Mobile 4 Series Chipset Memory 
  Controller Hub
   +-01.0  Intel Corporation Mobile 4 Series Chipset PCI Express 
  Graphics Port
  
  This system seems to have two host bridges with VGA behind each of them.
  There's no bridge to control VGA routing, so I don't know how the
  selection is done.  It's possible the g210m never sees legacy VGA
  accesses in this mode.  This bios has another mode which makes the g210m
  the primary graphics and hides the integrated graphics, essentially the
  same as I mention above with hiding integrated endpoint graphics when
  plugin graphics are used.  Thanks,
 
 Wait, those are two different busses ... and there's no bridge ? Is that
 the funky x86 multi domain crackpot where you have multiple roots

Re: [PATCH 8/8] KVM:PPC:booke: Allow debug interrupt injection to guest

2013-01-31 Thread Alexander Graf


On 31.01.2013, at 23:40, Scott Wood wrote:

 On 01/31/2013 01:20:39 PM, Alexander Graf wrote:
 On 31.01.2013, at 20:05, Alexander Graf wrote:
 
  On 31.01.2013, at 19:54, Scott Wood wrote:
 
  On 01/31/2013 12:52:41 PM, Alexander Graf wrote:
  On 31.01.2013, at 19:43, Scott Wood wrote:
  On 01/31/2013 12:21:07 PM, Alexander Graf wrote:
  How about something like this? Then both targets at least suck as much 
  :).
 
  I'm not sure that should be the goal...
 
  Thanks to e500mc's awful hardware design, we don't know who sets the 
  MSR_DE bit. Once we forced it onto the guest, we have no chance to 
  know whether the guest also set it or not. We could only guess.
 
  MSRP[DEP] can prevent the guest from modifying MSR[DE] -- but we still 
  need to set it in the first place.
 
  According to ISA V2.06B, the hypervisor should set DBCR0[EDM] to let 
  the guest know that the debug resources are not available, and that 
  the value of MSR[DE] is not specified and not modifiable.
  So what would the guest do then to tell the hypervisor that it actually 
  wants to know about debug events?
 
  The guest is out of luck, just as if a JTAG were in use.
 
  Hrm.
 
  Can we somehow generalize this out of luck behavior?
 
  Every time we would set or clear an MSR bit in shadow_msr on e500v2, we 
  would instead set or clear it in the real MSR. That way only e500mc is out 
  of luck, but the code would still be shared.
 
 I don't follow.  e500v2 is just as out-of-luck.  The mechanism simply does 
 not support sharing debug resources.

For e500v2 we have 2 fields

  * MSR as the guest sees it
  * MSR as we execute when the guest runs

Since we know the MSR when the guest sees it, we can decide what to do when we 
get an unhandled debug interrupt. We can simulate what hardware would do 
depending on the guest's MSR_DE setting.

For e500mc we only have

  * MSR as the guest sees it and as we execute when the guest runs

Because there is only one field, as soon as we OR MSR_DE into there, we can no 
longer distinguish whether the guest wanted to have MSR_DE enabled or not.

 What do you mean by the real MSR?  The real MSR is shadow_msr, and MSR_DE 
 must always be set there if the host is debugging the guest.  As for 
 reflecting it into the guest MSR, we could, but I don't really see the point. 
  We're never going to actually send a debug exception to the guest when the 
 host owns the debug resources.

Why not? That's the whole point of jumping through user space.

  1) guest exits with debug interrupt
  2) QEMU gets a debug exit
  3) QEMU checks in its list whether it belongs to its own debug points
  4) if not, it reinjects the interrupt into the guest

Step 4 is pretty difficult to do when we don't know whether the guest is 
actually capable of handling debug interrupts at that moment.

 Speaking of naming issues, guest_debug is very ambiguous...

I agree.

 
 diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
 index 38a62ef..9bdb845 100644
 --- a/arch/powerpc/kvm/booke.c
 +++ b/arch/powerpc/kvm/booke.c
 @@ -133,6 +133,29 @@ static void kvmppc_vcpu_sync_fpu(struct kvm_vcpu *vcpu)
 #endif
 }
 +static void kvmppc_vcpu_sync_debug(struct kvm_vcpu *vcpu)
 +{
 +u32 is_debug = vcpu->arch.shared->msr & MSR_DE;
 +
 +/* Force debug to on in guest space when user space wants to debug */
 +if (vcpu->guest_debug)
 +is_debug = MSR_DE;
 +
 +#ifdef CONFIG_KVM_BOOKE_HV
 +/*
 + * Since there is no shadow MSR, sync MSR_DE into the guest
 + * visible MSR.
 + */
 +vcpu->arch.shared->msr &= ~MSR_DE;
 +vcpu->arch.shared->msr |= is_debug;
 +#endif
 +
 +#ifndef CONFIG_KVM_BOOKE_HV
 +vcpu->arch.shadow_msr &= ~MSR_DE;
 +vcpu->arch.shadow_msr |= is_debug;
 +#endif
 +}
 
 The &= ~MSR_DE line is pointless on bookehv, and makes it harder to read.  
 I had to stare at it a while before noticing that you initially set is_debug 
 from the guest MSR and that you'd never really clear MSR_DE here on bookehv.

Well, I'm mostly bouncing ideas here to find a way to express what we're trying 
to say in a way that someone who hasn't read this email thread would still 
understand what's going on :).

How about this version?


diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
index 38a62ef..9929c41 100644
--- a/arch/powerpc/kvm/booke.c
+++ b/arch/powerpc/kvm/booke.c
@@ -133,6 +133,28 @@ static void kvmppc_vcpu_sync_fpu(struct kvm_vcpu *vcpu)
 #endif
 }
 
+static void kvmppc_vcpu_sync_debug(struct kvm_vcpu *vcpu)
+{
+#ifndef CONFIG_KVM_BOOKE_HV
+   /* Synchronize guest's desire to get debug interrupts into shadow MSR */
+   vcpu->arch.shadow_msr &= ~MSR_DE;
+   vcpu->arch.shadow_msr |= vcpu->arch.shared->msr & MSR_DE;
+#endif
+
+   /* Force enable debug interrupts when user space wants to debug */
+   if (vcpu->guest_debug) {
+#ifdef CONFIG_KVM_BOOKE_HV
+   /*
+* Since there is no shadow MSR, sync MSR_DE into the guest
+* visible

KVM Test report, kernel 3f0c3d0b... qemu 4d9367b7...

2013-01-31 Thread Ren, Yongjie

Hi All,

This is KVM upstream test result against kvm.git next branch and qemu-kvm.git 
master branch.
 kvm.git next branch: 3f0c3d0bb2bcc4b88b22452a7cf0073ee9a0f1e6 based on 
kernel 3.7.0
 qemu-kvm.git master branch: 4d9367b76f71c6d938cf8201392abe4bfb1136cb

We found no new bug and no bug fixed in the past two weeks. 

New issue (0):

Fixed issue (0):
 
Old issues (6):
--
1. Nested-virt: L1 (kvm on kvm)guest panic with parameter -cpu host in qemu 
command line.
  https://bugs.launchpad.net/qemu/+bug/994378
2. Can't install or boot up 32bit win8 guest.
  https://bugs.launchpad.net/qemu/+bug/1007269
3. vCPU hot-add makes the guest abort. 
  https://bugs.launchpad.net/qemu/+bug/1019179
4. Nested Virt: VMX can't be initialized in L1 Xen (Xen on KVM)
  https://bugzilla.kernel.org/show_bug.cgi?id=45931
5. Guest has no xsave feature with parameter -cpu qemu64,+xsave in qemu 
command line.
  https://bugs.launchpad.net/qemu/+bug/1042561
6. Guest hang when doing kernel build and writing data in guest.
  https://bugs.launchpad.net/qemu/+bug/1096814


Test environment:
==
  Platform   Westmere-EP    Sandybridge-EP
  CPU Cores   24    32
  Memory size 24G   32G


Regards
Yongjie Ren  (Jay)

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

RE: [PATCH 0/8] KVM: BOOKE/BOOKEHV : Added debug stub support

2013-01-31 Thread Bhushan Bharat-R65777



 -Original Message-
 From: kvm-ppc-ow...@vger.kernel.org [mailto:kvm-ppc-ow...@vger.kernel.org] On
 Behalf Of Alexander Graf
 Sent: Friday, January 25, 2013 6:08 PM
 To: Bhushan Bharat-R65777
 Cc: kvm-...@vger.kernel.org; kvm@vger.kernel.org; Bhushan Bharat-R65777
 Subject: Re: [PATCH 0/8] KVM: BOOKE/BOOKEHV : Added debug stub support
 
 
 On 16.01.2013, at 09:20, Bharat Bhushan wrote:
 
  This patchset adds the QEMU debug stub support for powerpc (booke/bookehv).
  [1/8] KVM: PPC: booke: use vcpu reference from thread_struct
  - This is a cleanup patch to use vcpu reference from thread struct
  [2/8] KVM: PPC: booke: Allow multiple exception types [3/8] KVM: PPC:
  booke: Added debug handler
  - These two patches install the KVM debug handler.
  [4/8] Added ONE_REG interface for debug instruction
  - Add the ioctl interface to get the debug instruction for
setting software breakpoint from QEMU debug stub.
  [5/8] KVM: PPC: debug stub interface parameter defined [6/8] booke:
  Added DBCR4 SPR number [7/8] KVM: booke/bookehv: Add debug stub
  support
  - Add the debug stub interface on booke/bookehv [8/8] KVM:PPC:booke:
  Allow debug interrupt injection to guest
  -- with this qemu can inject debug interrupt to guest
 
 Thanks, applied 1/8, 2/8, 6/8.


Alex I cannot see these 3 patches on kvm-ppc-next branch. Are those applied on 
some other branch ?

Thanks
-Bharat

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH] tcm_vhost: Multi-target support

2013-01-31 Thread Asias He

On 01/31/2013 07:13 PM, Michael S. Tsirkin wrote:
 On Thu, Jan 31, 2013 at 05:28:21PM +0800, Asias He wrote:
 Hello Nicholas,

 On 01/31/2013 03:33 PM, Asias He wrote:
 In order to take advantages of Paolo's multi-queue virito-scsi, we need
 multi-target support in tcm_vhost first. Otherwise all the requests go
 to one queue and other queues are idle.

 This patch makes:

 1. All the targets under the wwpn is seen and can be used by guest.
 2. No need to pass the tpgt number in struct vhost_scsi_target to
tcm_vhost.ko. Only wwpn is needed.
 3. We can always pass max_target = 255 to guest now, since we abort the
request who's target id does not exist.

 Signed-off-by: Asias He as...@redhat.com
 ---
  drivers/vhost/tcm_vhost.c | 115 
 --
  drivers/vhost/tcm_vhost.h |   4 +-
  2 files changed, 74 insertions(+), 45 deletions(-)

 diff --git a/drivers/vhost/tcm_vhost.c b/drivers/vhost/tcm_vhost.c
 index 218deb6..d50cb95 100644
 --- a/drivers/vhost/tcm_vhost.c
 +++ b/drivers/vhost/tcm_vhost.c
 @@ -59,13 +59,18 @@ enum {
 VHOST_SCSI_VQ_IO = 2,
  };
  
 +#define VHOST_SCSI_MAX_TARGET 256
 +
  struct vhost_scsi {
 -   struct tcm_vhost_tpg *vs_tpg;   /* Protected by vhost_scsi-dev.mutex */
 +   /* Protected by vhost_scsi-dev.mutex */
 +   struct tcm_vhost_tpg *vs_tpg[VHOST_SCSI_MAX_TARGET];
 struct vhost_dev dev;
 struct vhost_virtqueue vqs[3];
  
 struct vhost_work vs_completion_work; /* cmd completion work item */
 struct llist_head vs_completion_list; /* cmd completion queue */
 +   char vs_vhost_wwpn[TRANSPORT_IQN_LEN];
 +   int vs_num_target;
  };
  
  /* Local pointer to allocated TCM configfs fabric module */
 @@ -564,13 +569,7 @@ static void vhost_scsi_handle_vq(struct vhost_scsi *vs)
 u32 exp_data_len, data_first, data_num, data_direction;
 unsigned out, in, i;
 int head, ret;
 -
 -   /* Must use ioctl VHOST_SCSI_SET_ENDPOINT */
 -   tv_tpg = vs-vs_tpg;
 -   if (unlikely(!tv_tpg)) {
 -   pr_err(%s endpoint not set\n, __func__);
 -   return;
 -   }
 +   u8 target;
  
 mutex_lock(vq-mutex);
 vhost_disable_notify(vs-dev, vq);
 @@ -637,6 +636,35 @@ static void vhost_scsi_handle_vq(struct vhost_scsi *vs)
 break;
 }
  
 +   /* Extract the tpgt */
 +   target = v_req.lun[1];
 +
 +   /* Target does not exit, fail the request */
 +   if (unlikely(target >= vs->vs_num_target)) {
 +   struct virtio_scsi_cmd_resp __user *resp;
 +   struct virtio_scsi_cmd_resp rsp;
 +
 +   memset(&rsp, 0, sizeof(rsp));
 +   rsp.response = VIRTIO_SCSI_S_BAD_TARGET;
 +   resp = vq->iov[out].iov_base;
 +   ret = copy_to_user(resp, &rsp, sizeof(rsp));
 +   if (!ret)
 +   vhost_add_used_and_signal(&vs->dev,
 +   &vs->vqs[2], head, 0);
 +   else
 +   pr_err("Faulted on virtio_scsi_cmd_resp\n");
 +
 +   continue;
 +   }
 +
 +   tv_tpg = vs->vs_tpg[target];
 +   if (unlikely(!tv_tpg)) {
 +   /* Must use ioctl VHOST_SCSI_SET_ENDPOINT */
 +   pr_err("endpoint not set, target = %d\n", target);
 +   vhost_discard_vq_desc(vq, 1);
 +   break;
 +   }
 +
 exp_data_len = 0;
 for (i = 0; i < data_num; i++)
 exp_data_len += vq->iov[data_first + i].iov_len;
 @@ -771,14 +799,11 @@ static int vhost_scsi_set_endpoint(
 }
 tv_tport = tv_tpg->tport;
  
 -   if (!strcmp(tv_tport->tport_name, t->vhost_wwpn) &&
 -   (tv_tpg->tport_tpgt == t->vhost_tpgt)) {
 +   if (!strcmp(tv_tport->tport_name, t->vhost_wwpn)) {
 tv_tpg->tv_tpg_vhost_count++;
 -   mutex_unlock(&tv_tpg->tv_tpg_mutex);
 -   mutex_unlock(&tcm_vhost_mutex);
  
 mutex_lock(&vs->dev.mutex);
 -   if (vs->vs_tpg) {
 +   if (vs->vs_tpg[tv_tpg->tport_tpgt - 1]) {
 mutex_unlock(&vs->dev.mutex);
 mutex_lock(&tv_tpg->tv_tpg_mutex);
 tv_tpg->tv_tpg_vhost_count--;
 @@ -786,15 +811,17 @@ static int vhost_scsi_set_endpoint(
 return -EEXIST;
 }
  
 -   vs->vs_tpg = tv_tpg;
 +   vs->vs_tpg[tv_tpg->tport_tpgt - 1] = tv_tpg;


 tv_tpg->tport_tpgt starts from 0, right? I thought it starts from 1,
 because I always got it starts from 1 in targetcli.

 o- vhost
o- naa.6001405bd4e8476d
   o- tpg1
  o- luns
 o- lun0
   o- tpg2
  o- luns
 o- lun0
   o- tpg3
  o- luns
 o- lun0
   o- tpg4
  o- luns
 o- lun0

 If it is

Re: [PATCH] tcm_vhost: Multi-target support

2013-01-31 Thread Asias He

On 02/01/2013 04:59 AM, Nicholas A. Bellinger wrote:
 On Thu, 2013-01-31 at 17:28 +0800, Asias He wrote:
 Hello Nicholas,

 On 01/31/2013 03:33 PM, Asias He wrote:
 In order to take advantages of Paolo's multi-queue virito-scsi, we need
 multi-target support in tcm_vhost first. Otherwise all the requests go
 to one queue and other queues are idle.

 This patch makes:

 1. All the targets under the wwpn is seen and can be used by guest.
 2. No need to pass the tpgt number in struct vhost_scsi_target to
tcm_vhost.ko. Only wwpn is needed.
 3. We can always pass max_target = 255 to guest now, since we abort the
request who's target id does not exist.

 Signed-off-by: Asias He as...@redhat.com
 ---
  drivers/vhost/tcm_vhost.c | 115 
 --
  drivers/vhost/tcm_vhost.h |   4 +-
  2 files changed, 74 insertions(+), 45 deletions(-)

 diff --git a/drivers/vhost/tcm_vhost.c b/drivers/vhost/tcm_vhost.c
 index 218deb6..d50cb95 100644
 --- a/drivers/vhost/tcm_vhost.c
 +++ b/drivers/vhost/tcm_vhost.c
 @@ -59,13 +59,18 @@ enum {
 VHOST_SCSI_VQ_IO = 2,
  };
  
 +#define VHOST_SCSI_MAX_TARGET 256
 +
  struct vhost_scsi {
 -   struct tcm_vhost_tpg *vs_tpg;   /* Protected by vhost_scsi-dev.mutex */
 +   /* Protected by vhost_scsi-dev.mutex */
 +   struct tcm_vhost_tpg *vs_tpg[VHOST_SCSI_MAX_TARGET];
 struct vhost_dev dev;
 struct vhost_virtqueue vqs[3];
  
 struct vhost_work vs_completion_work; /* cmd completion work item */
 struct llist_head vs_completion_list; /* cmd completion queue */
 +   char vs_vhost_wwpn[TRANSPORT_IQN_LEN];
 +   int vs_num_target;
  };
  
  /* Local pointer to allocated TCM configfs fabric module */
 @@ -564,13 +569,7 @@ static void vhost_scsi_handle_vq(struct vhost_scsi *vs)
 u32 exp_data_len, data_first, data_num, data_direction;
 unsigned out, in, i;
 int head, ret;
 -
 -   /* Must use ioctl VHOST_SCSI_SET_ENDPOINT */
 -   tv_tpg = vs-vs_tpg;
 -   if (unlikely(!tv_tpg)) {
 -   pr_err(%s endpoint not set\n, __func__);
 -   return;
 -   }
 +   u8 target;
  
 mutex_lock(vq-mutex);
 vhost_disable_notify(vs-dev, vq);
 @@ -637,6 +636,35 @@ static void vhost_scsi_handle_vq(struct vhost_scsi *vs)
 break;
 }
  
 +   /* Extract the tpgt */
 +   target = v_req.lun[1];
 +
 +   /* Target does not exit, fail the request */
 +   if (unlikely(target >= vs->vs_num_target)) {
 +   struct virtio_scsi_cmd_resp __user *resp;
 +   struct virtio_scsi_cmd_resp rsp;
 +
 +   memset(&rsp, 0, sizeof(rsp));
 +   rsp.response = VIRTIO_SCSI_S_BAD_TARGET;
 +   resp = vq->iov[out].iov_base;
 +   ret = copy_to_user(resp, &rsp, sizeof(rsp));
 +   if (!ret)
 +   vhost_add_used_and_signal(&vs->dev,
 +   &vs->vqs[2], head, 0);
 +   else
 +   pr_err("Faulted on virtio_scsi_cmd_resp\n");
 +
 +   continue;
 +   }
 +
 +   tv_tpg = vs->vs_tpg[target];
 +   if (unlikely(!tv_tpg)) {
 +   /* Must use ioctl VHOST_SCSI_SET_ENDPOINT */
 +   pr_err("endpoint not set, target = %d\n", target);
 +   vhost_discard_vq_desc(vq, 1);
 +   break;
 +   }
 +
 exp_data_len = 0;
 for (i = 0; i < data_num; i++)
 exp_data_len += vq->iov[data_first + i].iov_len;
 @@ -771,14 +799,11 @@ static int vhost_scsi_set_endpoint(
 }
 tv_tport = tv_tpg->tport;
  
 -   if (!strcmp(tv_tport->tport_name, t->vhost_wwpn) &&
 -   (tv_tpg->tport_tpgt == t->vhost_tpgt)) {
 +   if (!strcmp(tv_tport->tport_name, t->vhost_wwpn)) {
 tv_tpg->tv_tpg_vhost_count++;
 -   mutex_unlock(&tv_tpg->tv_tpg_mutex);
 -   mutex_unlock(&tcm_vhost_mutex);
  
 mutex_lock(&vs->dev.mutex);
 -   if (vs->vs_tpg) {
 +   if (vs->vs_tpg[tv_tpg->tport_tpgt - 1]) {
 mutex_unlock(&vs->dev.mutex);
 mutex_lock(&tv_tpg->tv_tpg_mutex);
 tv_tpg->tv_tpg_vhost_count--;
 @@ -786,15 +811,17 @@ static int vhost_scsi_set_endpoint(
 return -EEXIST;
 }
  
 -   vs->vs_tpg = tv_tpg;
 +   vs->vs_tpg[tv_tpg->tport_tpgt - 1] = tv_tpg;


 tv_tpg->tport_tpgt starts from 0, right? I thought it starts from 1,
 because I always got it starts from 1 in targetcli.

 o- vhost
o- naa.6001405bd4e8476d
   o- tpg1
  o- luns
 o- lun0
   o- tpg2
  o- luns
 o- lun0
   o- tpg3
  o- luns
 o- lun0
   o- tpg4
  o- luns
 o- lun0

 
 So at least

RE: [PATCH 3/8] KVM: PPC: booke: Added debug handler

2013-01-31 Thread Bhushan Bharat-R65777



 -Original Message-
 From: kvm-ppc-ow...@vger.kernel.org [mailto:kvm-ppc-ow...@vger.kernel.org] On
 Behalf Of Alexander Graf
 Sent: Thursday, January 31, 2013 10:38 PM
 To: Bhushan Bharat-R65777
 Cc: kvm-...@vger.kernel.org; kvm@vger.kernel.org
 Subject: Re: [PATCH 3/8] KVM: PPC: booke: Added debug handler
 
 
 On 31.01.2013, at 17:58, Bhushan Bharat-R65777 wrote:
 
 
 
  -Original Message-
  From: Alexander Graf [mailto:ag...@suse.de]
  Sent: Thursday, January 31, 2013 5:47 PM
  To: Bhushan Bharat-R65777
  Cc: kvm-...@vger.kernel.org; kvm@vger.kernel.org
  Subject: Re: [PATCH 3/8] KVM: PPC: booke: Added debug handler
 
 
  On 30.01.2013, at 12:30, Bhushan Bharat-R65777 wrote:
 
 
 
  -Original Message-
  From: Alexander Graf [mailto:ag...@suse.de]
  Sent: Friday, January 25, 2013 5:13 PM
  To: Bhushan Bharat-R65777
  Cc: kvm-...@vger.kernel.org; kvm@vger.kernel.org; Bhushan
  Bharat-R65777
  Subject: Re: [PATCH 3/8] KVM: PPC: booke: Added debug handler
 
 
  On 16.01.2013, at 09:24, Bharat Bhushan wrote:
 
  From: Bharat Bhushan bharat.bhus...@freescale.com
 
  Installed debug handler will be used for guest debug support and
  debug facility emulation features (patches for these features will
  follow this patch).
 
  Signed-off-by: Liu Yu yu@freescale.com
  [bharat.bhus...@freescale.com: Substantial changes]
  Signed-off-by: Bharat Bhushan bharat.bhus...@freescale.com
  ---
  arch/powerpc/include/asm/kvm_host.h |1 +
  arch/powerpc/kernel/asm-offsets.c   |1 +
  arch/powerpc/kvm/booke_interrupts.S |   49 
  ++-
 --
  --
  3 files changed, 44 insertions(+), 7 deletions(-)
 
  diff --git a/arch/powerpc/include/asm/kvm_host.h
  b/arch/powerpc/include/asm/kvm_host.h
  index 8a72d59..f4ba881 100644
  --- a/arch/powerpc/include/asm/kvm_host.h
  +++ b/arch/powerpc/include/asm/kvm_host.h
  @@ -503,6 +503,7 @@ struct kvm_vcpu_arch {
  u32 tlbcfg[4];
  u32 mmucfg;
  u32 epr;
  +   u32 crit_save;
  struct kvmppc_booke_debug_reg dbg_reg; #endif
  gpa_t paddr_accessed;
  diff --git a/arch/powerpc/kernel/asm-offsets.c
  b/arch/powerpc/kernel/asm-offsets.c
  index 46f6afd..02048f3 100644
  --- a/arch/powerpc/kernel/asm-offsets.c
  +++ b/arch/powerpc/kernel/asm-offsets.c
  @@ -562,6 +562,7 @@ int main(void)
  DEFINE(VCPU_LAST_INST, offsetof(struct kvm_vcpu, 
  arch.last_inst));
  DEFINE(VCPU_FAULT_DEAR, offsetof(struct kvm_vcpu, 
  arch.fault_dear));
  DEFINE(VCPU_FAULT_ESR, offsetof(struct kvm_vcpu,
  arch.fault_esr));
  +   DEFINE(VCPU_CRIT_SAVE, offsetof(struct kvm_vcpu,
  +arch.crit_save));
  #endif /* CONFIG_PPC_BOOK3S */
  #endif /* CONFIG_KVM */
 
  diff --git a/arch/powerpc/kvm/booke_interrupts.S
  b/arch/powerpc/kvm/booke_interrupts.S
  index eae8483..dd9c5d4 100644
  --- a/arch/powerpc/kvm/booke_interrupts.S
  +++ b/arch/powerpc/kvm/booke_interrupts.S
  @@ -52,12 +52,7 @@
   (1BOOKE_INTERRUPT_PROGRAM) | \
   (1BOOKE_INTERRUPT_DTLB_MISS))
 
  -.macro KVM_HANDLER ivor_nr scratch srr0
  -_GLOBAL(kvmppc_handler_\ivor_nr)
  -   /* Get pointer to vcpu and record exit number. */
  -   mtspr   \scratch , r4
  -   mfspr   r4, SPRN_SPRG_THREAD
  -   lwz r4, THREAD_KVM_VCPU(r4)
  +.macro __KVM_HANDLER ivor_nr scratch srr0
  stw r3, VCPU_GPR(R3)(r4)
  stw r5, VCPU_GPR(R5)(r4)
  stw r6, VCPU_GPR(R6)(r4)
  @@ -74,6 +69,46 @@ _GLOBAL(kvmppc_handler_\ivor_nr)
  bctr
  .endm
 
  +.macro KVM_HANDLER ivor_nr scratch srr0
  +_GLOBAL(kvmppc_handler_\ivor_nr)
  +   /* Get pointer to vcpu and record exit number. */
  +   mtspr   \scratch , r4
  +   mfspr   r4, SPRN_SPRG_THREAD
  +   lwz r4, THREAD_KVM_VCPU(r4)
  +   __KVM_HANDLER \ivor_nr \scratch \srr0 .endm
  +
  +.macro KVM_DBG_HANDLER ivor_nr scratch srr0
  +_GLOBAL(kvmppc_handler_\ivor_nr)
  +   mtspr   \scratch, r4
  +   mfspr   r4, SPRN_SPRG_THREAD
  +   lwz r4, THREAD_KVM_VCPU(r4)
  +   stw r3, VCPU_CRIT_SAVE(r4)
  +   mfcrr3
  +   mfspr   r4, SPRN_CSRR1
  +   andi.   r4, r4, MSR_PR
  +   bne 1f
 
 
  +   /* debug interrupt happened in enter/exit path */
  +   mfspr   r4, SPRN_CSRR1
  +   rlwinm  r4, r4, 0, ~MSR_DE
  +   mtspr   SPRN_CSRR1, r4
  +   lis r4, 0x
  +   ori r4, r4, 0x
  +   mtspr   SPRN_DBSR, r4
  +   mfspr   r4, SPRN_SPRG_THREAD
  +   lwz r4, THREAD_KVM_VCPU(r4)
  +   mtcrr3
  +   lwz r3, VCPU_CRIT_SAVE(r4)
  +   mfspr   r4, \scratch
  +   rfci
 
  What is this part doing? Try to ignore the debug exit?
 
  As BOOKE doesn't have hardware support for virtualization, the hardware
  never knows
  whether the current pc is in the guest or in the host.
  So when hardware single step is enabled for the guest, it cannot be disabled
  at the
  time of guest exit. Thus, we'll see that a single step interrupt
  happens

RE: [PATCH 7/8] KVM: PPC: booke/bookehv: Add debug stub support

2013-01-31 Thread Bhushan Bharat-R65777



 -Original Message-
 From: Alexander Graf [mailto:ag...@suse.de]
 Sent: Friday, January 25, 2013 5:37 PM
 To: Bhushan Bharat-R65777
 Cc: kvm-...@vger.kernel.org; kvm@vger.kernel.org; Bhushan Bharat-R65777
 Subject: Re: [PATCH 7/8] KVM: PPC: booke/bookehv: Add debug stub support
 
 
 On 16.01.2013, at 09:24, Bharat Bhushan wrote:
 
  This patch adds the debug stub support on booke/bookehv.
  Now QEMU debug stub can use hw breakpoint, watchpoint and software
  breakpoint to debug guest.
 
  Signed-off-by: Bharat Bhushan bharat.bhus...@freescale.com
  ---
  arch/powerpc/include/asm/kvm_host.h   |5 +
  arch/powerpc/include/asm/kvm_ppc.h|2 +
  arch/powerpc/include/uapi/asm/kvm.h   |   22 -
  arch/powerpc/kernel/asm-offsets.c |   26 ++
  arch/powerpc/kvm/booke.c  |  124 +
  arch/powerpc/kvm/booke_interrupts.S   |  114 ++
  arch/powerpc/kvm/bookehv_interrupts.S |  145 
  -
  arch/powerpc/kvm/e500_emulate.c   |6 ++
  arch/powerpc/kvm/e500mc.c |3 +-
  9 files changed, 422 insertions(+), 25 deletions(-)
 
  diff --git a/arch/powerpc/include/asm/kvm_host.h
  b/arch/powerpc/include/asm/kvm_host.h
  index f4ba881..a9feeb0 100644
  --- a/arch/powerpc/include/asm/kvm_host.h
  +++ b/arch/powerpc/include/asm/kvm_host.h
  @@ -504,7 +504,12 @@ struct kvm_vcpu_arch {
  u32 mmucfg;
  u32 epr;
  u32 crit_save;
  +   /* guest debug registers*/
  struct kvmppc_booke_debug_reg dbg_reg;
  +   /* shadow debug registers */
  +   struct kvmppc_booke_debug_reg shadow_dbg_reg;
  +   /* host debug registers*/
  +   struct kvmppc_booke_debug_reg host_dbg_reg;
  #endif
  gpa_t paddr_accessed;
  gva_t vaddr_accessed;
  diff --git a/arch/powerpc/include/asm/kvm_ppc.h
  b/arch/powerpc/include/asm/kvm_ppc.h
  index b3c481e..e4b3398 100644
  --- a/arch/powerpc/include/asm/kvm_ppc.h
  +++ b/arch/powerpc/include/asm/kvm_ppc.h
  @@ -45,6 +45,8 @@ enum emulation_result {
  EMULATE_FAIL, /* can't emulate this instruction */
  EMULATE_AGAIN,/* something went wrong. go again */
  EMULATE_DO_PAPR,  /* kvm_run filled with PAPR request */
  +   EMULATE_DEBUG_INST,   /* debug instruction for software
  +breakpoint, exit to userspace */
 
 Does this do something different from DO_PAPR? Maybe it makes sense to have an
 exit code EMULATE_EXIT_USER?

I think EMULATE_DO_PAPR does something similar but the name is confusing.
Maybe we can rename EMULATE_DO_PAPR to
EMULATE_EXIT_USER.

Thanks
-Bharat
 
  };
 
  extern int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu
  *vcpu); diff --git a/arch/powerpc/include/uapi/asm/kvm.h
  b/arch/powerpc/include/uapi/asm/kvm.h
  index e8842ed..a81ab29 100644
  --- a/arch/powerpc/include/uapi/asm/kvm.h
  +++ b/arch/powerpc/include/uapi/asm/kvm.h
  @@ -25,6 +25,7 @@
  /* Select powerpc specific features in linux/kvm.h */ #define
  __KVM_HAVE_SPAPR_TCE #define __KVM_HAVE_PPC_SMT
  +#define __KVM_HAVE_GUEST_DEBUG
 
  struct kvm_regs {
  __u64 pc;
  @@ -267,7 +268,24 @@ struct kvm_fpu {
  __u64 fpr[32];
  };
 
  +/*
  + * Defines for h/w breakpoint, watchpoint (read, write or both) and
  + * software breakpoint.
  + * These are used as type in KVM_SET_GUEST_DEBUG ioctl and status
  + * for KVM_DEBUG_EXIT.
  + */
  +#define KVMPPC_DEBUG_NONE  0x0
  +#define KVMPPC_DEBUG_BREAKPOINT(1UL  1)
  +#define KVMPPC_DEBUG_WATCH_WRITE   (1UL  2)
  +#define KVMPPC_DEBUG_WATCH_READ(1UL  3)
  struct kvm_debug_exit_arch {
  +   __u64 address;
  +   /*
  +* exiting to userspace because of h/w breakpoint, watchpoint
  +* (read, write or both) and software breakpoint.
  +*/
  +   __u32 status;
  +   __u32 reserved;
  };
 
  /* for KVM_SET_GUEST_DEBUG */
  @@ -279,10 +297,6 @@ struct kvm_guest_debug_arch {
   * Type denotes h/w breakpoint, read watchpoint, write
   * watchpoint or watchpoint (both read and write).
   */
  -#define KVMPPC_DEBUG_NOTYPE0x0
  -#define KVMPPC_DEBUG_BREAKPOINT(1UL  1)
  -#define KVMPPC_DEBUG_WATCH_WRITE   (1UL  2)
  -#define KVMPPC_DEBUG_WATCH_READ(1UL  3)
  __u32 type;
  __u32 reserved;
  } bp[16];
  diff --git a/arch/powerpc/kernel/asm-offsets.c
  b/arch/powerpc/kernel/asm-offsets.c
  index 02048f3..22deda7 100644
  --- a/arch/powerpc/kernel/asm-offsets.c
  +++ b/arch/powerpc/kernel/asm-offsets.c
  @@ -563,6 +563,32 @@ int main(void)
  DEFINE(VCPU_FAULT_DEAR, offsetof(struct kvm_vcpu, arch.fault_dear));
  DEFINE(VCPU_FAULT_ESR, offsetof(struct kvm_vcpu, arch.fault_esr));
  DEFINE(VCPU_CRIT_SAVE, offsetof(struct kvm_vcpu, arch.crit_save));
  +   DEFINE(VCPU_DBSR, offsetof(struct kvm_vcpu, arch.dbsr));
  +   DEFINE(VCPU_SHADOW_DBG, offsetof(struct kvm_vcpu, arch.shadow_dbg_reg));
  +   DEFINE(VCPU_HOST_DBG,

Re: [PATCH] tcm_vhost: Multi-target support

2013-01-31 Thread Nicholas A. Bellinger

On Fri, 2013-02-01 at 12:03 +0800, Asias He wrote:
 On 02/01/2013 04:59 AM, Nicholas A. Bellinger wrote:
  On Thu, 2013-01-31 at 17:28 +0800, Asias He wrote:
  Hello Nicholas,
 
  On 01/31/2013 03:33 PM, Asias He wrote:
  In order to take advantage of Paolo's multi-queue virtio-scsi, we need
  multi-target support in tcm_vhost first. Otherwise all the requests go
  to one queue and the other queues are idle.
 

SNIP

  @@ -771,14 +799,11 @@ static int vhost_scsi_set_endpoint(
}
tv_tport = tv_tpg-tport;
   
  - if (!strcmp(tv_tport-tport_name, t-vhost_wwpn) 
  - (tv_tpg-tport_tpgt == t-vhost_tpgt)) {
  + if (!strcmp(tv_tport-tport_name, t-vhost_wwpn)) {
tv_tpg-tv_tpg_vhost_count++;
  - mutex_unlock(tv_tpg-tv_tpg_mutex);
  - mutex_unlock(tcm_vhost_mutex);
   
mutex_lock(vs-dev.mutex);
  - if (vs-vs_tpg) {
  + if (vs-vs_tpg[tv_tpg-tport_tpgt - 1]) {
mutex_unlock(vs-dev.mutex);
mutex_lock(tv_tpg-tv_tpg_mutex);
tv_tpg-tv_tpg_vhost_count--;
  @@ -786,15 +811,17 @@ static int vhost_scsi_set_endpoint(
return -EEXIST;
}
   
  - vs-vs_tpg = tv_tpg;
  + vs-vs_tpg[tv_tpg-tport_tpgt - 1] = tv_tpg;
 
 
  tv_tpg->tport_tpgt starts from 0, right? I thought it started from 1,
  because I always saw it start from 1 in targetcli.
 
  o- vhost
 o- naa.6001405bd4e8476d
o- tpg1
   o- luns
  o- lun0
o- tpg2
   o- luns
  o- lun0
o- tpg3
   o- luns
  o- lun0
o- tpg4
   o- luns
  o- lun0
 
  
  So at least with iscsi-target, we start from tpgt=1 to avoid some legacy
  initiators that have issues handling tpgt=0.
  
  Given that rtslib/targetcli currently expect this when the tpgs
  feature is enabled, starting from tpgt=1 with tcm_vhost probably makes
  the most sense.
 
 Okay. But tpgt can be 0, right?
 

Most certainly, in the end it's totally up to the fabric.  ;)
 
 I saw this setup:
 
   cd /sys/kernel/config/target
   mkdir -p core/fileio_0/fileio
   echo 'fd_dev_name=/home/pbonzini/test.img,fd_dev_size=5905580032' 
 core/fileio_0/fileio/control
   echo 1  core/fileio_0/fileio/enable
   mkdir -p vhost/naa.600140554cf3a18e/tpgt_0/lun/lun_0
   cd vhost/naa.600140554cf3a18e/tpgt_0
   ln -sf ../../../../../core/fileio_0/fileio/ lun/lun_0/virtual_scsi_port
   echo naa.60014053226f0388  nexus
 
 And this:
 
** Setup wwpn and tpgt
$ wwpn=naa.0
$ tpgt=/sys/kernel/config/target/vhost/$wwpn/tpgt_0
$ nexus=$tpgt/nexus
$ mkdir -p $tpgt
$ echo -n $wwpn  $nexus
 
 

OK, I think you'll want to avoid the extra vs->vs_tpg[tpgt - 1] offset
above to properly support this.

--nab

 
  If it is true. I will cook v2 of this patch.
 
  Also, tv_tpg->tport_tpgt can be non-contiguous, e.g.
 
  o- vhost
 o- naa.6001405bd4e8476d
o- tpg1
   o- luns
  o- lun0
o- tpg2
   o- luns
  o- lun0
o- tpg4
   o- luns
  o- lun0
 
  I will handle this in v2.
 
  
  Correct, tpgt values may be optionally non-contiguous up to unsigned
  short.
 
 ok.
 
  --nab
  
  
 
 


--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH V4 RESEND 00/22] Multiqueue virtio-net

2013-01-31 Thread Jason Wang

Hello all:

This series is an update of the last version of multiqueue virtio-net support.

This series tries to bring multiqueue support to virtio-net through a
multiqueue-capable tap backend and multiple vhost threads.

Patch 1 converts the bitfields in TAPState to bool. Patch 2 replaces assert(0) with
abort() in tap.

To support this, multiqueue nic support was added to qemu. This is done by
introducing an array of NetClientStates in NICState, and making each pair of peers
a queue of the nic. This is done in patch 3-9.

Tap was also converted to be able to create a multiple queue
backend. Currently, only linux supports this by issuing TUNSETIFF N times with
the same device name to create N queues. Each fd returned by TUNSETIFF is a
queue supported by the kernel. Three new command line options were introduced: queues
is used to tell how many queues will be created by qemu; fds is used to
pass multiple pre-created tap file descriptors to qemu; vhostfds is used to
pass multiple pre-created vhost descriptors to qemu. This is done in patch
10-15.

A method of deleting a queue and a queue_index were also introduced for virtio,
this is done in patch 16-17.

Vhost was also changed to support multiqueue by introducing a start vq index
which tracks the first virtqueue that will be used by vhost instead of the
assumption that vhost always uses virtqueues from index 0. This is done in
patch 18.

The last part is the multiqueue userspace changes, this is done in patch 19-22.

With these changes, a user can start a multiqueue virtio-net device through

./qemu -netdev tap,id=hn0,queues=2,vhost=on -device virtio-net-pci,netdev=hn0

Management tools such as libvirt can pass multiple pre-created fds/vhostfds 
through

./qemu -netdev tap,id=hn0,fds=X:Y,vhostfds=M:N -device virtio-net-pci,netdev=hn0

For the one who wants to try, a git tree is available at:
git://github.com/jasowang/qemu.git

Changes from V4:
- fix the conflict with Michael's tree and rebase to the latest (Michael)

Changes from V3:
- convert bitfield to bool in TAPState (Blue)
- use abort() instead of assert(0) in tap code (Blue)
- rebase to the latest
- fix a bug that breaks the non-tap network

Changes from V2:
- Don't start/stop vhost threads when changing queues and simplify the interface
  between virtio-net and vhost further.

Changes from V1:
- silent checkpatch (Blue)
- use fds/vhostfds instead of fd/vhostfd (Stefan)
- use fds=X:Y:Z instead of fd=X,fd=Y,fd=Z (Anthony)
- split patches (Stefan)
- typos in commit log (Stefan)
- Warn 'queues=' when fds/vhostfds is used (Stefan)
- rename __net_init_tap to net_init_tap_one (Stefan)
- check the consistency of vnet_hdr of multiple tap fds (Stefan)
- disable multiqueue support for bridge-helper (Stefan)
- rename tap_attach()/tap_detach() to tap_enable()/tap_disable() (Stefan)
- fix booting with legacy guest (WanLong)
- don't bump the version when doing migration (Michael)
- simplify the interface between virtio-net and multiqueue vhost_net (Michael)
- rebase the patches to latest
- re-order the patches that let the net part comes first to simplify the
  reviewing
- simplify the interface between virtio-net and multiqueue vhost_net
- move the guest notifiers setup from vhost to vhost_net
- fix a build issue of hw/mcf_fce.c

Changes from RFC v2:
- rebase the codes to latest qemu
- align the multiqueue virtio-net implementation to virtio spec
- split the patches into more smaller patches
- set_link and hotplug support

Changes from RFC V1:
- rebase to the latest
- fix memory leak in parse_netdev
- fix guest notifiers assignment/de-assignment
- changes the command lines to:
   qemu -netdev tap,queues=2 -device virtio-net-pci,queues=2

Reference:
V1: http://lists.nongnu.org/archive/html/qemu-devel/2012-12/msg03558.html
RFC v2: http://lists.gnu.org/archive/html/qemu-devel/2012-06/msg04108.html
RFC v1: http://comments.gmane.org/gmane.comp.emulators.qemu/100481

Perf Numbers:
- norm is short for normalize result
- trans.rate is short for transaction rate

Two Intel Xeon 5620 with direct connected intel 82599EB
Host/Guest kernel: David net tree
vhost enabled

- lots of improvements in both latency and cpu utilization in the request-response test
- a regression when the guest sends small packets, because TCP tends to batch
  less when the latency is improved

1q/2q/4q
TCP_RR
 size #sessions trans.rate  norm trans.rate  norm trans.rate  norm
1 1 9393.26   595.64  9408.18   597.34  9375.19   584.12
1 2072162.1   2214.24 129880.22 2456.13 196949.81 2298.13
1 50107513.38 2653.99 139721.93 2490.58 259713.82 2873.57
1 100   126734.63 2676.54 145553.5  2406.63 265252.68 2943
64 19453.42   632.33  9371.37   616.13  9338.19   615.97
64 20   70620.03  2093.68 125155.75 2409.15 191239.91 2253.32
64 50   1069662448.29 146518.67 2514.47 242134.07 2720.91
64 100  117046.35 2394.56 190153.09 2696.82 238881.29 2704.41
256 1   8733.29   736.36  8701.07   680.83  8608.92   530.1
256 20  69279.89  2274.45 115103.07

[PATCH V4 RESEND 01/22] net: tap: using bool instead of bitfield

2013-01-31 Thread Jason Wang

Signed-off-by: Jason Wang jasow...@redhat.com
Signed-off-by: Michael S. Tsirkin m...@redhat.com
---
 hw/virtio-net.c   |2 +-
 include/net/tap.h |4 ++--
 net/tap-win32.c   |6 +++---
 net/tap.c |   38 ++
 4 files changed, 24 insertions(+), 26 deletions(-)

diff --git a/hw/virtio-net.c b/hw/virtio-net.c
index dfb9687..b5579b4 100644
--- a/hw/virtio-net.c
+++ b/hw/virtio-net.c
@@ -1102,7 +1102,7 @@ VirtIODevice *virtio_net_init(DeviceState *dev, NICConf 
*conf,
 n-nic = qemu_new_nic(net_virtio_info, conf, 
object_get_typename(OBJECT(dev)), dev-id, n);
 peer_test_vnet_hdr(n);
 if (peer_has_vnet_hdr(n)) {
-tap_using_vnet_hdr(n-nic-nc.peer, 1);
+tap_using_vnet_hdr(n-nic-nc.peer, true);
 n-host_hdr_len = sizeof(struct virtio_net_hdr);
 } else {
 n-host_hdr_len = 0;
diff --git a/include/net/tap.h b/include/net/tap.h
index bb7efb5..883cebf 100644
--- a/include/net/tap.h
+++ b/include/net/tap.h
@@ -29,10 +29,10 @@
 #include qemu-common.h
 #include qapi-types.h
 
-int tap_has_ufo(NetClientState *nc);
+bool tap_has_ufo(NetClientState *nc);
 int tap_has_vnet_hdr(NetClientState *nc);
 int tap_has_vnet_hdr_len(NetClientState *nc, int len);
-void tap_using_vnet_hdr(NetClientState *nc, int using_vnet_hdr);
+void tap_using_vnet_hdr(NetClientState *nc, bool using_vnet_hdr);
 void tap_set_offload(NetClientState *nc, int csum, int tso4, int tso6, int 
ecn, int ufo);
 void tap_set_vnet_hdr_len(NetClientState *nc, int len);
 
diff --git a/net/tap-win32.c b/net/tap-win32.c
index 265369c..3052bba 100644
--- a/net/tap-win32.c
+++ b/net/tap-win32.c
@@ -722,9 +722,9 @@ int net_init_tap(const NetClientOptions *opts, const char 
*name,
 return 0;
 }
 
-int tap_has_ufo(NetClientState *nc)
+bool tap_has_ufo(NetClientState *nc)
 {
-return 0;
+return false;
 }
 
 int tap_has_vnet_hdr(NetClientState *nc)
@@ -741,7 +741,7 @@ void tap_fd_set_vnet_hdr_len(int fd, int len)
 {
 }
 
-void tap_using_vnet_hdr(NetClientState *nc, int using_vnet_hdr)
+void tap_using_vnet_hdr(NetClientState *nc, bool using_vnet_hdr)
 {
 }
 
diff --git a/net/tap.c b/net/tap.c
index eb40c42..5542c98 100644
--- a/net/tap.c
+++ b/net/tap.c
@@ -55,10 +55,10 @@ typedef struct TAPState {
 char down_script[1024];
 char down_script_arg[128];
 uint8_t buf[TAP_BUFSIZE];
-unsigned int read_poll : 1;
-unsigned int write_poll : 1;
-unsigned int using_vnet_hdr : 1;
-unsigned int has_ufo: 1;
+bool read_poll;
+bool write_poll;
+bool using_vnet_hdr;
+bool has_ufo;
 VHostNetState *vhost_net;
 unsigned host_vnet_hdr_len;
 } TAPState;
@@ -78,15 +78,15 @@ static void tap_update_fd_handler(TAPState *s)
  s);
 }
 
-static void tap_read_poll(TAPState *s, int enable)
+static void tap_read_poll(TAPState *s, bool enable)
 {
-s-read_poll = !!enable;
+s-read_poll = enable;
 tap_update_fd_handler(s);
 }
 
-static void tap_write_poll(TAPState *s, int enable)
+static void tap_write_poll(TAPState *s, bool enable)
 {
-s-write_poll = !!enable;
+s-write_poll = enable;
 tap_update_fd_handler(s);
 }
 
@@ -94,7 +94,7 @@ static void tap_writable(void *opaque)
 {
 TAPState *s = opaque;
 
-tap_write_poll(s, 0);
+tap_write_poll(s, false);
 
 qemu_flush_queued_packets(s-nc);
 }
@@ -108,7 +108,7 @@ static ssize_t tap_write_packet(TAPState *s, const struct 
iovec *iov, int iovcnt
 } while (len == -1  errno == EINTR);
 
 if (len == -1  errno == EAGAIN) {
-tap_write_poll(s, 1);
+tap_write_poll(s, true);
 return 0;
 }
 
@@ -186,7 +186,7 @@ ssize_t tap_read_packet(int tapfd, uint8_t *buf, int maxlen)
 static void tap_send_completed(NetClientState *nc, ssize_t len)
 {
 TAPState *s = DO_UPCAST(TAPState, nc, nc);
-tap_read_poll(s, 1);
+tap_read_poll(s, true);
 }
 
 static void tap_send(void *opaque)
@@ -209,12 +209,12 @@ static void tap_send(void *opaque)
 
 size = qemu_send_packet_async(s-nc, buf, size, tap_send_completed);
 if (size == 0) {
-tap_read_poll(s, 0);
+tap_read_poll(s, false);
 }
 } while (size  0  qemu_can_send_packet(s-nc));
 }
 
-int tap_has_ufo(NetClientState *nc)
+bool tap_has_ufo(NetClientState *nc)
 {
 TAPState *s = DO_UPCAST(TAPState, nc, nc);
 
@@ -253,12 +253,10 @@ void tap_set_vnet_hdr_len(NetClientState *nc, int len)
 s-host_vnet_hdr_len = len;
 }
 
-void tap_using_vnet_hdr(NetClientState *nc, int using_vnet_hdr)
+void tap_using_vnet_hdr(NetClientState *nc, bool using_vnet_hdr)
 {
 TAPState *s = DO_UPCAST(TAPState, nc, nc);
 
-using_vnet_hdr = using_vnet_hdr != 0;
-
 assert(nc-info-type == NET_CLIENT_OPTIONS_KIND_TAP);
 assert(!!s-host_vnet_hdr_len == using_vnet_hdr);
 
@@ -290,8 +288,8 @@ static void tap_cleanup(NetClientState *nc)
 if (s-down_script[0])
 launch_script(s-down_script, s-down_script_arg, s-fd);
 
-tap_read_poll(s,

[PATCH V4 RESEND 02/22] net: tap: use abort() instead of assert(0)

2013-01-31 Thread Jason Wang

Signed-off-by: Jason Wang jasow...@redhat.com
Signed-off-by: Michael S. Tsirkin m...@redhat.com
---
 net/tap-linux.c |4 ++--
 net/tap-win32.c |2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/net/tap-linux.c b/net/tap-linux.c
index 059f5f3..0a6acc7 100644
--- a/net/tap-linux.c
+++ b/net/tap-linux.c
@@ -164,7 +164,7 @@ int tap_probe_vnet_hdr_len(int fd, int len)
 if (ioctl(fd, TUNSETVNETHDRSZ, orig) == -1) {
 fprintf(stderr, TUNGETVNETHDRSZ ioctl() failed: %s. Exiting.\n,
 strerror(errno));
-assert(0);
+abort();
 return -errno;
 }
 return 1;
@@ -175,7 +175,7 @@ void tap_fd_set_vnet_hdr_len(int fd, int len)
 if (ioctl(fd, TUNSETVNETHDRSZ, len) == -1) {
 fprintf(stderr, TUNSETVNETHDRSZ ioctl() failed: %s. Exiting.\n,
 strerror(errno));
-assert(0);
+abort();
 }
 }
 
diff --git a/net/tap-win32.c b/net/tap-win32.c
index 3052bba..601437e 100644
--- a/net/tap-win32.c
+++ b/net/tap-win32.c
@@ -762,5 +762,5 @@ int tap_has_vnet_hdr_len(NetClientState *nc, int len)
 
 void tap_set_vnet_hdr_len(NetClientState *nc, int len)
 {
-assert(0);
+abort();
 }
-- 
1.7.1

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH V4 RESEND 03/22] net: introduce qemu_get_queue()

2013-01-31 Thread Jason Wang

To support multiqueue, this patch introduces a helper qemu_get_queue()
which is used to get the NetClientState of a device. The following patches will
refactor this helper to support multiqueue.

Signed-off-by: Jason Wang jasow...@redhat.com
Signed-off-by: Michael S. Tsirkin m...@redhat.com
---
 hw/cadence_gem.c|9 +++--
 hw/dp8393x.c|9 +++--
 hw/e1000.c  |   24 --
 hw/eepro100.c   |   12 +++---
 hw/etraxfs_eth.c|5 ++-
 hw/lan9118.c|   10 +++---
 hw/mcf_fec.c|4 +-
 hw/milkymist-minimac2.c |4 +-
 hw/mipsnet.c|4 +-
 hw/musicpal.c   |2 +-
 hw/ne2000-isa.c |2 +-
 hw/ne2000.c |7 ++--
 hw/opencores_eth.c  |6 ++--
 hw/pcnet-pci.c  |2 +-
 hw/pcnet.c  |7 ++--
 hw/rtl8139.c|   14 
 hw/smc91c111.c  |4 +-
 hw/spapr_llan.c |4 +-
 hw/stellaris_enet.c |5 ++-
 hw/usb/dev-network.c|   10 +++---
 hw/virtio-net.c |   78 ++-
 hw/xen_nic.c|   13 +---
 hw/xgmac.c  |4 +-
 hw/xilinx_axienet.c |4 +-
 hw/xilinx_ethlite.c |6 ++--
 include/net/net.h   |1 +
 net/net.c   |5 +++
 savevm.c|2 +-
 28 files changed, 141 insertions(+), 116 deletions(-)

diff --git a/hw/cadence_gem.c b/hw/cadence_gem.c
index b77423d..b8071a4 100644
--- a/hw/cadence_gem.c
+++ b/hw/cadence_gem.c
@@ -389,10 +389,10 @@ static void gem_init_register_masks(GemState *s)
  */
 static void phy_update_link(GemState *s)
 {
-DB_PRINT(down %d\n, s-nic-nc.link_down);
+DB_PRINT(down %d\n, qemu_get_queue(s-nic)-link_down);
 
 /* Autonegotiation status mirrors link status.  */
-if (s-nic-nc.link_down) {
+if (qemu_get_queue(s-nic)-link_down) {
 s-phy_regs[PHY_REG_STATUS] = ~(PHY_REG_STATUS_ANEGCMPL |
  PHY_REG_STATUS_LINK);
 s-phy_regs[PHY_REG_INT_ST] |= PHY_REG_INT_ST_LINKC;
@@ -908,9 +908,10 @@ static void gem_transmit(GemState *s)
 
 /* Send the packet somewhere */
 if (s-phy_loop) {
-gem_receive(s-nic-nc, tx_packet, total_bytes);
+gem_receive(qemu_get_queue(s-nic), tx_packet, total_bytes);
 } else {
-qemu_send_packet(s-nic-nc, tx_packet, total_bytes);
+qemu_send_packet(qemu_get_queue(s-nic), tx_packet,
+ total_bytes);
 }
 
 /* Prepare for next packet */
diff --git a/hw/dp8393x.c b/hw/dp8393x.c
index b501450..c2d0bc8 100644
--- a/hw/dp8393x.c
+++ b/hw/dp8393x.c
@@ -339,6 +339,7 @@ static void do_receiver_disable(dp8393xState *s)
 
 static void do_transmit_packets(dp8393xState *s)
 {
+NetClientState *nc = qemu_get_queue(s-nic);
 uint16_t data[12];
 int width, size;
 int tx_len, len;
@@ -408,13 +409,13 @@ static void do_transmit_packets(dp8393xState *s)
 if (s-regs[SONIC_RCR]  (SONIC_RCR_LB1 | SONIC_RCR_LB0)) {
 /* Loopback */
 s-regs[SONIC_TCR] |= SONIC_TCR_CRSL;
-if (s-nic-nc.info-can_receive(s-nic-nc)) {
+if (nc-info-can_receive(nc)) {
 s-loopback_packet = 1;
-s-nic-nc.info-receive(s-nic-nc, s-tx_buffer, tx_len);
+nc-info-receive(nc, s-tx_buffer, tx_len);
 }
 } else {
 /* Transmit packet */
-qemu_send_packet(s-nic-nc, s-tx_buffer, tx_len);
+qemu_send_packet(nc, s-tx_buffer, tx_len);
 }
 s-regs[SONIC_TCR] |= SONIC_TCR_PTX;
 
@@ -903,7 +904,7 @@ void dp83932_init(NICInfo *nd, hwaddr base, int it_shift,
 
 s-nic = qemu_new_nic(net_dp83932_info, s-conf, nd-model, nd-name, s);
 
-qemu_format_nic_info_str(s-nic-nc, s-conf.macaddr.a);
+qemu_format_nic_info_str(qemu_get_queue(s-nic), s-conf.macaddr.a);
 qemu_register_reset(nic_reset, s);
 nic_reset(s);
 
diff --git a/hw/e1000.c b/hw/e1000.c
index ee85c53..3622392 100644
--- a/hw/e1000.c
+++ b/hw/e1000.c
@@ -167,11 +167,11 @@ set_phy_ctrl(E1000State *s, int index, uint16_t val)
 {
 if ((val  MII_CR_AUTO_NEG_EN)  (val  MII_CR_RESTART_AUTO_NEG)) {
 /* no need auto-negotiation if link was down */
-if (s-nic-nc.link_down) {
+if (qemu_get_queue(s-nic)-link_down) {
 s-phy_reg[PHY_STATUS] |= MII_SR_AUTONEG_COMPLETE;
 return;
 }
-s-nic-nc.link_down = true;
+qemu_get_queue(s-nic)-link_down = true;
 e1000_link_down(s);
 s-phy_reg[PHY_STATUS] = ~MII_SR_AUTONEG_COMPLETE;
 DBGOUT(PHY, Start link auto negotiation\n);
@@ -183,7 +183,7 @@ static void
 e1000_autoneg_timer(void *opaque)
 {
 E1000State *s = opaque;
-s-nic-nc.link_down = false;
+qemu_get_queue(s-nic)-link_down = false;
 e1000_link_up(s);

[PATCH V4 RESEND 04/22] net: introduce qemu_get_nic()

2013-01-31 Thread Jason Wang

To support multiqueue, this patch introduces a helper qemu_get_nic() to get
NICState from a NetClientState. The following patches would refactor this helper
to support multiqueue.

Signed-off-by: Jason Wang jasow...@redhat.com
Signed-off-by: Michael S. Tsirkin m...@redhat.com
---
 hw/cadence_gem.c|8 
 hw/dp8393x.c|6 +++---
 hw/e1000.c  |8 
 hw/eepro100.c   |6 +++---
 hw/etraxfs_eth.c|6 +++---
 hw/lan9118.c|6 +++---
 hw/lance.c  |2 +-
 hw/mcf_fec.c|6 +++---
 hw/milkymist-minimac2.c |6 +++---
 hw/mipsnet.c|6 +++---
 hw/musicpal.c   |4 ++--
 hw/ne2000-isa.c |2 +-
 hw/ne2000.c |6 +++---
 hw/opencores_eth.c  |6 +++---
 hw/pcnet-pci.c  |2 +-
 hw/pcnet.c  |6 +++---
 hw/rtl8139.c|8 
 hw/smc91c111.c  |6 +++---
 hw/spapr_llan.c |4 ++--
 hw/stellaris_enet.c |6 +++---
 hw/usb/dev-network.c|6 +++---
 hw/virtio-net.c |   10 +-
 hw/xen_nic.c|4 ++--
 hw/xgmac.c  |6 +++---
 hw/xilinx_axienet.c |6 +++---
 hw/xilinx_ethlite.c |6 +++---
 include/net/net.h   |2 ++
 net/net.c   |   20 
 28 files changed, 92 insertions(+), 78 deletions(-)

diff --git a/hw/cadence_gem.c b/hw/cadence_gem.c
index b8071a4..ab86c17 100644
--- a/hw/cadence_gem.c
+++ b/hw/cadence_gem.c
@@ -409,7 +409,7 @@ static int gem_can_receive(NetClientState *nc)
 {
 GemState *s;
 
-s = DO_UPCAST(NICState, nc, nc)-opaque;
+s = qemu_get_nic_opaque(nc);
 
 DB_PRINT(\n);
 
@@ -612,7 +612,7 @@ static ssize_t gem_receive(NetClientState *nc, const 
uint8_t *buf, size_t size)
 uint8_trxbuf[2048];
 uint8_t   *rxbuf_ptr;
 
-s = DO_UPCAST(NICState, nc, nc)-opaque;
+s = qemu_get_nic_opaque(nc);
 
 /* Do nothing if receive is not enabled. */
 if (!(s-regs[GEM_NWCTRL]  GEM_NWCTRL_RXENA)) {
@@ -1152,7 +1152,7 @@ static const MemoryRegionOps gem_ops = {
 
 static void gem_cleanup(NetClientState *nc)
 {
-GemState *s = DO_UPCAST(NICState, nc, nc)-opaque;
+GemState *s = qemu_get_nic_opaque(nc);
 
 DB_PRINT(\n);
 s-nic = NULL;
@@ -1161,7 +1161,7 @@ static void gem_cleanup(NetClientState *nc)
 static void gem_set_link(NetClientState *nc)
 {
 DB_PRINT(\n);
-phy_update_link(DO_UPCAST(NICState, nc, nc)-opaque);
+phy_update_link(qemu_get_nic_opaque(nc));
 }
 
 static NetClientInfo net_gem_info = {
diff --git a/hw/dp8393x.c b/hw/dp8393x.c
index c2d0bc8..0273fad 100644
--- a/hw/dp8393x.c
+++ b/hw/dp8393x.c
@@ -676,7 +676,7 @@ static const MemoryRegionOps dp8393x_ops = {
 
 static int nic_can_receive(NetClientState *nc)
 {
-dp8393xState *s = DO_UPCAST(NICState, nc, nc)-opaque;
+dp8393xState *s = qemu_get_nic_opaque(nc);
 
 if (!(s-regs[SONIC_CR]  SONIC_CR_RXEN))
 return 0;
@@ -725,7 +725,7 @@ static int receive_filter(dp8393xState *s, const uint8_t * 
buf, int size)
 
 static ssize_t nic_receive(NetClientState *nc, const uint8_t * buf, size_t 
size)
 {
-dp8393xState *s = DO_UPCAST(NICState, nc, nc)-opaque;
+dp8393xState *s = qemu_get_nic_opaque(nc);
 uint16_t data[10];
 int packet_type;
 uint32_t available, address;
@@ -861,7 +861,7 @@ static void nic_reset(void *opaque)
 
 static void nic_cleanup(NetClientState *nc)
 {
-dp8393xState *s = DO_UPCAST(NICState, nc, nc)-opaque;
+dp8393xState *s = qemu_get_nic_opaque(nc);
 
 memory_region_del_subregion(s-address_space, s-mmio);
 memory_region_destroy(s-mmio);
diff --git a/hw/e1000.c b/hw/e1000.c
index 3622392..df6c693 100644
--- a/hw/e1000.c
+++ b/hw/e1000.c
@@ -753,7 +753,7 @@ receive_filter(E1000State *s, const uint8_t *buf, int size)
 static void
 e1000_set_link_status(NetClientState *nc)
 {
-E1000State *s = DO_UPCAST(NICState, nc, nc)-opaque;
+E1000State *s = qemu_get_nic_opaque(nc);
 uint32_t old_status = s-mac_reg[STATUS];
 
 if (nc-link_down) {
@@ -787,7 +787,7 @@ static bool e1000_has_rxbufs(E1000State *s, size_t 
total_size)
 static int
 e1000_can_receive(NetClientState *nc)
 {
-E1000State *s = DO_UPCAST(NICState, nc, nc)-opaque;
+E1000State *s = qemu_get_nic_opaque(nc);
 
 return (s-mac_reg[RCTL]  E1000_RCTL_EN)  e1000_has_rxbufs(s, 1);
 }
@@ -803,7 +803,7 @@ static uint64_t rx_desc_base(E1000State *s)
 static ssize_t
 e1000_receive(NetClientState *nc, const uint8_t *buf, size_t size)
 {
-E1000State *s = DO_UPCAST(NICState, nc, nc)-opaque;
+E1000State *s = qemu_get_nic_opaque(nc);
 struct e1000_rx_desc desc;
 dma_addr_t base;
 unsigned int n, rdt;
@@ -1240,7 +1240,7 @@ e1000_mmio_setup(E1000State *d)
 static void
 e1000_cleanup(NetClientState *nc)
 {
-E1000State *s = DO_UPCAST(NICState, nc, nc)-opaque;
+E1000State *s = qemu_get_nic_opaque(nc);
 
 s-nic = NULL;

[PATCH V4 RESEND 05/22] net: introduce qemu_del_nic()

2013-01-31 Thread Jason Wang

To support multiqueue NICs, this patch separates the NIC destructor from
qemu_del_net_client() into a new helper, qemu_del_nic(), since the mapping
between NICState and NetClientState is not 1:1 in multiqueue. The following
patches refactor this function to support multiqueue NICs.

Signed-off-by: Jason Wang jasow...@redhat.com
Signed-off-by: Michael S. Tsirkin m...@redhat.com
---
 hw/e1000.c   |2 +-
 hw/eepro100.c|2 +-
 hw/ne2000.c  |2 +-
 hw/pcnet-pci.c   |2 +-
 hw/rtl8139.c |2 +-
 hw/usb/dev-network.c |2 +-
 hw/virtio-net.c  |2 +-
 hw/xen_nic.c |2 +-
 include/net/net.h|1 +
 net/net.c|   15 ++-
 10 files changed, 23 insertions(+), 9 deletions(-)

diff --git a/hw/e1000.c b/hw/e1000.c
index df6c693..7dd0455 100644
--- a/hw/e1000.c
+++ b/hw/e1000.c
@@ -1254,7 +1254,7 @@ pci_e1000_uninit(PCIDevice *dev)
 qemu_free_timer(d-autoneg_timer);
 memory_region_destroy(d-mmio);
 memory_region_destroy(d-io);
-qemu_del_net_client(qemu_get_queue(d-nic));
+qemu_del_nic(d-nic);
 }
 
 static NetClientInfo net_e1000_info = {
diff --git a/hw/eepro100.c b/hw/eepro100.c
index f9856ae..5d23796 100644
--- a/hw/eepro100.c
+++ b/hw/eepro100.c
@@ -1849,7 +1849,7 @@ static void pci_nic_uninit(PCIDevice *pci_dev)
 memory_region_destroy(s-flash_bar);
 vmstate_unregister(pci_dev-qdev, s-vmstate, s);
 eeprom93xx_free(pci_dev-qdev, s-eeprom);
-qemu_del_net_client(qemu_get_queue(s-nic));
+qemu_del_nic(s-nic);
 }
 
 static NetClientInfo net_eepro100_info = {
diff --git a/hw/ne2000.c b/hw/ne2000.c
index c989190..3dd1c84 100644
--- a/hw/ne2000.c
+++ b/hw/ne2000.c
@@ -751,7 +751,7 @@ static void pci_ne2000_exit(PCIDevice *pci_dev)
 NE2000State *s = d-ne2000;
 
 memory_region_destroy(s-io);
-qemu_del_net_client(qemu_get_queue(s-nic));
+qemu_del_nic(s-nic);
 }
 
 static Property ne2000_properties[] = {
diff --git a/hw/pcnet-pci.c b/hw/pcnet-pci.c
index 26c90bf..df63b22 100644
--- a/hw/pcnet-pci.c
+++ b/hw/pcnet-pci.c
@@ -279,7 +279,7 @@ static void pci_pcnet_uninit(PCIDevice *dev)
 memory_region_destroy(d-io_bar);
 qemu_del_timer(d-state.poll_timer);
 qemu_free_timer(d-state.poll_timer);
-qemu_del_net_client(qemu_get_queue(d-state.nic));
+qemu_del_nic(d-state.nic);
 }
 
 static NetClientInfo net_pci_pcnet_info = {
diff --git a/hw/rtl8139.c b/hw/rtl8139.c
index b825e83..d7716be 100644
--- a/hw/rtl8139.c
+++ b/hw/rtl8139.c
@@ -3446,7 +3446,7 @@ static void pci_rtl8139_uninit(PCIDevice *dev)
 }
 qemu_del_timer(s-timer);
 qemu_free_timer(s-timer);
-qemu_del_net_client(qemu_get_queue(s-nic));
+qemu_del_nic(s-nic);
 }
 
 static void rtl8139_set_link_status(NetClientState *nc)
diff --git a/hw/usb/dev-network.c b/hw/usb/dev-network.c
index abc6eac..a01a5e7 100644
--- a/hw/usb/dev-network.c
+++ b/hw/usb/dev-network.c
@@ -1330,7 +1330,7 @@ static void usb_net_handle_destroy(USBDevice *dev)
 
 /* TODO: remove the nd_table[] entry */
 rndis_clear_responsequeue(s);
-qemu_del_net_client(qemu_get_queue(s-nic));
+qemu_del_nic(s-nic);
 }
 
 static NetClientInfo net_usbnet_info = {
diff --git a/hw/virtio-net.c b/hw/virtio-net.c
index e69313b..a967006 100644
--- a/hw/virtio-net.c
+++ b/hw/virtio-net.c
@@ -1157,6 +1157,6 @@ void virtio_net_exit(VirtIODevice *vdev)
 qemu_bh_delete(n-tx_bh);
 }
 
-qemu_del_net_client(qemu_get_queue(n-nic));
+qemu_del_nic(n-nic);
 virtio_cleanup(n-vdev);
 }
diff --git a/hw/xen_nic.c b/hw/xen_nic.c
index 55b7960..4be077d 100644
--- a/hw/xen_nic.c
+++ b/hw/xen_nic.c
@@ -408,7 +408,7 @@ static void net_disconnect(struct XenDevice *xendev)
 netdev-rxs = NULL;
 }
 if (netdev-nic) {
-qemu_del_net_client(qemu_get_queue(netdev-nic));
+qemu_del_nic(netdev-nic);
 netdev-nic = NULL;
 }
 }
diff --git a/include/net/net.h b/include/net/net.h
index 96e05c4..f0d1aa2 100644
--- a/include/net/net.h
+++ b/include/net/net.h
@@ -77,6 +77,7 @@ NICState *qemu_new_nic(NetClientInfo *info,
const char *model,
const char *name,
void *opaque);
+void qemu_del_nic(NICState *nic);
 NetClientState *qemu_get_queue(NICState *nic);
 NICState *qemu_get_nic(NetClientState *nc);
 void *qemu_get_nic_opaque(NetClientState *nc);
diff --git a/net/net.c b/net/net.c
index 606e860..47d56e3 100644
--- a/net/net.c
+++ b/net/net.c
@@ -291,6 +291,15 @@ void qemu_del_net_client(NetClientState *nc)
 return;
 }
 
+assert(nc-info-type != NET_CLIENT_OPTIONS_KIND_NIC);
+
+qemu_cleanup_net_client(nc);
+qemu_free_net_client(nc);
+}
+
+void qemu_del_nic(NICState *nic)
+{
+NetClientState *nc = qemu_get_queue(nic);
 /* If this is a peer NIC and peer has already been deleted, free it now. */
 if (nc-peer  nc-info-type == NET_CLIENT_OPTIONS_KIND_NIC) {
 NICState *nic = qemu_get_nic(nc);
@@

[PATCH V4 RESEND 06/22] net: introduce qemu_find_net_clients_except()

2013-01-31 Thread Jason Wang

In multiqueue, all NetClientStates that belong to the same netdev or NIC have
the same id. So this patch introduces a helper, qemu_find_net_clients_except(),
which finds all NetClientStates with a given id, except those of a specified
type. This will be used by multiqueue networking.

Signed-off-by: Jason Wang jasow...@redhat.com
Signed-off-by: Michael S. Tsirkin m...@redhat.com
---
 include/net/net.h |2 ++
 net/net.c |   21 +
 2 files changed, 23 insertions(+), 0 deletions(-)

diff --git a/include/net/net.h b/include/net/net.h
index f0d1aa2..995df5c 100644
--- a/include/net/net.h
+++ b/include/net/net.h
@@ -68,6 +68,8 @@ typedef struct NICState {
 } NICState;
 
 NetClientState *qemu_find_netdev(const char *id);
+int qemu_find_net_clients_except(const char *id, NetClientState **ncs,
+ NetClientOptionsKind type, int max);
 NetClientState *qemu_new_net_client(NetClientInfo *info,
 NetClientState *peer,
 const char *model,
diff --git a/net/net.c b/net/net.c
index 47d56e3..16dd327 100644
--- a/net/net.c
+++ b/net/net.c
@@ -508,6 +508,27 @@ NetClientState *qemu_find_netdev(const char *id)
 return NULL;
 }
 
+int qemu_find_net_clients_except(const char *id, NetClientState **ncs,
+ NetClientOptionsKind type, int max)
+{
+NetClientState *nc;
+int ret = 0;
+
+QTAILQ_FOREACH(nc, net_clients, next) {
+if (nc-info-type == type) {
+continue;
+}
+if (!strcmp(nc-name, id)) {
+if (ret  max) {
+ncs[ret] = nc;
+}
+ret++;
+}
+}
+
+return ret;
+}
+
 static int nic_get_free_idx(void)
 {
 int index;
-- 
1.7.1

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH V4 RESEND 07/22] net: introduce qemu_net_client_setup()

2013-01-31 Thread Jason Wang

This patch separates the setup of a NetClientState from its allocation. This
allows allocating an array of NetClientStates and initializing them one by one,
which is what multiqueue needs.

Signed-off-by: Jason Wang jasow...@redhat.com
Signed-off-by: Michael S. Tsirkin m...@redhat.com
---
 net/net.c |   29 +++--
 1 files changed, 19 insertions(+), 10 deletions(-)

diff --git a/net/net.c b/net/net.c
index 16dd327..3a5bdf6 100644
--- a/net/net.c
+++ b/net/net.c
@@ -182,17 +182,12 @@ static char *assign_name(NetClientState *nc1, const char 
*model)
 return g_strdup(buf);
 }
 
-NetClientState *qemu_new_net_client(NetClientInfo *info,
-NetClientState *peer,
-const char *model,
-const char *name)
+static void qemu_net_client_setup(NetClientState *nc,
+  NetClientInfo *info,
+  NetClientState *peer,
+  const char *model,
+  const char *name)
 {
-NetClientState *nc;
-
-assert(info-size = sizeof(NetClientState));
-
-nc = g_malloc0(info-size);
-
 nc-info = info;
 nc-model = g_strdup(model);
 if (name) {
@@ -210,6 +205,20 @@ NetClientState *qemu_new_net_client(NetClientInfo *info,
 
 nc-send_queue = qemu_new_net_queue(nc);
 
+}
+
+NetClientState *qemu_new_net_client(NetClientInfo *info,
+NetClientState *peer,
+const char *model,
+const char *name)
+{
+NetClientState *nc;
+
+assert(info-size = sizeof(NetClientState));
+
+nc = g_malloc0(info-size);
+qemu_net_client_setup(nc, info, peer, model, name);
+
 return nc;
 }
 
-- 
1.7.1

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH V4 RESEND 08/22] net: introduce NetClientState destructor

2013-01-31 Thread Jason Wang

To allow allocating an array of NetClientStates and freeing it all at once,
this patch introduces a destructor for NetClientState. The destructor can do a
type-specific free, which multiqueue can use to free the whole array at once.

Signed-off-by: Jason Wang jasow...@redhat.com
Signed-off-by: Michael S. Tsirkin m...@redhat.com
---
 include/net/net.h |2 ++
 net/net.c |   17 +
 2 files changed, 15 insertions(+), 4 deletions(-)

diff --git a/include/net/net.h b/include/net/net.h
index 995df5c..22adc99 100644
--- a/include/net/net.h
+++ b/include/net/net.h
@@ -35,6 +35,7 @@ typedef ssize_t (NetReceive)(NetClientState *, const uint8_t 
*, size_t);
 typedef ssize_t (NetReceiveIOV)(NetClientState *, const struct iovec *, int);
 typedef void (NetCleanup) (NetClientState *);
 typedef void (LinkStatusChanged)(NetClientState *);
+typedef void (NetClientDestructor)(NetClientState *);
 
 typedef struct NetClientInfo {
 NetClientOptionsKind type;
@@ -58,6 +59,7 @@ struct NetClientState {
 char *name;
 char info_str[256];
 unsigned receive_disabled : 1;
+NetClientDestructor *destructor;
 };
 
 typedef struct NICState {
diff --git a/net/net.c b/net/net.c
index 3a5bdf6..98a1934 100644
--- a/net/net.c
+++ b/net/net.c
@@ -182,11 +182,17 @@ static char *assign_name(NetClientState *nc1, const char 
*model)
 return g_strdup(buf);
 }
 
+static void qemu_net_client_destructor(NetClientState *nc)
+{
+g_free(nc);
+}
+
 static void qemu_net_client_setup(NetClientState *nc,
   NetClientInfo *info,
   NetClientState *peer,
   const char *model,
-  const char *name)
+  const char *name,
+  NetClientDestructor *destructor)
 {
 nc-info = info;
 nc-model = g_strdup(model);
@@ -204,7 +210,7 @@ static void qemu_net_client_setup(NetClientState *nc,
 QTAILQ_INSERT_TAIL(net_clients, nc, next);
 
 nc-send_queue = qemu_new_net_queue(nc);
-
+nc-destructor = destructor;
 }
 
 NetClientState *qemu_new_net_client(NetClientInfo *info,
@@ -217,7 +223,8 @@ NetClientState *qemu_new_net_client(NetClientInfo *info,
 assert(info-size = sizeof(NetClientState));
 
 nc = g_malloc0(info-size);
-qemu_net_client_setup(nc, info, peer, model, name);
+qemu_net_client_setup(nc, info, peer, model, name,
+  qemu_net_client_destructor);
 
 return nc;
 }
@@ -279,7 +286,9 @@ static void qemu_free_net_client(NetClientState *nc)
 }
 g_free(nc-name);
 g_free(nc-model);
-g_free(nc);
+if (nc-destructor) {
+nc-destructor(nc);
+}
 }
 
 void qemu_del_net_client(NetClientState *nc)
-- 
1.7.1

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH V4 RESEND 09/22] net: multiqueue support

2013-01-31 Thread Jason Wang

This patch adds basic multiqueue support to qemu. The idea is simple: an array
of NetClientStates is introduced in NICState, and parse_netdev() is extended to
find all NetClientStates that belong to the backend and place their pointers in
NICConf. Then qemu_new_nic() can set up an N:N mapping between the
NetClientStates that belong to a NIC and the NetClientStates that belong to the
netdev. A queue_index is also introduced in NetClientState to track its index.
After this, each peer of a NICState is abstracted as a queue.

After this change, all NetClientStates that belong to the same backend/NIC have
the same id. When the user wants to change the link status, all NetClientStates
that belong to the same backend/NIC are changed as well. When the user wants to
delete a device or netdev, all NetClientStates that belong to the same
backend/NIC are deleted as well. Changing or deleting a specific queue is not
allowed.

Signed-off-by: Jason Wang jasow...@redhat.com
Signed-off-by: Michael S. Tsirkin m...@redhat.com
---
 hw/dp8393x.c|2 +-
 hw/mcf_fec.c|2 +-
 hw/qdev-properties-system.c |   46 +++---
 hw/qdev-properties.h|6 +-
 include/net/net.h   |   18 +--
 net/net.c   |  113 +++
 6 files changed, 139 insertions(+), 48 deletions(-)

diff --git a/hw/dp8393x.c b/hw/dp8393x.c
index 0273fad..808157b 100644
--- a/hw/dp8393x.c
+++ b/hw/dp8393x.c
@@ -900,7 +900,7 @@ void dp83932_init(NICInfo *nd, hwaddr base, int it_shift,
 s-regs[SONIC_SR] = 0x0004; /* only revision recognized by Linux */
 
 s-conf.macaddr = nd-macaddr;
-s-conf.peer = nd-netdev;
+s-conf.peers.ncs[0] = nd-netdev;
 
 s-nic = qemu_new_nic(net_dp83932_info, s-conf, nd-model, nd-name, s);
 
diff --git a/hw/mcf_fec.c b/hw/mcf_fec.c
index 909e32b..8e60f09 100644
--- a/hw/mcf_fec.c
+++ b/hw/mcf_fec.c
@@ -472,7 +472,7 @@ void mcf_fec_init(MemoryRegion *sysmem, NICInfo *nd,
 memory_region_add_subregion(sysmem, base, s-iomem);
 
 s-conf.macaddr = nd-macaddr;
-s-conf.peer = nd-netdev;
+s-conf.peers.ncs[0] = nd-netdev;
 
 s-nic = qemu_new_nic(net_mcf_fec_info, s-conf, nd-model, nd-name, s);
 
diff --git a/hw/qdev-properties-system.c b/hw/qdev-properties-system.c
index ce0f793..ce3af22 100644
--- a/hw/qdev-properties-system.c
+++ b/hw/qdev-properties-system.c
@@ -173,16 +173,47 @@ PropertyInfo qdev_prop_chr = {
 
 static int parse_netdev(DeviceState *dev, const char *str, void **ptr)
 {
-NetClientState *netdev = qemu_find_netdev(str);
+NICPeers *peers_ptr = (NICPeers *)ptr;
+NICConf *conf = container_of(peers_ptr, NICConf, peers);
+NetClientState **ncs = peers_ptr-ncs;
+NetClientState *peers[MAX_QUEUE_NUM];
+int queues, i = 0;
+int ret;
 
-if (netdev == NULL) {
-return -ENOENT;
+queues = qemu_find_net_clients_except(str, peers,
+  NET_CLIENT_OPTIONS_KIND_NIC,
+  MAX_QUEUE_NUM);
+if (queues == 0) {
+ret = -ENOENT;
+goto err;
 }
-if (netdev-peer) {
-return -EEXIST;
+
+if (queues  MAX_QUEUE_NUM) {
+ret = -E2BIG;
+goto err;
+}
+
+for (i = 0; i  queues; i++) {
+if (peers[i] == NULL) {
+ret = -ENOENT;
+goto err;
+}
+
+if (peers[i]-peer) {
+ret = -EEXIST;
+goto err;
+}
+
+ncs[i] = peers[i];
+ncs[i]-queue_index = i;
 }
-*ptr = netdev;
+
+conf-queues = queues;
+
 return 0;
+
+err:
+return ret;
 }
 
 static const char *print_netdev(void *ptr)
@@ -249,7 +280,8 @@ static void set_vlan(Object *obj, Visitor *v, void *opaque,
 {
 DeviceState *dev = DEVICE(obj);
 Property *prop = opaque;
-NetClientState **ptr = qdev_get_prop_ptr(dev, prop);
+NICPeers *peers_ptr = qdev_get_prop_ptr(dev, prop);
+NetClientState **ptr = peers_ptr-ncs[0];
 Error *local_err = NULL;
 int32_t id;
 NetClientState *hubport;
diff --git a/hw/qdev-properties.h b/hw/qdev-properties.h
index ddcf774..20c67f3 100644
--- a/hw/qdev-properties.h
+++ b/hw/qdev-properties.h
@@ -31,7 +31,7 @@ extern PropertyInfo qdev_prop_pci_host_devaddr;
 .name  = (_name),\
 .info  = (_prop),   \
 .offset= offsetof(_state, _field)\
-+ type_check(_type,typeof_field(_state, _field)),\
++ type_check(_type, typeof_field(_state, _field)),   \
 }
 #define DEFINE_PROP_DEFAULT(_name, _state, _field, _defval, _prop, _type) { \
 .name  = (_name),   \
@@ -77,9 +77,9 @@ extern PropertyInfo qdev_prop_pci_host_devaddr;
 #define DEFINE_PROP_STRING(_n, _s, _f) \
 DEFINE_PROP(_n, _s, _f, qdev_prop_string, char*)
 #define DEFINE_PROP_NETDEV(_n, _s, _f)

[PATCH V4 RESEND 10/22] tap: import linux multiqueue constants

2013-01-31 Thread Jason Wang

Import the multiqueue constants from if_tun.h in Linux 3.8-rc3. A new ifr flag,
IFF_MULTI_QUEUE, was introduced to create a multiqueue backend by calling
TUNSETIFF with this flag and the same interface name multiple times.

A new ioctl, TUNSETQUEUE, was also introduced. Calling this ioctl with
IFF_DETACH_QUEUE disables the queue in the Linux kernel; calling it with
IFF_ATTACH_QUEUE enables the queue in the Linux kernel.

Signed-off-by: Jason Wang jasow...@redhat.com
Signed-off-by: Michael S. Tsirkin m...@redhat.com
---
 net/tap-linux.h |4 
 1 files changed, 4 insertions(+), 0 deletions(-)

diff --git a/net/tap-linux.h b/net/tap-linux.h
index cb2a6d4..65087e1 100644
--- a/net/tap-linux.h
+++ b/net/tap-linux.h
@@ -29,6 +29,7 @@
 #define TUNSETSNDBUF   _IOW('T', 212, int)
 #define TUNGETVNETHDRSZ _IOR('T', 215, int)
 #define TUNSETVNETHDRSZ _IOW('T', 216, int)
+#define TUNSETQUEUE  _IOW('T', 217, int)
 
 #endif
 
@@ -36,6 +37,9 @@
 #define IFF_TAP0x0002
 #define IFF_NO_PI  0x1000
 #define IFF_VNET_HDR   0x4000
+#define IFF_MULTI_QUEUE 0x0100
+#define IFF_ATTACH_QUEUE 0x0200
+#define IFF_DETACH_QUEUE 0x0400
 
 /* Features for GSO (TUNSETOFFLOAD). */
 #define TUN_F_CSUM 0x01/* You can hand me unchecksummed packets. */
-- 
1.7.1

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH V4 RESEND 11/22] tap: factor out common tap initialization

2013-01-31 Thread Jason Wang

This patch factors out the common initialization of tap into a new helper
net_init_tap_one(). This will be used by multiqueue tap patches.

Signed-off-by: Jason Wang jasow...@redhat.com
Signed-off-by: Michael S. Tsirkin m...@redhat.com
---
 net/tap.c |  130 ++---
 1 files changed, 73 insertions(+), 57 deletions(-)

diff --git a/net/tap.c b/net/tap.c
index 5542c98..23fb6e0 100644
--- a/net/tap.c
+++ b/net/tap.c
@@ -591,6 +591,73 @@ static int net_tap_init(const NetdevTapOptions *tap, int 
*vnet_hdr,
 return fd;
 }
 
+static int net_init_tap_one(const NetdevTapOptions *tap, NetClientState *peer,
+const char *model, const char *name,
+const char *ifname, const char *script,
+const char *downscript, const char *vhostfdname,
+int vnet_hdr, int fd)
+{
+TAPState *s;
+
+s = net_tap_fd_init(peer, model, name, fd, vnet_hdr);
+if (!s) {
+close(fd);
+return -1;
+}
+
+if (tap_set_sndbuf(s-fd, tap)  0) {
+return -1;
+}
+
+if (tap-has_fd) {
+snprintf(s-nc.info_str, sizeof(s-nc.info_str), fd=%d, fd);
+} else if (tap-has_helper) {
+snprintf(s-nc.info_str, sizeof(s-nc.info_str), helper=%s,
+ tap-helper);
+} else {
+const char *downscript;
+
+downscript = tap-has_downscript ? tap-downscript :
+DEFAULT_NETWORK_DOWN_SCRIPT;
+
+snprintf(s-nc.info_str, sizeof(s-nc.info_str),
+ ifname=%s,script=%s,downscript=%s, ifname, script,
+ downscript);
+
+if (strcmp(downscript, no) != 0) {
+snprintf(s-down_script, sizeof(s-down_script), %s, downscript);
+snprintf(s-down_script_arg, sizeof(s-down_script_arg),
+ %s, ifname);
+}
+}
+
+if (tap-has_vhost ? tap-vhost :
+vhostfdname || (tap-has_vhostforce  tap-vhostforce)) {
+int vhostfd;
+
+if (tap-has_vhostfd) {
+vhostfd = monitor_handle_fd_param(cur_mon, vhostfdname);
+if (vhostfd == -1) {
+return -1;
+}
+} else {
+vhostfd = -1;
+}
+
+s-vhost_net = vhost_net_init(s-nc, vhostfd,
+  tap-has_vhostforce  tap-vhostforce);
+if (!s-vhost_net) {
+error_report(vhost-net requested but could not be initialized);
+return -1;
+}
+} else if (tap-has_vhostfd) {
+error_report(vhostfd= is not valid without vhost);
+return -1;
+}
+
+return 0;
+}
+
 int net_init_tap(const NetClientOptions *opts, const char *name,
  NetClientState *peer)
 {
@@ -598,10 +665,10 @@ int net_init_tap(const NetClientOptions *opts, const char 
*name,
 
 int fd, vnet_hdr = 0;
 const char *model;
-TAPState *s;
 
 /* for the no-fd, no-helper case */
 const char *script = NULL; /* suppress wrong uninit'd use gcc warning */
+const char *downscript = NULL;
 char ifname[128];
 
 assert(opts-kind == NET_CLIENT_OPTIONS_KIND_TAP);
@@ -647,6 +714,8 @@ int net_init_tap(const NetClientOptions *opts, const char 
*name,
 
 } else {
 script = tap-has_script ? tap-script : DEFAULT_NETWORK_SCRIPT;
+downscript = tap-has_downscript ? tap-downscript :
+DEFAULT_NETWORK_DOWN_SCRIPT;
 fd = net_tap_init(tap, vnet_hdr, script, ifname, sizeof ifname);
 if (fd == -1) {
 return -1;
@@ -655,62 +724,9 @@ int net_init_tap(const NetClientOptions *opts, const char 
*name,
 model = tap;
 }
 
-s = net_tap_fd_init(peer, model, name, fd, vnet_hdr);
-if (!s) {
-close(fd);
-return -1;
-}
-
-if (tap_set_sndbuf(s-fd, tap)  0) {
-return -1;
-}
-
-if (tap-has_fd) {
-snprintf(s-nc.info_str, sizeof(s-nc.info_str), fd=%d, fd);
-} else if (tap-has_helper) {
-snprintf(s-nc.info_str, sizeof(s-nc.info_str), helper=%s,
- tap-helper);
-} else {
-const char *downscript;
-
-downscript = tap-has_downscript ? tap-downscript :
-   DEFAULT_NETWORK_DOWN_SCRIPT;
-
-snprintf(s-nc.info_str, sizeof(s-nc.info_str),
- ifname=%s,script=%s,downscript=%s, ifname, script,
- downscript);
-
-if (strcmp(downscript, no) != 0) {
-snprintf(s-down_script, sizeof(s-down_script), %s, downscript);
-snprintf(s-down_script_arg, sizeof(s-down_script_arg), %s, 
ifname);
-}
-}
-
-if (tap-has_vhost ? tap-vhost :
-tap-has_vhostfd || (tap-has_vhostforce  tap-vhostforce)) {
-int vhostfd;
-
-if (tap-has_vhostfd) {
-vhostfd = monitor_handle_fd_param(cur_mon, tap-vhostfd);
-if (vhostfd == -1) {
-return -1;
-

[PATCH V4 RESEND 12/22] tap: add Linux multiqueue support

2013-01-31 Thread Jason Wang

This patch adds basic multiqueue support for Linux. When multiqueue is needed,
we first check whether the kernel supports multiqueue tap before creating more
queues. Two new functions, tap_fd_enable() and tap_fd_disable(), are introduced
to enable and disable a specific queue. Since multiqueue is only supported on
Linux, these functions return an error on other platforms.

Signed-off-by: Jason Wang jasow...@redhat.com
Signed-off-by: Michael S. Tsirkin m...@redhat.com
---
 net/tap-aix.c |   10 ++
 net/tap-bsd.c |   10 ++
 net/tap-haiku.c   |   10 ++
 net/tap-linux.c   |   51 +++
 net/tap-solaris.c |   10 ++
 net/tap_int.h |2 ++
 6 files changed, 93 insertions(+), 0 deletions(-)

diff --git a/net/tap-aix.c b/net/tap-aix.c
index aff6c52..66e0574 100644
--- a/net/tap-aix.c
+++ b/net/tap-aix.c
@@ -59,3 +59,13 @@ void tap_fd_set_offload(int fd, int csum, int tso4,
 int tso6, int ecn, int ufo)
 {
 }
+
+int tap_fd_enable(int fd)
+{
+return -1;
+}
+
+int tap_fd_disable(int fd)
+{
+return -1;
+}
diff --git a/net/tap-bsd.c b/net/tap-bsd.c
index 01c705b..5ed2d16 100644
--- a/net/tap-bsd.c
+++ b/net/tap-bsd.c
@@ -145,3 +145,13 @@ void tap_fd_set_offload(int fd, int csum, int tso4,
 int tso6, int ecn, int ufo)
 {
 }
+
+int tap_fd_enable(int fd)
+{
+return -1;
+}
+
+int tap_fd_disable(int fd)
+{
+return -1;
+}
diff --git a/net/tap-haiku.c b/net/tap-haiku.c
index 08cc034..0f1b1fe 100644
--- a/net/tap-haiku.c
+++ b/net/tap-haiku.c
@@ -59,3 +59,13 @@ void tap_fd_set_offload(int fd, int csum, int tso4,
 int tso6, int ecn, int ufo)
 {
 }
+
+int tap_fd_enable(int fd)
+{
+return -1;
+}
+
+int tap_fd_disable(int fd)
+{
+return -1;
+}
diff --git a/net/tap-linux.c b/net/tap-linux.c
index 0a6acc7..42376cc 100644
--- a/net/tap-linux.c
+++ b/net/tap-linux.c
@@ -41,6 +41,7 @@ int tap_open(char *ifname, int ifname_size, int *vnet_hdr, 
int vnet_hdr_required
 struct ifreq ifr;
 int fd, ret;
 int len = sizeof(struct virtio_net_hdr);
+int mq_required = 0;
 
 TFR(fd = open(PATH_NET_TUN, O_RDWR));
 if (fd  0) {
@@ -76,6 +77,20 @@ int tap_open(char *ifname, int ifname_size, int *vnet_hdr, 
int vnet_hdr_required
 ioctl(fd, TUNSETVNETHDRSZ, len);
 }
 
+if (mq_required) {
+unsigned int features;
+
+if ((ioctl(fd, TUNGETFEATURES, features) != 0) ||
+!(features  IFF_MULTI_QUEUE)) {
+error_report(multiqueue required, but no kernel 
+ support for IFF_MULTI_QUEUE available);
+close(fd);
+return -1;
+} else {
+ifr.ifr_flags |= IFF_MULTI_QUEUE;
+}
+}
+
 if (ifname[0] != '\0')
 pstrcpy(ifr.ifr_name, IFNAMSIZ, ifname);
 else
@@ -209,3 +224,39 @@ void tap_fd_set_offload(int fd, int csum, int tso4,
 }
 }
 }
+
+/* Enable a specific queue of tap. */
+int tap_fd_enable(int fd)
+{
+struct ifreq ifr;
+int ret;
+
+memset(ifr, 0, sizeof(ifr));
+
+ifr.ifr_flags = IFF_ATTACH_QUEUE;
+ret = ioctl(fd, TUNSETQUEUE, (void *) ifr);
+
+if (ret != 0) {
+error_report(could not enable queue);
+}
+
+return ret;
+}
+
+/* Disable a specific queue of tap. */
+int tap_fd_disable(int fd)
+{
+struct ifreq ifr;
+int ret;
+
+memset(ifr, 0, sizeof(ifr));
+
+ifr.ifr_flags = IFF_DETACH_QUEUE;
+ret = ioctl(fd, TUNSETQUEUE, (void *) ifr);
+
+if (ret != 0) {
+error_report(could not disable queue);
+}
+
+return ret;
+}
diff --git a/net/tap-solaris.c b/net/tap-solaris.c
index 486a7ea..cc08e9e 100644
--- a/net/tap-solaris.c
+++ b/net/tap-solaris.c
@@ -225,3 +225,13 @@ void tap_fd_set_offload(int fd, int csum, int tso4,
 int tso6, int ecn, int ufo)
 {
 }
+
+int tap_fd_enable(int fd)
+{
+return -1;
+}
+
+int tap_fd_disable(int fd)
+{
+return -1;
+}
diff --git a/net/tap_int.h b/net/tap_int.h
index 1dffe12..ca1c21b 100644
--- a/net/tap_int.h
+++ b/net/tap_int.h
@@ -42,5 +42,7 @@ int tap_probe_vnet_hdr_len(int fd, int len);
 int tap_probe_has_ufo(int fd);
 void tap_fd_set_offload(int fd, int csum, int tso4, int tso6, int ecn, int 
ufo);
 void tap_fd_set_vnet_hdr_len(int fd, int len);
+int tap_fd_enable(int fd);
+int tap_fd_disable(int fd);
 
 #endif /* QEMU_TAP_H */
-- 
1.7.1

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH V4 RESEND 13/22] tap: support enabling or disabling a queue

2013-01-31 Thread Jason Wang

This patch introduces a new bit, enabled, in TAPState, which tracks whether a
specific queue/fd is enabled. The tap fd is enabled during initialization and
can be enabled/disabled by tap_enable() and tap_disable(), which call
platform-specific helpers to do the real work. Polling of a tap fd is only done
while the tap is enabled.

Signed-off-by: Jason Wang jasow...@redhat.com
Signed-off-by: Michael S. Tsirkin m...@redhat.com
---
 include/net/tap.h |2 ++
 net/tap-win32.c   |   10 ++
 net/tap.c |   43 ---
 3 files changed, 52 insertions(+), 3 deletions(-)

diff --git a/include/net/tap.h b/include/net/tap.h
index 883cebf..a994f20 100644
--- a/include/net/tap.h
+++ b/include/net/tap.h
@@ -35,6 +35,8 @@ int tap_has_vnet_hdr_len(NetClientState *nc, int len);
 void tap_using_vnet_hdr(NetClientState *nc, bool using_vnet_hdr);
 void tap_set_offload(NetClientState *nc, int csum, int tso4, int tso6, int 
ecn, int ufo);
 void tap_set_vnet_hdr_len(NetClientState *nc, int len);
+int tap_enable(NetClientState *nc);
+int tap_disable(NetClientState *nc);
 
 int tap_get_fd(NetClientState *nc);
 
diff --git a/net/tap-win32.c b/net/tap-win32.c
index 601437e..d0b89f6 100644
--- a/net/tap-win32.c
+++ b/net/tap-win32.c
@@ -764,3 +764,13 @@ void tap_set_vnet_hdr_len(NetClientState *nc, int len)
 {
 abort();
 }
+
+int tap_enable(NetClientState *nc)
+{
+return 0;
+}
+
+int tap_disable(NetClientState *nc)
+{
+abort();
+}
diff --git a/net/tap.c b/net/tap.c
index 23fb6e0..8610ba2 100644
--- a/net/tap.c
+++ b/net/tap.c
@@ -59,6 +59,7 @@ typedef struct TAPState {
 bool write_poll;
 bool using_vnet_hdr;
 bool has_ufo;
+bool enabled;
 VHostNetState *vhost_net;
 unsigned host_vnet_hdr_len;
 } TAPState;
@@ -72,9 +73,9 @@ static void tap_writable(void *opaque);
 static void tap_update_fd_handler(TAPState *s)
 {
 qemu_set_fd_handler2(s-fd,
- s-read_poll  ? tap_can_send : NULL,
- s-read_poll  ? tap_send : NULL,
- s-write_poll ? tap_writable : NULL,
+ s-read_poll  s-enabled ? tap_can_send : NULL,
+ s-read_poll  s-enabled ? tap_send : NULL,
+ s-write_poll  s-enabled ? tap_writable : NULL,
  s);
 }
 
@@ -337,6 +338,7 @@ static TAPState *net_tap_fd_init(NetClientState *peer,
 s-host_vnet_hdr_len = vnet_hdr ? sizeof(struct virtio_net_hdr) : 0;
 s-using_vnet_hdr = false;
 s-has_ufo = tap_probe_has_ufo(s-fd);
+s-enabled = true;
 tap_set_offload(s-nc, 0, 0, 0, 0, 0);
 /*
  * Make sure host header length is set correctly in tap:
@@ -735,3 +737,38 @@ VHostNetState *tap_get_vhost_net(NetClientState *nc)
 assert(nc-info-type == NET_CLIENT_OPTIONS_KIND_TAP);
 return s-vhost_net;
 }
+
+int tap_enable(NetClientState *nc)
+{
+TAPState *s = DO_UPCAST(TAPState, nc, nc);
+int ret;
+
+if (s-enabled) {
+return 0;
+} else {
+ret = tap_fd_enable(s-fd);
+if (ret == 0) {
+s-enabled = true;
+tap_update_fd_handler(s);
+}
+return ret;
+}
+}
+
+int tap_disable(NetClientState *nc)
+{
+TAPState *s = DO_UPCAST(TAPState, nc, nc);
+int ret;
+
+if (s-enabled == 0) {
+return 0;
+} else {
+ret = tap_fd_disable(s-fd);
+if (ret == 0) {
+qemu_purge_queued_packets(nc);
+s-enabled = false;
+tap_update_fd_handler(s);
+}
+return ret;
+}
+}
-- 
1.7.1

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH V4 RESEND 14/22] tap: introduce a helper to get the name of an interface

2013-01-31 Thread Jason Wang

This patch introduces a helper, tap_get_ifname(), to get the device name of a
tap device. This is needed when ifname is not specified on the command line and
qemu is asked to create the tap device by itself. In this situation the name is
allocated by the kernel, so if multiqueue is requested, we need to fetch the
name after creating the first queue.

Only Linux has this support, since it is the only platform that supports
multiqueue tap.

Signed-off-by: Jason Wang jasow...@redhat.com
Signed-off-by: Michael S. Tsirkin m...@redhat.com
---
 include/net/tap.h |1 +
 net/tap-aix.c |5 +
 net/tap-bsd.c |5 +
 net/tap-haiku.c   |5 +
 net/tap-linux.c   |   14 ++
 net/tap-solaris.c |5 +
 net/tap_int.h |1 +
 7 files changed, 36 insertions(+), 0 deletions(-)

diff --git a/include/net/tap.h b/include/net/tap.h
index a994f20..c3eb85a 100644
--- a/include/net/tap.h
+++ b/include/net/tap.h
@@ -37,6 +37,7 @@ void tap_set_offload(NetClientState *nc, int csum, int tso4, 
int tso6, int ecn,
 void tap_set_vnet_hdr_len(NetClientState *nc, int len);
 int tap_enable(NetClientState *nc);
 int tap_disable(NetClientState *nc);
+int tap_get_ifname(NetClientState *nc, char *ifname);
 
 int tap_get_fd(NetClientState *nc);
 
diff --git a/net/tap-aix.c b/net/tap-aix.c
index 66e0574..0e1eac3 100644
--- a/net/tap-aix.c
+++ b/net/tap-aix.c
@@ -69,3 +69,8 @@ int tap_fd_disable(int fd)
 {
 return -1;
 }
+
+int tap_fd_get_ifname(int fd, char *ifname)
+{
+return -1;
+}
diff --git a/net/tap-bsd.c b/net/tap-bsd.c
index 5ed2d16..4f22109 100644
--- a/net/tap-bsd.c
+++ b/net/tap-bsd.c
@@ -155,3 +155,8 @@ int tap_fd_disable(int fd)
 {
 return -1;
 }
+
+int tap_fd_get_ifname(int fd, char *ifname)
+{
+return -1;
+}
diff --git a/net/tap-haiku.c b/net/tap-haiku.c
index 0f1b1fe..b3b5fbb 100644
--- a/net/tap-haiku.c
+++ b/net/tap-haiku.c
@@ -69,3 +69,8 @@ int tap_fd_disable(int fd)
 {
 return -1;
 }
+
+int tap_fd_get_ifname(int fd, char *ifname)
+{
+return -1;
+}
diff --git a/net/tap-linux.c b/net/tap-linux.c
index 42376cc..3b21662 100644
--- a/net/tap-linux.c
+++ b/net/tap-linux.c
@@ -260,3 +260,17 @@ int tap_fd_disable(int fd)
 
 return ret;
 }
+
+int tap_fd_get_ifname(int fd, char *ifname)
+{
+struct ifreq ifr;
+
+if (ioctl(fd, TUNGETIFF, ifr) != 0) {
+error_report(TUNGETIFF ioctl() failed: %s,
+ strerror(errno));
+return -1;
+}
+
+pstrcpy(ifname, sizeof(ifr.ifr_name), ifr.ifr_name);
+return 0;
+}
diff --git a/net/tap-solaris.c b/net/tap-solaris.c
index cc08e9e..214d95e 100644
--- a/net/tap-solaris.c
+++ b/net/tap-solaris.c
@@ -235,3 +235,8 @@ int tap_fd_disable(int fd)
 {
 return -1;
 }
+
+int tap_fd_get_ifname(int fd, char *ifname)
+{
+return -1;
+}
diff --git a/net/tap_int.h b/net/tap_int.h
index ca1c21b..125f83d 100644
--- a/net/tap_int.h
+++ b/net/tap_int.h
@@ -44,5 +44,6 @@ void tap_fd_set_offload(int fd, int csum, int tso4, int tso6, 
int ecn, int ufo);
 void tap_fd_set_vnet_hdr_len(int fd, int len);
 int tap_fd_enable(int fd);
 int tap_fd_disable(int fd);
+int tap_fd_get_ifname(int fd, char *ifname);
 
 #endif /* QEMU_TAP_H */
-- 
1.7.1

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH V4 RESEND 15/22] tap: multiqueue support

2013-01-31 Thread Jason Wang

Recently, Linux gained support for multiqueue tap, which lets userspace call
TUNSETIFF for a single device many times to create multiple file descriptors as
independent queues. Users can also enable/disable a specific queue through
TUNSETQUEUE.

The patch adds the generic infrastructure to create multiqueue taps. To achieve
this, a new parameter, queues, is introduced to specify how many queues are
expected to be created for tap by qemu itself. Alternatively, management can
also pass multiple pre-created tap file descriptors separated with ':' through a
new parameter, fds, like -netdev tap,id=hn0,fds=X:Y:..:Z. Multiple vhost file
descriptors can also be passed in this way.

Each TAPState is still associated with one tap fd, which means multiple TAPStates
are created when the user needs multiqueue taps. Since each TAPState contains one
NetClientState, with the multiqueue nic support, N peers of NetClientState
are built up.

A new parameter, mq_required, is introduced in tap_open() to create multiqueue
tap fds.

Signed-off-by: Jason Wang jasow...@redhat.com
Signed-off-by: Michael S. Tsirkin m...@redhat.com
---
 include/net/tap.h |1 -
 net/tap-aix.c |3 +-
 net/tap-bsd.c |3 +-
 net/tap-haiku.c   |3 +-
 net/tap-linux.c   |4 +-
 net/tap-solaris.c |3 +-
 net/tap.c |  158 +
 net/tap_int.h |3 +-
 qapi-schema.json  |5 +-
 9 files changed, 139 insertions(+), 44 deletions(-)

diff --git a/include/net/tap.h b/include/net/tap.h
index c3eb85a..a994f20 100644
--- a/include/net/tap.h
+++ b/include/net/tap.h
@@ -37,7 +37,6 @@ void tap_set_offload(NetClientState *nc, int csum, int tso4, 
int tso6, int ecn,
 void tap_set_vnet_hdr_len(NetClientState *nc, int len);
 int tap_enable(NetClientState *nc);
 int tap_disable(NetClientState *nc);
-int tap_get_ifname(NetClientState *nc, char *ifname);
 
 int tap_get_fd(NetClientState *nc);
 
diff --git a/net/tap-aix.c b/net/tap-aix.c
index 0e1eac3..3953b60 100644
--- a/net/tap-aix.c
+++ b/net/tap-aix.c
@@ -25,7 +25,8 @@
 #include tap_int.h
 #include stdio.h
 
-int tap_open(char *ifname, int ifname_size, int *vnet_hdr, int 
vnet_hdr_required)
+int tap_open(char *ifname, int ifname_size, int *vnet_hdr,
+ int vnet_hdr_required, int mq_required)
 {
 fprintf(stderr, no tap on AIX\n);
 return -1;
diff --git a/net/tap-bsd.c b/net/tap-bsd.c
index 4f22109..bcdb268 100644
--- a/net/tap-bsd.c
+++ b/net/tap-bsd.c
@@ -33,7 +33,8 @@
 #include net/if_tap.h
 #endif
 
-int tap_open(char *ifname, int ifname_size, int *vnet_hdr, int 
vnet_hdr_required)
+int tap_open(char *ifname, int ifname_size, int *vnet_hdr,
+ int vnet_hdr_required, int mq_required)
 {
 int fd;
 #ifdef TAPGIFNAME
diff --git a/net/tap-haiku.c b/net/tap-haiku.c
index b3b5fbb..e5ce436 100644
--- a/net/tap-haiku.c
+++ b/net/tap-haiku.c
@@ -25,7 +25,8 @@
 #include tap_int.h
 #include stdio.h
 
-int tap_open(char *ifname, int ifname_size, int *vnet_hdr, int 
vnet_hdr_required)
+int tap_open(char *ifname, int ifname_size, int *vnet_hdr,
+ int vnet_hdr_required, int mq_required)
 {
 fprintf(stderr, no tap on Haiku\n);
 return -1;
diff --git a/net/tap-linux.c b/net/tap-linux.c
index 3b21662..a953189 100644
--- a/net/tap-linux.c
+++ b/net/tap-linux.c
@@ -36,12 +36,12 @@
 
 #define PATH_NET_TUN /dev/net/tun
 
-int tap_open(char *ifname, int ifname_size, int *vnet_hdr, int 
vnet_hdr_required)
+int tap_open(char *ifname, int ifname_size, int *vnet_hdr,
+ int vnet_hdr_required, int mq_required)
 {
 struct ifreq ifr;
 int fd, ret;
 int len = sizeof(struct virtio_net_hdr);
-int mq_required = 0;
 
 TFR(fd = open(PATH_NET_TUN, O_RDWR));
 if (fd  0) {
diff --git a/net/tap-solaris.c b/net/tap-solaris.c
index 214d95e..9c7278f 100644
--- a/net/tap-solaris.c
+++ b/net/tap-solaris.c
@@ -173,7 +173,8 @@ static int tap_alloc(char *dev, size_t dev_size)
 return tap_fd;
 }
 
-int tap_open(char *ifname, int ifname_size, int *vnet_hdr, int 
vnet_hdr_required)
+int tap_open(char *ifname, int ifname_size, int *vnet_hdr,
+ int vnet_hdr_required, int mq_required)
 {
 char  dev[10]=;
 int fd;
diff --git a/net/tap.c b/net/tap.c
index 8610ba2..1bf7609 100644
--- a/net/tap.c
+++ b/net/tap.c
@@ -558,17 +558,10 @@ int net_init_bridge(const NetClientOptions *opts, const 
char *name,
 
 static int net_tap_init(const NetdevTapOptions *tap, int *vnet_hdr,
 const char *setup_script, char *ifname,
-size_t ifname_sz)
+size_t ifname_sz, int mq_required)
 {
 int fd, vnet_hdr_required;
 
-if (tap-has_ifname) {
-pstrcpy(ifname, ifname_sz, tap-ifname);
-} else {
-assert(ifname_sz  0);
-ifname[0] = '\0';
-}
-
 if (tap-has_vnet_hdr) {
 *vnet_hdr = tap-vnet_hdr;
 vnet_hdr_required = *vnet_hdr;
@@ -577,7 +570,8 @@ static int net_tap_init(const

[PATCH V4 RESEND 18/22] virtio: add a queue_index to VirtQueue

2013-01-31 Thread Jason Wang

Add a queue_index to VirtQueue and a helper to fetch it; this can be used by
multiqueue-capable devices.

Signed-off-by: Jason Wang jasow...@redhat.com
Signed-off-by: Michael S. Tsirkin m...@redhat.com
---
 hw/virtio.c |8 
 hw/virtio.h |1 +
 2 files changed, 9 insertions(+), 0 deletions(-)

diff --git a/hw/virtio.c b/hw/virtio.c
index d8c77b0..e259348 100644
--- a/hw/virtio.c
+++ b/hw/virtio.c
@@ -73,6 +73,8 @@ struct VirtQueue
 /* Notification enabled? */
 bool notification;
 
+uint16_t queue_index;
+
 int inuse;
 
 uint16_t vector;
@@ -931,6 +933,7 @@ void virtio_init(VirtIODevice *vdev, const char *name,
 for (i = 0; i  VIRTIO_PCI_QUEUE_MAX; i++) {
 vdev-vq[i].vector = VIRTIO_NO_VECTOR;
 vdev-vq[i].vdev = vdev;
+vdev-vq[i].queue_index = i;
 }
 
 vdev-name = name;
@@ -1018,6 +1021,11 @@ VirtQueue *virtio_get_queue(VirtIODevice *vdev, int n)
 return vdev-vq + n;
 }
 
+uint16_t virtio_get_queue_index(VirtQueue *vq)
+{
+return vq-queue_index;
+}
+
 static void virtio_queue_guest_notifier_read(EventNotifier *n)
 {
 VirtQueue *vq = container_of(n, VirtQueue, guest_notifier);
diff --git a/hw/virtio.h b/hw/virtio.h
index d3da1d2..a29a54d 100644
--- a/hw/virtio.h
+++ b/hw/virtio.h
@@ -280,6 +280,7 @@ hwaddr virtio_queue_get_ring_size(VirtIODevice *vdev, int 
n);
 uint16_t virtio_queue_get_last_avail_idx(VirtIODevice *vdev, int n);
 void virtio_queue_set_last_avail_idx(VirtIODevice *vdev, int n, uint16_t idx);
 VirtQueue *virtio_get_queue(VirtIODevice *vdev, int n);
+uint16_t virtio_get_queue_index(VirtQueue *vq);
 int virtio_queue_get_id(VirtQueue *vq);
 EventNotifier *virtio_queue_get_guest_notifier(VirtQueue *vq);
 void virtio_queue_set_guest_notifier_fd_handler(VirtQueue *vq, bool assign,
-- 
1.7.1

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH V4 RESEND 19/22] virtio-net: separate virtqueue from VirtIONet

2013-01-31 Thread Jason Wang

To support multiqueue virtio-net, the first step is to separate the
virtqueue-related fields from VirtIONet into a new structure, VirtIONetQueue. The
following patches will add an array of VirtIONetQueue to VirtIONet based on this
patch.

Signed-off-by: Jason Wang jasow...@redhat.com
Signed-off-by: Michael S. Tsirkin m...@redhat.com
---
 hw/virtio-net.c |  195 ---
 1 files changed, 114 insertions(+), 81 deletions(-)

diff --git a/hw/virtio-net.c b/hw/virtio-net.c
index f4146aa..4b285c1 100644
--- a/hw/virtio-net.c
+++ b/hw/virtio-net.c
@@ -26,28 +26,33 @@
 #define MAC_TABLE_ENTRIES64
 #define MAX_VLAN(1  12)   /* Per 802.1Q definition */
 
+typedef struct VirtIONetQueue {
+VirtQueue *rx_vq;
+VirtQueue *tx_vq;
+QEMUTimer *tx_timer;
+QEMUBH *tx_bh;
+int tx_waiting;
+struct {
+VirtQueueElement elem;
+ssize_t len;
+} async_tx;
+struct VirtIONet *n;
+} VirtIONetQueue;
+
 typedef struct VirtIONet
 {
 VirtIODevice vdev;
 uint8_t mac[ETH_ALEN];
 uint16_t status;
-VirtQueue *rx_vq;
-VirtQueue *tx_vq;
+VirtIONetQueue vq;
 VirtQueue *ctrl_vq;
 NICState *nic;
-QEMUTimer *tx_timer;
-QEMUBH *tx_bh;
 uint32_t tx_timeout;
 int32_t tx_burst;
-int tx_waiting;
 uint32_t has_vnet_hdr;
 size_t host_hdr_len;
 size_t guest_hdr_len;
 uint8_t has_ufo;
-struct {
-VirtQueueElement elem;
-ssize_t len;
-} async_tx;
 int mergeable_rx_bufs;
 uint8_t promisc;
 uint8_t allmulti;
@@ -67,6 +72,12 @@ typedef struct VirtIONet
 DeviceState *qdev;
 } VirtIONet;
 
+static VirtIONetQueue *virtio_net_get_queue(NetClientState *nc)
+{
+VirtIONet *n = qemu_get_nic_opaque(nc);
+
+return n-vq;
+}
 /* TODO
  * - we could suppress RX interrupt if we were so inclined.
  */
@@ -135,6 +146,8 @@ static void virtio_net_vhost_status(VirtIONet *n, uint8_t 
status)
 error_report(unable to start vhost net: %d: 
  falling back on userspace virtio, -r);
 n-vhost_started = 0;
+} else {
+n-vhost_started = 1;
 }
 } else {
 vhost_net_stop(n-vdev, nc, 1);
@@ -145,25 +158,26 @@ static void virtio_net_vhost_status(VirtIONet *n, uint8_t 
status)
 static void virtio_net_set_status(struct VirtIODevice *vdev, uint8_t status)
 {
 VirtIONet *n = to_virtio_net(vdev);
+VirtIONetQueue *q = n-vq;
 
 virtio_net_vhost_status(n, status);
 
-if (!n-tx_waiting) {
+if (!q-tx_waiting) {
 return;
 }
 
 if (virtio_net_started(n, status)  !n-vhost_started) {
-if (n-tx_timer) {
-qemu_mod_timer(n-tx_timer,
+if (q-tx_timer) {
+qemu_mod_timer(q-tx_timer,
qemu_get_clock_ns(vm_clock) + n-tx_timeout);
 } else {
-qemu_bh_schedule(n-tx_bh);
+qemu_bh_schedule(q-tx_bh);
 }
 } else {
-if (n-tx_timer) {
-qemu_del_timer(n-tx_timer);
+if (q-tx_timer) {
+qemu_del_timer(q-tx_timer);
 } else {
-qemu_bh_cancel(n-tx_bh);
+qemu_bh_cancel(q-tx_bh);
 }
 }
 }
@@ -507,35 +521,40 @@ static void virtio_net_handle_rx(VirtIODevice *vdev, 
VirtQueue *vq)
 static int virtio_net_can_receive(NetClientState *nc)
 {
 VirtIONet *n = qemu_get_nic_opaque(nc);
+VirtIONetQueue *q = virtio_net_get_queue(nc);
+
 if (!n-vdev.vm_running) {
 return 0;
 }
 
-if (!virtio_queue_ready(n-rx_vq) ||
-!(n-vdev.status  VIRTIO_CONFIG_S_DRIVER_OK))
+if (!virtio_queue_ready(q-rx_vq) ||
+!(n-vdev.status  VIRTIO_CONFIG_S_DRIVER_OK)) {
 return 0;
+}
 
 return 1;
 }
 
-static int virtio_net_has_buffers(VirtIONet *n, int bufsize)
+static int virtio_net_has_buffers(VirtIONetQueue *q, int bufsize)
 {
-if (virtio_queue_empty(n-rx_vq) ||
+VirtIONet *n = q-n;
+if (virtio_queue_empty(q-rx_vq) ||
 (n-mergeable_rx_bufs 
- !virtqueue_avail_bytes(n-rx_vq, bufsize, 0))) {
-virtio_queue_set_notification(n-rx_vq, 1);
+ !virtqueue_avail_bytes(q-rx_vq, bufsize, 0))) {
+virtio_queue_set_notification(q-rx_vq, 1);
 
 /* To avoid a race condition where the guest has made some buffers
  * available after the above check but before notification was
  * enabled, check for available buffers again.
  */
-if (virtio_queue_empty(n-rx_vq) ||
+if (virtio_queue_empty(q-rx_vq) ||
 (n-mergeable_rx_bufs 
- !virtqueue_avail_bytes(n-rx_vq, bufsize, 0)))
+ !virtqueue_avail_bytes(q-rx_vq, bufsize, 0))) {
 return 0;
+}
 }
 
-virtio_queue_set_notification(n-rx_vq, 0);
+virtio_queue_set_notification(q-rx_vq, 0);
 return 1;
 }
 
@@ -638,6 +657,7 @@ static int receive_filter(VirtIONet *n, const uint8_t *buf, 
int size)
 static

[PATCH V4 RESEND 20/22] virtio-net: multiqueue support

2013-01-31 Thread Jason Wang

This patch implements both userspace and vhost support for multiple queue
virtio-net (VIRTIO_NET_F_MQ). This is done by introducing an array of
VirtIONetQueue to VirtIONet.

Signed-off-by: Jason Wang jasow...@redhat.com
Signed-off-by: Michael S. Tsirkin m...@redhat.com
---
 hw/virtio-net.c |  301 +++
 hw/virtio-net.h |   27 +-
 2 files changed, 261 insertions(+), 67 deletions(-)

diff --git a/hw/virtio-net.c b/hw/virtio-net.c
index 4b285c1..2067fa7 100644
--- a/hw/virtio-net.c
+++ b/hw/virtio-net.c
@@ -44,7 +44,7 @@ typedef struct VirtIONet
 VirtIODevice vdev;
 uint8_t mac[ETH_ALEN];
 uint16_t status;
-VirtIONetQueue vq;
+VirtIONetQueue vqs[MAX_QUEUE_NUM];
 VirtQueue *ctrl_vq;
 NICState *nic;
 uint32_t tx_timeout;
@@ -70,14 +70,23 @@ typedef struct VirtIONet
 } mac_table;
 uint32_t *vlans;
 DeviceState *qdev;
+int multiqueue;
+uint16_t max_queues;
+uint16_t curr_queues;
 } VirtIONet;
 
-static VirtIONetQueue *virtio_net_get_queue(NetClientState *nc)
+static VirtIONetQueue *virtio_net_get_subqueue(NetClientState *nc)
 {
 VirtIONet *n = qemu_get_nic_opaque(nc);
 
-return n-vq;
+return n-vqs[nc-queue_index];
 }
+
+static int vq2q(int queue_index)
+{
+return queue_index / 2;
+}
+
 /* TODO
  * - we could suppress RX interrupt if we were so inclined.
  */
@@ -93,6 +102,7 @@ static void virtio_net_get_config(VirtIODevice *vdev, 
uint8_t *config)
 struct virtio_net_config netcfg;
 
 stw_p(netcfg.status, n-status);
+stw_p(netcfg.max_virtqueue_pairs, n-max_queues);
 memcpy(netcfg.mac, n-mac, ETH_ALEN);
 memcpy(config, netcfg, sizeof(netcfg));
 }
@@ -120,6 +130,7 @@ static bool virtio_net_started(VirtIONet *n, uint8_t status)
 static void virtio_net_vhost_status(VirtIONet *n, uint8_t status)
 {
 NetClientState *nc = qemu_get_queue(n-nic);
+int queues = n-multiqueue ? n-max_queues : 1;
 
 if (!nc-peer) {
 return;
@@ -131,6 +142,7 @@ static void virtio_net_vhost_status(VirtIONet *n, uint8_t 
status)
 if (!tap_get_vhost_net(nc-peer)) {
 return;
 }
+
 if (!!n-vhost_started == virtio_net_started(n, status) 
   !nc-peer-link_down) {
 return;
@@ -141,16 +153,14 @@ static void virtio_net_vhost_status(VirtIONet *n, uint8_t 
status)
 return;
 }
 n-vhost_started = 1;
-r = vhost_net_start(n-vdev, nc, 1);
+r = vhost_net_start(n-vdev, n-nic-ncs, queues);
 if (r  0) {
 error_report(unable to start vhost net: %d: 
  falling back on userspace virtio, -r);
 n-vhost_started = 0;
-} else {
-n-vhost_started = 1;
 }
 } else {
-vhost_net_stop(n-vdev, nc, 1);
+vhost_net_stop(n-vdev, n-nic-ncs, queues);
 n-vhost_started = 0;
 }
 }
@@ -158,26 +168,38 @@ static void virtio_net_vhost_status(VirtIONet *n, uint8_t 
status)
 static void virtio_net_set_status(struct VirtIODevice *vdev, uint8_t status)
 {
 VirtIONet *n = to_virtio_net(vdev);
-VirtIONetQueue *q = n-vq;
+VirtIONetQueue *q;
+int i;
+uint8_t queue_status;
 
 virtio_net_vhost_status(n, status);
 
-if (!q-tx_waiting) {
-return;
-}
+for (i = 0; i  n-max_queues; i++) {
+q = n-vqs[i];
 
-if (virtio_net_started(n, status)  !n-vhost_started) {
-if (q-tx_timer) {
-qemu_mod_timer(q-tx_timer,
-   qemu_get_clock_ns(vm_clock) + n-tx_timeout);
+if ((!n-multiqueue  i != 0) || i = n-curr_queues) {
+queue_status = 0;
 } else {
-qemu_bh_schedule(q-tx_bh);
+queue_status = status;
 }
-} else {
-if (q-tx_timer) {
-qemu_del_timer(q-tx_timer);
+
+if (!q-tx_waiting) {
+continue;
+}
+
+if (virtio_net_started(n, queue_status)  !n-vhost_started) {
+if (q-tx_timer) {
+qemu_mod_timer(q-tx_timer,
+   qemu_get_clock_ns(vm_clock) + n-tx_timeout);
+} else {
+qemu_bh_schedule(q-tx_bh);
+}
 } else {
-qemu_bh_cancel(q-tx_bh);
+if (q-tx_timer) {
+qemu_del_timer(q-tx_timer);
+} else {
+qemu_bh_cancel(q-tx_bh);
+}
 }
 }
 }
@@ -209,6 +231,8 @@ static void virtio_net_reset(VirtIODevice *vdev)
 n-nomulti = 0;
 n-nouni = 0;
 n-nobcast = 0;
+/* multiqueue is disabled by default */
+n-curr_queues = 1;
 
 /* Flush any MAC and VLAN filter table state */
 n-mac_table.in_use = 0;
@@ -251,18 +275,70 @@ static int peer_has_ufo(VirtIONet *n)
 
 static void virtio_net_set_mrg_rx_bufs(VirtIONet *n, int mergeable_rx_bufs)
 {
+int i;
+NetClientState *nc;
+
 n-mergeable_rx_bufs = mergeable_rx_bufs;

[PATCH V4 RESEND 21/22] virtio-net: migration support for multiqueue

2013-01-31 Thread Jason Wang

This patch adds migration support for multiqueue virtio-net. Instead of bumping
the version, we conditionally send the multiqueue info only when the device
supports more than one queue, to maintain backward compatibility.

Signed-off-by: Jason Wang jasow...@redhat.com
Signed-off-by: Michael S. Tsirkin m...@redhat.com
---
 hw/virtio-net.c |   35 +--
 1 files changed, 29 insertions(+), 6 deletions(-)

diff --git a/hw/virtio-net.c b/hw/virtio-net.c
index 2067fa7..5699f5e 100644
--- a/hw/virtio-net.c
+++ b/hw/virtio-net.c
@@ -1093,8 +1093,8 @@ static void virtio_net_set_multiqueue(VirtIONet *n, int 
multiqueue, int ctrl)
 
 static void virtio_net_save(QEMUFile *f, void *opaque)
 {
+int i;
 VirtIONet *n = opaque;
-VirtIONetQueue *q = n-vqs[0];
 
 /* At this point, backend must be stopped, otherwise
  * it might keep writing to memory. */
@@ -1102,7 +1102,7 @@ static void virtio_net_save(QEMUFile *f, void *opaque)
 virtio_save(n-vdev, f);
 
 qemu_put_buffer(f, n-mac, ETH_ALEN);
-qemu_put_be32(f, q-tx_waiting);
+qemu_put_be32(f, n-vqs[0].tx_waiting);
 qemu_put_be32(f, n-mergeable_rx_bufs);
 qemu_put_be16(f, n-status);
 qemu_put_byte(f, n-promisc);
@@ -1118,13 +1118,19 @@ static void virtio_net_save(QEMUFile *f, void *opaque)
 qemu_put_byte(f, n-nouni);
 qemu_put_byte(f, n-nobcast);
 qemu_put_byte(f, n-has_ufo);
+if (n-max_queues  1) {
+qemu_put_be16(f, n-max_queues);
+qemu_put_be16(f, n-curr_queues);
+for (i = 1; i  n-curr_queues; i++) {
+qemu_put_be32(f, n-vqs[i].tx_waiting);
+}
+}
 }
 
 static int virtio_net_load(QEMUFile *f, void *opaque, int version_id)
 {
 VirtIONet *n = opaque;
-VirtIONetQueue *q = n-vqs[0];
-int ret, i;
+int ret, i, link_down;
 
 if (version_id  2 || version_id  VIRTIO_NET_VM_VERSION)
 return -EINVAL;
@@ -1135,7 +1141,7 @@ static int virtio_net_load(QEMUFile *f, void *opaque, int 
version_id)
 }
 
 qemu_get_buffer(f, n-mac, ETH_ALEN);
-q-tx_waiting = qemu_get_be32(f);
+n-vqs[0].tx_waiting = qemu_get_be32(f);
 
 virtio_net_set_mrg_rx_bufs(n, qemu_get_be32(f));
 
@@ -1205,6 +1211,20 @@ static int virtio_net_load(QEMUFile *f, void *opaque, 
int version_id)
 }
 }
 
+if (n-max_queues  1) {
+if (n-max_queues != qemu_get_be16(f)) {
+error_report(virtio-net: different max_queues );
+return -1;
+}
+
+n-curr_queues = qemu_get_be16(f);
+for (i = 1; i  n-curr_queues; i++) {
+n-vqs[i].tx_waiting = qemu_get_be32(f);
+}
+}
+
+virtio_net_set_queues(n);
+
 /* Find the first multicast entry in the saved MAC filter */
 for (i = 0; i  n-mac_table.in_use; i++) {
 if (n-mac_table.macs[i * ETH_ALEN]  1) {
@@ -1215,7 +1235,10 @@ static int virtio_net_load(QEMUFile *f, void *opaque, 
int version_id)
 
 /* nc.link_down can't be migrated, so infer link_down according
  * to link status bit in n-status */
-qemu_get_queue(n-nic)-link_down = (n-status  VIRTIO_NET_S_LINK_UP) == 
0;
+link_down = (n-status  VIRTIO_NET_S_LINK_UP) == 0;
+for (i = 0; i  n-max_queues; i++) {
+qemu_get_subqueue(n-nic, i)-link_down = link_down;
+}
 
 return 0;
 }
-- 
1.7.1

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH V4 RESEND 22/22] virtio-net: compat multiqueue support

2013-01-31 Thread Jason Wang

Disable multiqueue support for pre 1.4.

Signed-off-by: Jason Wang jasow...@redhat.com
Signed-off-by: Michael S. Tsirkin m...@redhat.com
---
 hw/pc_piix.c |4 
 1 files changed, 4 insertions(+), 0 deletions(-)

diff --git a/hw/pc_piix.c b/hw/pc_piix.c
index ba09714..0af436c 100644
--- a/hw/pc_piix.c
+++ b/hw/pc_piix.c
@@ -313,6 +313,10 @@ static QEMUMachine pc_i440fx_machine_v1_4 = {
 .driver   = virtio-net-pci,\
 .property = ctrl_mac_addr,\
 .value= off,  \
+},{ \
+.driver   = virtio-net-pci, \
+.property = mq, \
+.value= off, \
 }
 
 static QEMUMachine pc_machine_v1_3 = {
-- 
1.7.1

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

1 2 >

1 - 100 of 130 matches

Mail list logo