from:"Blue Swirl"

Re: [PATCH v3 2/3] VFIO-AER: Vfio-pci driver changes for supporting AER

2013-02-03 Thread Blue Swirl

On Sun, Feb 3, 2013 at 2:10 PM, Pandarathil, Vijaymohan R
vijaymohan.pandarat...@hp.com wrote:
 - New VFIO_SET_IRQ ioctl option to pass the eventfd that is signaled 
 when
   an error occurs in the vfio_pci_device

 - Register pci_error_handler for the vfio_pci driver

 - When the device encounters an error, the error handler registered by
   the vfio_pci driver gets invoked by the AER infrastructure

 - In the error handler, signal the eventfd registered for the device.

 - This results in the qemu eventfd handler getting invoked and
   appropriate action taken for the guest.

 Signed-off-by: Vijay Mohan Pandarathil vijaymohan.pandarat...@hp.com
 ---
  drivers/vfio/pci/vfio_pci.c | 43 
 -
  drivers/vfio/pci/vfio_pci_intrs.c   | 30 ++
  drivers/vfio/pci/vfio_pci_private.h |  1 +
  include/uapi/linux/vfio.h   |  1 +
  4 files changed, 74 insertions(+), 1 deletion(-)

 diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c
 index b28e66c..818b1ed 100644
 --- a/drivers/vfio/pci/vfio_pci.c
 +++ b/drivers/vfio/pci/vfio_pci.c
 @@ -196,7 +196,9 @@ static int vfio_pci_get_irq_count(struct vfio_pci_device 
 *vdev, int irq_type)

 return (flags  PCI_MSIX_FLAGS_QSIZE) + 1;
 }
 -   }
 +   } else if (irq_type == VFIO_PCI_ERR_IRQ_INDEX)
 +   if (pci_is_pcie(vdev-pdev))
 +   return 1;

 return 0;
  }
 @@ -302,6 +304,16 @@ static long vfio_pci_ioctl(void *device_data,
 if (info.argsz  minsz || info.index = VFIO_PCI_NUM_IRQS)
 return -EINVAL;

 +   switch (info.index) {
 +   case VFIO_PCI_INTX_IRQ_INDEX ... VFIO_PCI_MSIX_IRQ_INDEX:
 +   break;
 +   case VFIO_PCI_ERR_IRQ_INDEX:
 +   if (pci_is_pcie(vdev-pdev))
 +   break;

I don't know what the policy in the Linux kernel is for this, but I'd add
a comment about the fall-through here.

 +   default:
 +   return -EINVAL;
 +   }
 +
 info.flags = VFIO_IRQ_INFO_EVENTFD;

 info.count = vfio_pci_get_irq_count(vdev, info.index);
 @@ -538,11 +550,40 @@ static void vfio_pci_remove(struct pci_dev *pdev)
 kfree(vdev);
  }

 +static pci_ers_result_t vfio_pci_aer_err_detected(struct pci_dev *pdev,
 + pci_channel_state_t state)
 +{
 +   struct vfio_pci_device *vpdev;
 +   void *vdev;
 +
 +   vdev = vfio_device_get_from_dev(pdev-dev);
 +   if (vdev == NULL)
 +   return PCI_ERS_RESULT_DISCONNECT;
 +
 +   vpdev = vfio_device_data(vdev);
 +   if (vpdev == NULL) {
 +   vfio_device_put(vdev);
 +   return PCI_ERS_RESULT_DISCONNECT;
 +   }
 +
 +   if (vpdev-err_trigger)
 +   eventfd_signal(vpdev-err_trigger, 1);
 +
 +   vfio_device_put(vdev);
 +
 +   return PCI_ERS_RESULT_CAN_RECOVER;
 +}
 +
 +static struct pci_error_handlers vfio_err_handlers = {
 +   .error_detected = vfio_pci_aer_err_detected,
 +};
 +
  static struct pci_driver vfio_pci_driver = {
 .name   = vfio-pci,
 .id_table   = NULL, /* only dynamic ids */
 .probe  = vfio_pci_probe,
 .remove = vfio_pci_remove,
 +   .err_handler= vfio_err_handlers,
  };

  static void __exit vfio_pci_cleanup(void)
 diff --git a/drivers/vfio/pci/vfio_pci_intrs.c 
 b/drivers/vfio/pci/vfio_pci_intrs.c
 index 3639371..83035b1 100644
 --- a/drivers/vfio/pci/vfio_pci_intrs.c
 +++ b/drivers/vfio/pci/vfio_pci_intrs.c
 @@ -745,6 +745,29 @@ static int vfio_pci_set_msi_trigger(struct 
 vfio_pci_device *vdev,
 return 0;
  }

 +static int vfio_pci_set_err_trigger(struct vfio_pci_device *vdev,
 +   unsigned index, unsigned start,
 +   unsigned count, uint32_t flags, void 
 *data)
 +{
 +   int32_t fd = *(int32_t *)data;
 +
 +   if ((index != VFIO_PCI_ERR_IRQ_INDEX) ||
 +   !(flags  VFIO_IRQ_SET_DATA_EVENTFD))
 +   return -EINVAL;
 +
 +   if (fd == -1) {
 +   if (vdev-err_trigger)
 +   eventfd_ctx_put(vdev-err_trigger);
 +   vdev-err_trigger = NULL;
 +   return 0;
 +   } else if (fd = 0) {
 +   vdev-err_trigger = eventfd_ctx_fdget(fd);
 +   if (IS_ERR(vdev-err_trigger))
 +   return PTR_ERR(vdev-err_trigger);
 +   return 0;
 +   } else
 +   return -EINVAL;
 +}
  int vfio_pci_set_irqs_ioctl(struct vfio_pci_device *vdev, uint32_t flags,
 unsigned index, unsigned start, unsigned count,
 void *data)
 @@ -779,6 +802,13 @@ int

Re: [PATCH v3 3/3] QEMU-AER: Qemu changes to support AER for VFIO-PCI devices

2013-02-03 Thread Blue Swirl

On Sun, Feb 3, 2013 at 2:10 PM, Pandarathil, Vijaymohan R
vijaymohan.pandarat...@hp.com wrote:
 - Create eventfd per vfio device assigned to a guest and register an
   event handler

 - This fd is passed to the vfio_pci driver through the SET_IRQ ioctl

 - When the device encounters an error, the eventfd is signalled
   and the qemu eventfd handler gets invoked.

 - In the handler decide what action to take. Current action taken
   is to terminate the guest.

Usually this is not OK, but I guess this is not guest triggerable.


 Signed-off-by: Vijay Mohan Pandarathil vijaymohan.pandarat...@hp.com
 ---
  hw/vfio_pci.c  | 105 
 +
  linux-headers/linux/vfio.h |   1 +
  2 files changed, 106 insertions(+)

 diff --git a/hw/vfio_pci.c b/hw/vfio_pci.c
 index c51ae67..4e2f768 100644
 --- a/hw/vfio_pci.c
 +++ b/hw/vfio_pci.c
 @@ -130,6 +130,8 @@ typedef struct VFIODevice {
  QLIST_ENTRY(VFIODevice) next;
  struct VFIOGroup *group;
  bool reset_works;
 +EventNotifier err_notifier;
 +bool pci_aer;
  } VFIODevice;

  typedef struct VFIOGroup {
 @@ -1922,6 +1924,106 @@ static void vfio_put_device(VFIODevice *vdev)
  }
  }

 +static void vfio_err_notifier_handler(void *opaque)
 +{
 +VFIODevice *vdev = opaque;
 +
 +if (!event_notifier_test_and_clear(vdev-err_notifier)) {
 +return;
 +}
 +
 +/*
 + * TBD. Retrieve the error details and decide what action
 + * needs to be taken. One of the actions could be to pass
 + * the error to the guest and have the guest driver recover
 + * from the error. This requires that PCIe capabilities be
 + * exposed to the guest. At present, we just terminate the
 + * guest to contain the error.
 + */
 +
 +error_report(%s (%04x:%02x:%02x.%x)
 +Unrecoverable error detected... Terminating guest\n,
 +__func__, vdev-host.domain, vdev-host.bus,
 +vdev-host.slot, vdev-host.function);
 +
 +hw_error((%04x:%02x:%02x.%x) Unrecoverable device error\n,
 +vdev-host.domain, vdev-host.bus,
 +vdev-host.slot, vdev-host.function);
 +
 +return;

Useless, please remove.

 +}
 +
 +static void vfio_register_err_notifier(VFIODevice *vdev)
 +{
 +int ret;
 +int argsz;
 +struct vfio_irq_set *irq_set;
 +int32_t *pfd;
 +
 +if (event_notifier_init(vdev-err_notifier, 0)) {
 +error_report(vfio: Warning: Unable to init event notifier for error 
 detection\n);
 +return;
 +}
 +
 +argsz = sizeof(*irq_set) + sizeof(*pfd);
 +
 +irq_set = g_malloc0(argsz);
 +irq_set-argsz = argsz;
 +irq_set-flags = VFIO_IRQ_SET_DATA_EVENTFD |
 + VFIO_IRQ_SET_ACTION_TRIGGER;
 +irq_set-index = VFIO_PCI_ERR_IRQ_INDEX;
 +irq_set-start = 0;
 +irq_set-count = 1;
 +pfd = (int32_t *)irq_set-data;
 +
 +*pfd = event_notifier_get_fd(vdev-err_notifier);
 +qemu_set_fd_handler(*pfd, vfio_err_notifier_handler, NULL, vdev);
 +
 +ret = ioctl(vdev-fd, VFIO_DEVICE_SET_IRQS, irq_set);
 +if (ret) {
 +DPRINTF(vfio: Error notification not supported for the device\n);
 +qemu_set_fd_handler(*pfd, NULL, NULL, vdev);
 +event_notifier_cleanup(vdev-err_notifier);
 +g_free(irq_set);
 +return;
 +}
 +g_free(irq_set);
 +vdev-pci_aer = 1;
 +return;

Ditto.

 +}
 +static void vfio_unregister_err_notifier(VFIODevice *vdev)
 +{
 +int argsz;
 +struct vfio_irq_set *irq_set;
 +int32_t *pfd;
 +int ret;
 +
 +if (!vdev-pci_aer) {
 +return;
 +}
 +
 +argsz = sizeof(*irq_set) + sizeof(*pfd);
 +
 +irq_set = g_malloc0(argsz);
 +irq_set-argsz = argsz;
 +irq_set-flags = VFIO_IRQ_SET_DATA_EVENTFD |
 + VFIO_IRQ_SET_ACTION_TRIGGER;
 +irq_set-index = VFIO_PCI_ERR_IRQ_INDEX;
 +irq_set-start = 0;
 +irq_set-count = 1;
 +pfd = (int32_t *)irq_set-data;
 +*pfd = -1;
 +
 +ret = ioctl(vdev-fd, VFIO_DEVICE_SET_IRQS, irq_set);
 +if (ret) {
 +DPRINTF(vfio: Failed to de-assign error fd: %d\n, ret);
 +}
 +g_free(irq_set);
 +qemu_set_fd_handler(event_notifier_get_fd(vdev-err_notifier),
 +NULL, NULL, vdev);
 +event_notifier_cleanup(vdev-err_notifier);
 +return;

Ditto.

 +}
  static int vfio_initfn(PCIDevice *pdev)
  {
  VFIODevice *pvdev, *vdev = DO_UPCAST(VFIODevice, pdev, pdev);
 @@ -2032,6 +2134,8 @@ static int vfio_initfn(PCIDevice *pdev)
  }
  }

 +vfio_register_err_notifier(vdev);
 +
  return 0;

  out_teardown:
 @@ -2049,6 +2153,7 @@ static void vfio_exitfn(PCIDevice *pdev)
  VFIODevice *vdev = DO_UPCAST(VFIODevice, pdev, pdev);
  VFIOGroup *group = vdev-group;

 +vfio_unregister_err_notifier(vdev);
  pci_device_set_intx_routing_notifier(vdev-pdev, NULL);
  vfio_disable_interrupts(vdev);
  if (vdev-intx.mmap_timer) {

Re: [Qemu-devel] [PATCH V2 11/20] tap: support enabling or disabling a queue

2013-01-29 Thread Blue Swirl

On Tue, Jan 29, 2013 at 1:50 PM, Jason Wang jasow...@redhat.com wrote:
 On 01/26/2013 03:13 AM, Blue Swirl wrote:
 On Fri, Jan 25, 2013 at 10:35 AM, Jason Wang jasow...@redhat.com wrote:
 This patch introduces a new bit - enabled - in TAPState which tracks whether a
 specific queue/fd is enabled. The tap/fd is enabled during initialization and
 could be enabled/disabled by tap_enable() and tap_disable(), which call
 platform specific helpers to do the real work. Polling of a tap fd can only be
 done when the tap is enabled.

 Signed-off-by: Jason Wang jasow...@redhat.com
 ---
  include/net/tap.h |2 ++
  net/tap-win32.c   |   10 ++
  net/tap.c |   43 ---
  3 files changed, 52 insertions(+), 3 deletions(-)

 diff --git a/include/net/tap.h b/include/net/tap.h
 index bb7efb5..0caf8c4 100644
 --- a/include/net/tap.h
 +++ b/include/net/tap.h
 @@ -35,6 +35,8 @@ int tap_has_vnet_hdr_len(NetClientState *nc, int len);
  void tap_using_vnet_hdr(NetClientState *nc, int using_vnet_hdr);
  void tap_set_offload(NetClientState *nc, int csum, int tso4, int tso6, int 
 ecn, int ufo);
  void tap_set_vnet_hdr_len(NetClientState *nc, int len);
 +int tap_enable(NetClientState *nc);
 +int tap_disable(NetClientState *nc);

  int tap_get_fd(NetClientState *nc);

 diff --git a/net/tap-win32.c b/net/tap-win32.c
 index 265369c..a2cd94b 100644
 --- a/net/tap-win32.c
 +++ b/net/tap-win32.c
 @@ -764,3 +764,13 @@ void tap_set_vnet_hdr_len(NetClientState *nc, int len)
  {
  assert(0);
  }
 +
 +int tap_enable(NetClientState *nc)
 +{
 +assert(0);
 abort()

 This is just to be consistent with the rest of the helpers in this file.

 +}
 +
 +int tap_disable(NetClientState *nc)
 +{
 +assert(0);
 +}
 diff --git a/net/tap.c b/net/tap.c
 index 67080f1..95e557b 100644
 --- a/net/tap.c
 +++ b/net/tap.c
 @@ -59,6 +59,7 @@ typedef struct TAPState {
  unsigned int write_poll : 1;
  unsigned int using_vnet_hdr : 1;
  unsigned int has_ufo: 1;
 +unsigned int enabled : 1;
 bool without bit field?

 Also to be consistent with other field. If you wish I can send patches
 to convert all those bit field to bool on top of this series.

That would be nice, likewise for the assert(0).


 Thanks
  VHostNetState *vhost_net;
  unsigned host_vnet_hdr_len;
  } TAPState;
 @@ -72,9 +73,9 @@ static void tap_writable(void *opaque);
  static void tap_update_fd_handler(TAPState *s)
  {
  qemu_set_fd_handler2(s-fd,
 - s-read_poll  ? tap_can_send : NULL,
 - s-read_poll  ? tap_send : NULL,
 - s-write_poll ? tap_writable : NULL,
 + s-read_poll  s-enabled ? tap_can_send : NULL,
 + s-read_poll  s-enabled ? tap_send : NULL,
 + s-write_poll  s-enabled ? tap_writable : NULL,
   s);
  }

 @@ -339,6 +340,7 @@ static TAPState *net_tap_fd_init(NetClientState *peer,
  s-host_vnet_hdr_len = vnet_hdr ? sizeof(struct virtio_net_hdr) : 0;
  s-using_vnet_hdr = 0;
  s-has_ufo = tap_probe_has_ufo(s-fd);
 +s-enabled = 1;
  tap_set_offload(s-nc, 0, 0, 0, 0, 0);
  /*
   * Make sure host header length is set correctly in tap:
 @@ -737,3 +739,38 @@ VHostNetState *tap_get_vhost_net(NetClientState *nc)
  assert(nc-info-type == NET_CLIENT_OPTIONS_KIND_TAP);
  return s-vhost_net;
  }
 +
 +int tap_enable(NetClientState *nc)
 +{
 +TAPState *s = DO_UPCAST(TAPState, nc, nc);
 +int ret;
 +
 +if (s-enabled) {
 +return 0;
 +} else {
 +ret = tap_fd_enable(s-fd);
 +if (ret == 0) {
 +s-enabled = 1;
 +tap_update_fd_handler(s);
 +}
 +return ret;
 +}
 +}
 +
 +int tap_disable(NetClientState *nc)
 +{
 +TAPState *s = DO_UPCAST(TAPState, nc, nc);
 +int ret;
 +
 +if (s-enabled == 0) {
 +return 0;
 +} else {
 +ret = tap_fd_disable(s-fd);
 +if (ret == 0) {
 +qemu_purge_queued_packets(nc);
 +s-enabled = 0;
 +tap_update_fd_handler(s);
 +}
 +return ret;
 +}
 +}
 --
 1.7.1



--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [Qemu-devel] [PATCH V2 11/20] tap: support enabling or disabling a queue

2013-01-25 Thread Blue Swirl

On Fri, Jan 25, 2013 at 10:35 AM, Jason Wang jasow...@redhat.com wrote:
 This patch introduces a new bit - enabled - in TAPState which tracks whether a
 specific queue/fd is enabled. The tap/fd is enabled during initialization and
 could be enabled/disabled by tap_enable() and tap_disable(), which call
 platform specific helpers to do the real work. Polling of a tap fd can only be
 done when the tap is enabled.

 Signed-off-by: Jason Wang jasow...@redhat.com
 ---
  include/net/tap.h |2 ++
  net/tap-win32.c   |   10 ++
  net/tap.c |   43 ---
  3 files changed, 52 insertions(+), 3 deletions(-)

 diff --git a/include/net/tap.h b/include/net/tap.h
 index bb7efb5..0caf8c4 100644
 --- a/include/net/tap.h
 +++ b/include/net/tap.h
 @@ -35,6 +35,8 @@ int tap_has_vnet_hdr_len(NetClientState *nc, int len);
  void tap_using_vnet_hdr(NetClientState *nc, int using_vnet_hdr);
  void tap_set_offload(NetClientState *nc, int csum, int tso4, int tso6, int 
 ecn, int ufo);
  void tap_set_vnet_hdr_len(NetClientState *nc, int len);
 +int tap_enable(NetClientState *nc);
 +int tap_disable(NetClientState *nc);

  int tap_get_fd(NetClientState *nc);

 diff --git a/net/tap-win32.c b/net/tap-win32.c
 index 265369c..a2cd94b 100644
 --- a/net/tap-win32.c
 +++ b/net/tap-win32.c
 @@ -764,3 +764,13 @@ void tap_set_vnet_hdr_len(NetClientState *nc, int len)
  {
  assert(0);
  }
 +
 +int tap_enable(NetClientState *nc)
 +{
 +assert(0);

abort()

 +}
 +
 +int tap_disable(NetClientState *nc)
 +{
 +assert(0);
 +}
 diff --git a/net/tap.c b/net/tap.c
 index 67080f1..95e557b 100644
 --- a/net/tap.c
 +++ b/net/tap.c
 @@ -59,6 +59,7 @@ typedef struct TAPState {
  unsigned int write_poll : 1;
  unsigned int using_vnet_hdr : 1;
  unsigned int has_ufo: 1;
 +unsigned int enabled : 1;

bool without bit field?

  VHostNetState *vhost_net;
  unsigned host_vnet_hdr_len;
  } TAPState;
 @@ -72,9 +73,9 @@ static void tap_writable(void *opaque);
  static void tap_update_fd_handler(TAPState *s)
  {
  qemu_set_fd_handler2(s-fd,
 - s-read_poll  ? tap_can_send : NULL,
 - s-read_poll  ? tap_send : NULL,
 - s-write_poll ? tap_writable : NULL,
 + s-read_poll  s-enabled ? tap_can_send : NULL,
 + s-read_poll  s-enabled ? tap_send : NULL,
 + s-write_poll  s-enabled ? tap_writable : NULL,
   s);
  }

 @@ -339,6 +340,7 @@ static TAPState *net_tap_fd_init(NetClientState *peer,
  s-host_vnet_hdr_len = vnet_hdr ? sizeof(struct virtio_net_hdr) : 0;
  s-using_vnet_hdr = 0;
  s-has_ufo = tap_probe_has_ufo(s-fd);
 +s-enabled = 1;
  tap_set_offload(s-nc, 0, 0, 0, 0, 0);
  /*
   * Make sure host header length is set correctly in tap:
 @@ -737,3 +739,38 @@ VHostNetState *tap_get_vhost_net(NetClientState *nc)
  assert(nc-info-type == NET_CLIENT_OPTIONS_KIND_TAP);
  return s-vhost_net;
  }
 +
 +int tap_enable(NetClientState *nc)
 +{
 +TAPState *s = DO_UPCAST(TAPState, nc, nc);
 +int ret;
 +
 +if (s-enabled) {
 +return 0;
 +} else {
 +ret = tap_fd_enable(s-fd);
 +if (ret == 0) {
 +s-enabled = 1;
 +tap_update_fd_handler(s);
 +}
 +return ret;
 +}
 +}
 +
 +int tap_disable(NetClientState *nc)
 +{
 +TAPState *s = DO_UPCAST(TAPState, nc, nc);
 +int ret;
 +
 +if (s-enabled == 0) {
 +return 0;
 +} else {
 +ret = tap_fd_disable(s-fd);
 +if (ret == 0) {
 +qemu_purge_queued_packets(nc);
 +s-enabled = 0;
 +tap_update_fd_handler(s);
 +}
 +return ret;
 +}
 +}
 --
 1.7.1


--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [Qemu-devel] [PATCH 2/2] QEMU-AER: Qemu changes to support AER for VFIO-PCI devices

2013-01-09 Thread Blue Swirl

On Wed, Jan 9, 2013 at 6:26 AM, Pandarathil, Vijaymohan R
vijaymohan.pandarat...@hp.com wrote:
 - Create eventfd per vfio device assigned to a guest and register an
   event handler

 - This fd is passed to the vfio_pci driver through a new ioctl

 - When the device encounters an error, the eventfd is signaled
   and the qemu eventfd handler gets invoked.

 - In the handler decide what action to take. Current action taken
   is to terminate the guest.

 Signed-off-by: Vijay Mohan Pandarathil vijaymohan.pandarat...@hp.com
 ---
  hw/vfio_pci.c  | 56 
 ++
  linux-headers/linux/vfio.h |  9 
  2 files changed, 65 insertions(+)

 diff --git a/hw/vfio_pci.c b/hw/vfio_pci.c
 index 28c8303..9c3c28b 100644
 --- a/hw/vfio_pci.c
 +++ b/hw/vfio_pci.c
 @@ -38,6 +38,7 @@
  #include qemu/error-report.h
  #include qemu/queue.h
  #include qemu/range.h
 +#include sysemu/sysemu.h

  /* #define DEBUG_VFIO */
  #ifdef DEBUG_VFIO
 @@ -130,6 +131,8 @@ typedef struct VFIODevice {
  QLIST_ENTRY(VFIODevice) next;
  struct VFIOGroup *group;
  bool reset_works;
 +EventNotifier errfd;
 +__u32 dev_info_flags;

QEMU is not kernel code, please use uint32_t.

  } VFIODevice;

  typedef struct VFIOGroup {
 @@ -1805,6 +1808,8 @@ static int vfio_get_device(VFIOGroup *group, const char 
 *name, VFIODevice *vdev)
  DPRINTF(Device %s flags: %u, regions: %u, irgs: %u\n, name,
  dev_info.flags, dev_info.num_regions, dev_info.num_irqs);

 +vdev-dev_info_flags = dev_info.flags;
 +
  if (!(dev_info.flags  VFIO_DEVICE_FLAGS_PCI)) {
  error_report(vfio: Um, this isn't a PCI device\n);
  goto error;
 @@ -1900,6 +1905,55 @@ static void vfio_put_device(VFIODevice *vdev)
  }
  }

 +static void vfio_errfd_handler(void *opaque)
 +{
 +VFIODevice *vdev = opaque;
 +
 +if (!event_notifier_test_and_clear(vdev-errfd)) {
 +return;
 +}
 +
 +/*
 + * TBD. Retrieve the error details and decide what action
 + * needs to be taken. One of the actions could be to pass
 + * the error to the guest and have the guest driver recover
 + * the error. This requires that PCIe capabilities be
 + * exposed to the guest. At present, we just terminate the
 + * guest to contain the error.
 + */
 +error_report(%s(%04x:%02x:%02x.%x) 
 +Unrecoverable error detected... Terminating guest\n,
 +__func__, vdev-host.domain, vdev-host.bus, vdev-host.slot,
 +vdev-host.function);
 +
 +qemu_system_shutdown_request();
 +return;
 +}
 +
 +static void vfio_register_errfd(VFIODevice *vdev)
 +{
 +int32_t pfd;
 +int ret;
 +
 +if (!(vdev-dev_info_flags  VFIO_DEVICE_FLAGS_AER_NOTIFY)) {
 +error_report(vfio: Warning: Error notification not supported for 
 the device\n);
 +return;
 +}
 +if (event_notifier_init(vdev-errfd, 0)) {
 +error_report(vfio: Warning: Unable to init event notifier for error 
 detection\n);
 +return;
 +}
 +pfd = event_notifier_get_fd(vdev-errfd);
 +qemu_set_fd_handler(pfd, vfio_errfd_handler, NULL, vdev);
 +
 +ret = ioctl(vdev-fd, VFIO_DEVICE_SET_ERRFD, pfd);
 +if (ret) {
 +error_report(vfio: Warning: Failed to setup error fd: %d\n, ret);
 +qemu_set_fd_handler(pfd, NULL, NULL, vdev);
 +event_notifier_cleanup(vdev-errfd);
 +}
 +return;
 +}
  static int vfio_initfn(PCIDevice *pdev)
  {
  VFIODevice *pvdev, *vdev = DO_UPCAST(VFIODevice, pdev, pdev);
 @@ -2010,6 +2064,8 @@ static int vfio_initfn(PCIDevice *pdev)
  }
  }

 +vfio_register_errfd(vdev);
 +
  return 0;

  out_teardown:
 diff --git a/linux-headers/linux/vfio.h b/linux-headers/linux/vfio.h
 index 4758d1b..0ca4eeb 100644
 --- a/linux-headers/linux/vfio.h
 +++ b/linux-headers/linux/vfio.h
 @@ -147,6 +147,7 @@ struct vfio_device_info {
 __u32   flags;
  #define VFIO_DEVICE_FLAGS_RESET(1  0)/* Device supports 
 reset */
  #define VFIO_DEVICE_FLAGS_PCI  (1  1)/* vfio-pci device */
 +#define VFIO_DEVICE_FLAGS_AER_NOTIFY (1  2)   /* Supports aer notification 
 */
 __u32   num_regions;/* Max region index + 1 */
 __u32   num_irqs;   /* Max IRQ index + 1 */

These are verbatim copies of kernel headers so it's OK here.

  };
 @@ -288,6 +289,14 @@ struct vfio_irq_set {
   */
  #define VFIO_DEVICE_RESET  _IO(VFIO_TYPE, VFIO_BASE + 11)

 +/**
 + * VFIO_DEVICE_SET_ERRFD - _IO(VFIO_TYPE, VFIO_BASE + 12)
 + *
 + * Pass the eventfd to the vfio-pci driver for signalling any device
 + * error notifications
 + */
 +#define VFIO_DEVICE_SET_ERRFD   _IO(VFIO_TYPE, VFIO_BASE + 12)
 +
  /*
   * The VFIO-PCI bus driver makes use of the following fixed region and
   * IRQ index mapping.  Unimplemented regions return a size of zero.
 --
 1.7.11.3


--
To unsubscribe from this list: send the

Re: [Qemu-devel] [PATCH 10/12] virtio-net: multiqueue support

2013-01-04 Thread Blue Swirl

On Fri, Jan 4, 2013 at 5:12 AM, Jason Wang jasow...@redhat.com wrote:
 On 12/29/2012 01:52 AM, Blue Swirl wrote:
 On Fri, Dec 28, 2012 at 10:32 AM, Jason Wang jasow...@redhat.com wrote:
 This patch implements both userspace and vhost support for multiple queue
 virtio-net (VIRTIO_NET_F_MQ). This is done by introducing an array of
 VirtIONetQueue to VirtIONet.

 Signed-off-by: Jason Wang jasow...@redhat.com
 ---
  hw/virtio-net.c |  318 
 ++-
  hw/virtio-net.h |   27 +-
  2 files changed, 271 insertions(+), 74 deletions(-)
 [...]
  static void virtio_net_handle_ctrl(VirtIODevice *vdev, VirtQueue *vq)
  {
  VirtIONet *n = to_virtio_net(vdev);
 @@ -464,6 +578,8 @@ static void virtio_net_handle_ctrl(VirtIODevice *vdev, 
 VirtQueue *vq)
  status = virtio_net_handle_mac(n, ctrl.cmd, elem);
  else if (ctrl.class == VIRTIO_NET_CTRL_VLAN)
  status = virtio_net_handle_vlan_table(n, ctrl.cmd, elem);
 +else if (ctrl.class == VIRTIO_NET_CTRL_MQ)
 Please add braces.

 Sure.

 +status = virtio_net_handle_mq(n, ctrl.cmd, elem);

  stb_p(elem.in_sg[elem.in_num - 1].iov_base, status);

 @@ -477,19 +593,24 @@ static void virtio_net_handle_ctrl(VirtIODevice 
 *vdev, VirtQueue *vq)
  static void virtio_net_handle_rx(VirtIODevice *vdev, VirtQueue *vq)
  {
  VirtIONet *n = to_virtio_net(vdev);
 +int queue_index = vq2q(virtio_get_queue_index(vq));

 -qemu_flush_queued_packets(qemu_get_queue(n-nic));
 +qemu_flush_queued_packets(qemu_get_subqueue(n-nic, queue_index));
  }


 [...]

 +static void virtio_net_set_multiqueue(VirtIONet *n, int multiqueue, int 
 ctrl)
 +{
 +VirtIODevice *vdev = n-vdev;
 +int i;
 +
 +n-multiqueue = multiqueue;
 +
 +if (!multiqueue)
 +n-curr_queues = 1;
 Ditto. Didn't checkpatch.pl catch these or did you not check?

 Sorry, will add braces here. I ran checkpatch.pl but found that a lot of
 the existing code (such as this file) does not obey the rules. So I'm not
 sure whether I need to correct my own code, or leave it as this file does
 and correct it all in the future.

The goal is to make the QEMU codebase conform to CODING_STYLE. Currently
this is not the case for some of the code, but we should use
opportunities like this to advance towards that goal.


 [...]
  } QEMU_PACKED;

  /* This is the first element of the scatter-gather list.  If you don't
 @@ -168,6 +172,26 @@ struct virtio_net_ctrl_mac {
   #define VIRTIO_NET_CTRL_VLAN_ADD 0
   #define VIRTIO_NET_CTRL_VLAN_DEL 1

 +/*
 + * Control Multiqueue
 + *
 + * The command VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET
 + * enables multiqueue, specifying the number of the transmit and
 + * receive queues that will be used. After the command is consumed and 
 acked by
 + * the device, the device will not steer new packets on receive virtqueues
 + * other than specified nor read from transmit virtqueues other than 
 specified.
 + * Accordingly, driver should not transmit new packets  on virtqueues 
 other than
 + * specified.
 + */
 +struct virtio_net_ctrl_mq {
 VirtIONetCtrlMQ and please don't forget the typedef.

 Sure, but the same question as above. (See other structures in this file).

 +uint16_t virtqueue_pairs;
 +};
 +
 +#define VIRTIO_NET_CTRL_MQ   4
 + #define VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET0
 + #define VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN1
 + #define VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX0x8000
 +
  #define DEFINE_VIRTIO_NET_FEATURES(_state, _field) \
  DEFINE_VIRTIO_COMMON_FEATURES(_state, _field), \
  DEFINE_PROP_BIT(csum, _state, _field, VIRTIO_NET_F_CSUM, true), \
 @@ -186,5 +210,6 @@ struct virtio_net_ctrl_mac {
  DEFINE_PROP_BIT(ctrl_vq, _state, _field, VIRTIO_NET_F_CTRL_VQ, 
 true), \
  DEFINE_PROP_BIT(ctrl_rx, _state, _field, VIRTIO_NET_F_CTRL_RX, 
 true), \
  DEFINE_PROP_BIT(ctrl_vlan, _state, _field, 
 VIRTIO_NET_F_CTRL_VLAN, true), \
 -DEFINE_PROP_BIT(ctrl_rx_extra, _state, _field, 
 VIRTIO_NET_F_CTRL_RX_EXTRA, true)
 +DEFINE_PROP_BIT(ctrl_rx_extra, _state, _field, 
 VIRTIO_NET_F_CTRL_RX_EXTRA, true), \
 +DEFINE_PROP_BIT(mq, _state, _field, VIRTIO_NET_F_MQ, true)
  #endif
 --
 1.7.1



--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [Qemu-devel] [PATCH 1/2] target-i386: Don't set any KVM flag by default if KVM is disabled

2013-01-04 Thread Blue Swirl

On Fri, Jan 4, 2013 at 2:52 PM, Eduardo Habkost ehabk...@redhat.com wrote:
 This is a cleanup that tries to solve two small issues:

  - We don't need a separate kvm_pv_eoi_features variable just to keep a
constant calculated at compile-time, and this style would require
adding a separate variable (that's declared twice because of the
CONFIG_KVM ifdef) for each feature that's going to be enabled/disabled
by machine-type compat code.
  - The pc-1.3 code is setting the kvm_pv_eoi flag on cpuid_kvm_features
even when KVM is disabled at runtime. This small inconsistency in
the cpuid_kvm_features field isn't a problem today because
cpuid_kvm_features is ignored by the TCG code, but it may cause
unexpected problems later when refactoring the CPUID handling code.

 This patch eliminates the kvm_pv_eoi_features variable and simply uses
 CONFIG_KVM and kvm_enabled() inside the enable_kvm_pv_eoi() compat
 function, so it enables kvm_pv_eoi only if KVM is enabled. I believe
 this makes the behavior of enable_kvm_pv_eoi() clearer and easier to
 understand.

 Signed-off-by: Eduardo Habkost ehabk...@redhat.com
 ---
 Cc: kvm@vger.kernel.org
 Cc: Michael S. Tsirkin m...@redhat.com
 Cc: Gleb Natapov g...@redhat.com
 Cc: Marcelo Tosatti mtosa...@redhat.com
 ---
  target-i386/cpu.c | 7 ---
  1 file changed, 4 insertions(+), 3 deletions(-)

 diff --git a/target-i386/cpu.c b/target-i386/cpu.c
 index 82685dc..808001a 100644
 --- a/target-i386/cpu.c
 +++ b/target-i386/cpu.c
 @@ -145,15 +145,16 @@ static uint32_t kvm_default_features = (1  
 KVM_FEATURE_CLOCKSOURCE) |
  (1  KVM_FEATURE_ASYNC_PF) |
  (1  KVM_FEATURE_STEAL_TIME) |
  (1  KVM_FEATURE_CLOCKSOURCE_STABLE_BIT);
 -static const uint32_t kvm_pv_eoi_features = (0x1  KVM_FEATURE_PV_EOI);
  #else
  static uint32_t kvm_default_features = 0;
 -static const uint32_t kvm_pv_eoi_features = 0;
  #endif

  void enable_kvm_pv_eoi(void)
  {
 -kvm_default_features |= kvm_pv_eoi_features;
 +#ifdef CONFIG_KVM
 +if (kvm_enabled())

Missing braces, please read CODING_STYLE and use checkpatch.pl to find
problems in patches.

 +kvm_default_features |= (1UL  KVM_FEATURE_PV_EOI);
 +#endif
  }

  void host_cpuid(uint32_t function, uint32_t count,
 --
 1.7.11.7


--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [Qemu-devel] [PATCH 2/2] target-i386: Disable kvm_mmu_op by default on pc-1.4

2013-01-04 Thread Blue Swirl

On Fri, Jan 4, 2013 at 2:52 PM, Eduardo Habkost ehabk...@redhat.com wrote:
 The kvm_mmu_op feature was removed from the kernel since v3.3 (released
 in March 2012), it was marked for removal since January 2011 and it's
 slower than shadow or hardware assisted paging (see kernel commit
 fb92045843). It doesn't make sense to keep it enabled by default.

 Also, keeping it enabled by default would cause unnecessary hassle when
 libvirt starts using the enforce option.

 Signed-off-by: Eduardo Habkost ehabk...@redhat.com
 ---
 Cc: kvm@vger.kernel.org
 Cc: Michael S. Tsirkin m...@redhat.com
 Cc: Gleb Natapov g...@redhat.com
 Cc: Marcelo Tosatti mtosa...@redhat.com
 Cc: libvir-l...@redhat.com
 Cc: Jiri Denemark jdene...@redhat.com

 I was planning to reverse the logic of the compat init functions and make
 pc_init_pci_1_3() enable kvm_mmu_op and then call pc_init_pci_1_4(). But that
 would require changing pc_init_pci_no_kvmclock() and pc_init_isa() as well. So
 to keep the changes simple, I am keeping the pattern used when 
 pc_init_pci_1_3()
 was introduced, making pc_init_pci_1_4() disable kvm_mmu_op and then call
 pc_init_pci_1_3().

 ---
  hw/pc_piix.c  | 11 ++-
  target-i386/cpu.c |  8 
  target-i386/cpu.h |  1 +
  3 files changed, 19 insertions(+), 1 deletion(-)

 diff --git a/hw/pc_piix.c b/hw/pc_piix.c
 index 99747a7..a6bf645 100644
 --- a/hw/pc_piix.c
 +++ b/hw/pc_piix.c
 @@ -217,6 +217,7 @@ static void pc_init1(MemoryRegion *system_memory,
  }
  }

 +/* machine init function for pc-0.14 - pc-1.2 */
  static void pc_init_pci(QEMUMachineInitArgs *args)
  {
  ram_addr_t ram_size = args-ram_size;
 @@ -232,12 +233,20 @@ static void pc_init_pci(QEMUMachineInitArgs *args)
   initrd_filename, cpu_model, 1, 1);
  }

 +/* machine init function for pc-1.3 */

The comment doesn't give much information compared to the function name.

  static void pc_init_pci_1_3(QEMUMachineInitArgs *args)
  {
  enable_kvm_pv_eoi();
  pc_init_pci(args);
  }

 +/* machine init function for pc-1.4 */

Ditto.

 +static void pc_init_pci_1_4(QEMUMachineInitArgs *args)
 +{
 +disable_kvm_mmu_op();
 +pc_init_pci_1_3(args);
 +}
 +
  static void pc_init_pci_no_kvmclock(QEMUMachineInitArgs *args)
  {
  ram_addr_t ram_size = args-ram_size;
 @@ -285,7 +294,7 @@ static QEMUMachine pc_machine_v1_4 = {
  .name = pc-1.4,
  .alias = pc,
  .desc = Standard PC,
 -.init = pc_init_pci_1_3,
 +.init = pc_init_pci_1_4,
  .max_cpus = 255,
  .is_default = 1,
  };
 diff --git a/target-i386/cpu.c b/target-i386/cpu.c
 index 808001a..ec877c7 100644
 --- a/target-i386/cpu.c
 +++ b/target-i386/cpu.c
 @@ -157,6 +157,14 @@ void enable_kvm_pv_eoi(void)
  #endif
  }

 +void disable_kvm_mmu_op(void)
 +{
 +#ifdef CONFIG_KVM
 +if (kvm_enabled())

Braces.

 +kvm_default_features = ~(1UL  KVM_FEATURE_MMU_OP);
 +#endif
 +}
 +
  void host_cpuid(uint32_t function, uint32_t count,
  uint32_t *eax, uint32_t *ebx, uint32_t *ecx, uint32_t *edx)
  {
 diff --git a/target-i386/cpu.h b/target-i386/cpu.h
 index 1283537..27c8d0c 100644
 --- a/target-i386/cpu.h
 +++ b/target-i386/cpu.h
 @@ -1219,5 +1219,6 @@ void do_smm_enter(CPUX86State *env1);
  void cpu_report_tpr_access(CPUX86State *env, TPRAccess access);

  void enable_kvm_pv_eoi(void);
 +void disable_kvm_mmu_op(void);

  #endif /* CPU_I386_H */
 --
 1.7.11.7


--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [Qemu-devel] [PATCH 3/9] target-i386: check/enforce: Fix CPUID leaf numbers on error messages

2013-01-04 Thread Blue Swirl

On Fri, Jan 4, 2013 at 3:37 PM, Eduardo Habkost ehabk...@redhat.com wrote:
 The -cpu check/enforce warnings are printing incorrect information about the
 missing flags. There are no feature flags on CPUID leaves 0 and 0x80000000,
 but there were references to 0 and 0x80000000 in the table at
 kvm_check_features_against_host().

 This changes the model_features_t struct to contain the register number as
 well, so the error messages print the correct CPUID leaf+register information,
 instead of wrong CPUID leaf numbers.

 This also changes the format of the error messages, so they follow the
 CPUID.leaf.register.name [bit offset] convention used on Intel
 documentation. Example output:

 $ qemu-system-x86_64 -machine pc-1.0,accel=kvm -cpu 
 Opteron_G4,+ia64,enforce
 warning: host doesn't support requested feature: CPUID.01H:EDX.ia64 [bit 
 30]
 warning: host doesn't support requested feature: CPUID.01H:ECX.xsave [bit 
 26]
 warning: host doesn't support requested feature: CPUID.01H:ECX.avx [bit 
 28]
 warning: host doesn't support requested feature: CPUID.8001H:ECX.abm 
 [bit 5]
 warning: host doesn't support requested feature: 
 CPUID.8001H:ECX.sse4a [bit 6]
 warning: host doesn't support requested feature: 
 CPUID.8001H:ECX.misalignsse [bit 7]
 warning: host doesn't support requested feature: 
 CPUID.8001H:ECX.3dnowprefetch [bit 8]
 warning: host doesn't support requested feature: CPUID.8001H:ECX.xop 
 [bit 11]
 warning: host doesn't support requested feature: CPUID.8001H:ECX.fma4 
 [bit 16]
 Unable to find x86 CPU definition
 $

 Signed-off-by: Eduardo Habkost ehabk...@redhat.com
 ---
 Cc: Gleb Natapov g...@redhat.com
 Cc: Marcelo Tosatti mtosa...@redhat.com
 Cc: kvm@vger.kernel.org
 ---
  target-i386/cpu.c | 38 +-
  target-i386/cpu.h |  3 +++
  2 files changed, 32 insertions(+), 9 deletions(-)

 diff --git a/target-i386/cpu.c b/target-i386/cpu.c
 index 4e26b11..6c43ace 100644
 --- a/target-i386/cpu.c
 +++ b/target-i386/cpu.c
 @@ -124,6 +124,24 @@ static const char *cpuid_7_0_ebx_feature_name[] = {
  NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
  };

 +const char *get_register_name_32(unsigned int reg)
 +{
 +static const char *reg_names[CPU_NB_REGS32] = {
 +[R_EAX] = EAX,
 +[R_ECX] = ECX,
 +[R_EDX] = EDX,
 +[R_EBX] = EBX,
 +[R_ESP] = ESP,
 +[R_EBP] = EBP,
 +[R_ESI] = ESI,
 +[R_EDI] = EDI,
 +};
 +
 +if (reg  CPU_NB_REGS32)

Missing braces.

 +return NULL;
 +return reg_names[reg];
 +}
 +
  /* collects per-function cpuid data
   */
  typedef struct model_features_t {
 @@ -132,7 +150,8 @@ typedef struct model_features_t {
  uint32_t check_feat;
  const char **flag_names;
  uint32_t cpuid;
 -} model_features_t;
 +int reg;
 +} model_features_t;

  int check_cpuid = 0;
  int enforce_cpuid = 0;
 @@ -921,10 +940,11 @@ static int unavailable_host_feature(struct 
 model_features_t *f, uint32_t mask)

  for (i = 0; i  32; ++i)
  if (1  i  mask) {
 -fprintf(stderr, warning: host cpuid %04x_%04x lacks requested
 - flag '%s' [0x%08x]\n,
 -f-cpuid  16, f-cpuid  0x,
 -f-flag_names[i] ? f-flag_names[i] : [reserved], mask);
 +fprintf(stderr, warning: host doesn't support requested 
 feature: 
 +CPUID.%02XH:%s%s%s [bit %d]\n,
 +f-cpuid, get_register_name_32(f-reg),

This could attempt to print NULL via %s format, which is not OK with
all C libraries. If we trust that the callers always pass valid
numbers, the check above could be turned into assert().

 +f-flag_names[i] ? . : ,
 +f-flag_names[i] ? f-flag_names[i] : , i);
  break;
  }
  return 0;
 @@ -943,13 +963,13 @@ static int kvm_check_features_against_host(x86_def_t 
 *guest_def)
  int rv, i;
  struct model_features_t ft[] = {
  {guest_def-features, host_def.features,
 -~0, feature_name, 0x},
 +~0, feature_name, 0x0001, R_EDX},
  {guest_def-ext_features, host_def.ext_features,
 -~CPUID_EXT_HYPERVISOR, ext_feature_name, 0x0001},
 +~CPUID_EXT_HYPERVISOR, ext_feature_name, 0x0001, R_ECX},
  {guest_def-ext2_features, host_def.ext2_features,
 -~PPRO_FEATURES, ext2_feature_name, 0x8000},
 +~PPRO_FEATURES, ext2_feature_name, 0x8001, R_EDX},
  {guest_def-ext3_features, host_def.ext3_features,
 -~CPUID_EXT3_SVM, ext3_feature_name, 0x8001}};
 +~CPUID_EXT3_SVM, ext3_feature_name, 0x8001, R_ECX}};

  assert(kvm_enabled());

 diff --git a/target-i386/cpu.h b/target-i386/cpu.h
 index 27c8d0c..ab81a5c 100644
 --- a/target-i386/cpu.h
 +++ b/target-i386/cpu.h
 @@ -1221,4 +1221,7 @@ void cpu_report_tpr_access(CPUX86State

Re: [Qemu-devel] [PATCH 10/12] virtio-net: multiqueue support

2012-12-28 Thread Blue Swirl

On Fri, Dec 28, 2012 at 10:32 AM, Jason Wang jasow...@redhat.com wrote:
 This patch implements both userspace and vhost support for multiple queue
 virtio-net (VIRTIO_NET_F_MQ). This is done by introducing an array of
 VirtIONetQueue to VirtIONet.

 Signed-off-by: Jason Wang jasow...@redhat.com
 ---
  hw/virtio-net.c |  318 
 ++-
  hw/virtio-net.h |   27 +-
  2 files changed, 271 insertions(+), 74 deletions(-)

 diff --git a/hw/virtio-net.c b/hw/virtio-net.c
 index c6f0915..aaeef1b 100644
 --- a/hw/virtio-net.c
 +++ b/hw/virtio-net.c
 @@ -45,7 +45,7 @@ typedef struct VirtIONet
  VirtIODevice vdev;
  uint8_t mac[ETH_ALEN];
  uint16_t status;
 -VirtIONetQueue vq;
 +VirtIONetQueue vqs[MAX_QUEUE_NUM];
  VirtQueue *ctrl_vq;
  NICState *nic;
  uint32_t tx_timeout;
 @@ -70,14 +70,23 @@ typedef struct VirtIONet
  } mac_table;
  uint32_t *vlans;
  DeviceState *qdev;
 +int multiqueue;
 +uint16_t max_queues;
 +uint16_t curr_queues;
  } VirtIONet;

 -static VirtIONetQueue *virtio_net_get_queue(NetClientState *nc)
 +static VirtIONetQueue *virtio_net_get_subqueue(NetClientState *nc)
  {
  VirtIONet *n = qemu_get_nic_opaque(nc);

 -return n-vq;
 +return n-vqs[nc-queue_index];
  }
 +
 +static int vq2q(int queue_index)
 +{
 +return queue_index / 2;
 +}
 +
  /* TODO
   * - we could suppress RX interrupt if we were so inclined.
   */
 @@ -93,6 +102,7 @@ static void virtio_net_get_config(VirtIODevice *vdev, 
 uint8_t *config)
  struct virtio_net_config netcfg;

  stw_p(netcfg.status, n-status);
 +stw_p(netcfg.max_virtqueue_pairs, n-max_queues);
  memcpy(netcfg.mac, n-mac, ETH_ALEN);
  memcpy(config, netcfg, sizeof(netcfg));
  }
 @@ -116,31 +126,33 @@ static bool virtio_net_started(VirtIONet *n, uint8_t 
 status)
  (n-status  VIRTIO_NET_S_LINK_UP)  n-vdev.vm_running;
  }

 -static void virtio_net_vhost_status(VirtIONet *n, uint8_t status)
 +static void virtio_net_vhost_status(VirtIONet *n, int queue_index,
 +uint8_t status)
  {
 -VirtIONetQueue *q = n-vq;
 +NetClientState *nc = qemu_get_subqueue(n-nic, queue_index);
 +VirtIONetQueue *q = n-vqs[queue_index];

 -if (!qemu_get_queue(n-nic)-peer) {
 +if (!nc-peer) {
  return;
  }
 -if (qemu_get_queue(n-nic)-peer-info-type != 
 NET_CLIENT_OPTIONS_KIND_TAP) {
 +if (nc-peer-info-type != NET_CLIENT_OPTIONS_KIND_TAP) {
  return;
  }

 -if (!tap_get_vhost_net(qemu_get_queue(n-nic)-peer)) {
 +if (!tap_get_vhost_net(nc-peer)) {
  return;
  }
 -if (!!q-vhost_started == virtio_net_started(n, status) 
 -  !qemu_get_queue(n-nic)-peer-link_down) {
 +if (!!q-vhost_started ==
 +(virtio_net_started(n, status)  !nc-peer-link_down)) {
  return;
  }
  if (!q-vhost_started) {
  int r;
 -if 
 (!vhost_net_query(tap_get_vhost_net(qemu_get_queue(n-nic)-peer), n-vdev)) 
 {
 +if (!vhost_net_query(tap_get_vhost_net(nc-peer), n-vdev)) {
  return;
  }
 -r = vhost_net_start(tap_get_vhost_net(qemu_get_queue(n-nic)-peer),
 -n-vdev, 0);
 +r = vhost_net_start(tap_get_vhost_net(nc-peer), n-vdev,
 +queue_index * 2);
  if (r  0) {
  error_report(unable to start vhost net: %d: 
   falling back on userspace virtio, -r);
 @@ -148,7 +160,7 @@ static void virtio_net_vhost_status(VirtIONet *n, uint8_t 
 status)
  q-vhost_started = 1;
  }
  } else {
 -vhost_net_stop(tap_get_vhost_net(qemu_get_queue(n-nic)-peer), 
 n-vdev);
 +vhost_net_stop(tap_get_vhost_net(nc-peer), n-vdev);
  q-vhost_started = 0;
  }
  }
 @@ -156,26 +168,35 @@ static void virtio_net_vhost_status(VirtIONet *n, 
 uint8_t status)
  static void virtio_net_set_status(struct VirtIODevice *vdev, uint8_t status)
  {
  VirtIONet *n = to_virtio_net(vdev);
 -VirtIONetQueue *q = n-vq;
 +int i;

 -virtio_net_vhost_status(n, status);
 +for (i = 0; i  n-max_queues; i++) {
 +VirtIONetQueue *q = n-vqs[i];
 +uint8_t queue_status = status;

 -if (!q-tx_waiting) {
 -return;
 -}
 +if ((!n-multiqueue  i != 0) || i = n-curr_queues) {
 +queue_status = 0;
 +}

 -if (virtio_net_started(n, status)  !q-vhost_started) {
 -if (q-tx_timer) {
 -qemu_mod_timer(q-tx_timer,
 -   qemu_get_clock_ns(vm_clock) + n-tx_timeout);
 -} else {
 -qemu_bh_schedule(q-tx_bh);
 +virtio_net_vhost_status(n, i, queue_status);
 +
 +if (!q-tx_waiting) {
 +continue;
  }
 -} else {
 -if (q-tx_timer) {
 -qemu_del_timer(q-tx_timer);
 +
 +if (virtio_net_started(n, status)

Re: [Qemu-devel] [PATCH 05/12] net: multiqueue support

2012-12-28 Thread Blue Swirl

On Fri, Dec 28, 2012 at 10:31 AM, Jason Wang jasow...@redhat.com wrote:
 This patch adds basic multiqueue support for qemu. The idea is simple: an
 array of NetClientStates was introduced in NICState, and parse_netdev() was
 extended to find and match all NetClientStates that belong to the backend and
 place their pointers in NICConf. Then qemu_new_nic can set up an N:N mapping
 between the NICStates that belong to a nic and the NICStates that belong to
 the netdev. After this, each peer of a NICState is abstracted as a queue.

 To adapt to this change, the set_link/netdev_del commands will find all the
 NetClientStates of a nic or a netdev, and change all their state in one run.

 Signed-off-by: Jason Wang jasow...@redhat.com
 ---
  hw/dp8393x.c |2 +-
  hw/mcf_fec.c |2 +-
  hw/qdev-properties.c |   46 +++--
  hw/qdev-properties.h |6 +-
  net.c|  172 
 +-
  net.h|   27 +++-
  6 files changed, 195 insertions(+), 60 deletions(-)

 diff --git a/hw/dp8393x.c b/hw/dp8393x.c
 index 8f20a4a..fad0837 100644
 --- a/hw/dp8393x.c
 +++ b/hw/dp8393x.c
 @@ -899,7 +899,7 @@ void dp83932_init(NICInfo *nd, hwaddr base, int it_shift,
  s-regs[SONIC_SR] = 0x0004; /* only revision recognized by Linux */

  s-conf.macaddr = nd-macaddr;
 -s-conf.peer = nd-netdev;
 +s-conf.peers.ncs[0] = nd-netdev;

  s-nic = qemu_new_nic(net_dp83932_info, s-conf, nd-model, nd-name, 
 s);

 diff --git a/hw/mcf_fec.c b/hw/mcf_fec.c
 index 7fc89b5..c298bec 100644
 --- a/hw/mcf_fec.c
 +++ b/hw/mcf_fec.c
 @@ -472,7 +472,7 @@ void mcf_fec_init(MemoryRegion *sysmem, NICInfo *nd,
  memory_region_add_subregion(sysmem, base, s-iomem);

  s-conf.macaddr = nd-macaddr;
 -s-conf.peer = nd-netdev;
 +s-conf.peers[0] = nd-netdev;

  s-nic = qemu_new_nic(net_mcf_fec_info, s-conf, nd-model, nd-name, 
 s);

 diff --git a/hw/qdev-properties.c b/hw/qdev-properties.c
 index 81d901c..6e45def 100644
 --- a/hw/qdev-properties.c
 +++ b/hw/qdev-properties.c
 @@ -585,16 +585,47 @@ PropertyInfo qdev_prop_chr = {

  static int parse_netdev(DeviceState *dev, const char *str, void **ptr)
  {
 -NetClientState *netdev = qemu_find_netdev(str);
 +NICPeers *peers_ptr = (NICPeers *)ptr;
 +NICConf *conf = container_of(peers_ptr, NICConf, peers);
 +NetClientState **ncs = peers_ptr-ncs;
 +NetClientState *peers[MAX_QUEUE_NUM];
 +int queues, i = 0;
 +int ret;

 -if (netdev == NULL) {
 -return -ENOENT;
 +queues = qemu_find_net_clients_except(str, peers,
 +  NET_CLIENT_OPTIONS_KIND_NIC,
 +  MAX_QUEUE_NUM);
 +if (queues == 0) {
 +ret = -ENOENT;
 +goto err;
  }
 -if (netdev-peer) {
 -return -EEXIST;
 +
 +if (queues  MAX_QUEUE_NUM) {
 +ret = -E2BIG;
 +goto err;
 +}
 +
 +for (i = 0; i  queues; i++) {
 +if (peers[i] == NULL) {
 +ret = -ENOENT;
 +goto err;
 +}
 +
 +if (peers[i]-peer) {
 +ret = -EEXIST;
 +goto err;
 +}
 +
 +ncs[i] = peers[i];
 +ncs[i]-queue_index = i;
  }
 -*ptr = netdev;
 +
 +conf-queues = queues;
 +
  return 0;
 +
 +err:
 +return ret;
  }

  static const char *print_netdev(void *ptr)
 @@ -661,7 +692,8 @@ static void set_vlan(Object *obj, Visitor *v, void 
 *opaque,
  {
  DeviceState *dev = DEVICE(obj);
  Property *prop = opaque;
 -NetClientState **ptr = qdev_get_prop_ptr(dev, prop);
 +NICPeers *peers_ptr = qdev_get_prop_ptr(dev, prop);
 +NetClientState **ptr = peers_ptr-ncs[0];
  Error *local_err = NULL;
  int32_t id;
  NetClientState *hubport;
 diff --git a/hw/qdev-properties.h b/hw/qdev-properties.h
 index 5b046ab..2d90848 100644
 --- a/hw/qdev-properties.h
 +++ b/hw/qdev-properties.h
 @@ -31,7 +31,7 @@ extern PropertyInfo qdev_prop_pci_host_devaddr;
  .name  = (_name),\
  .info  = (_prop),   \
  .offset= offsetof(_state, _field)\
 -+ type_check(_type,typeof_field(_state, _field)),\
 ++ type_check(_type, typeof_field(_state, _field)),   \
  }
  #define DEFINE_PROP_DEFAULT(_name, _state, _field, _defval, _prop, _type) { \
  .name  = (_name),   \
 @@ -77,9 +77,9 @@ extern PropertyInfo qdev_prop_pci_host_devaddr;
  #define DEFINE_PROP_STRING(_n, _s, _f) \
  DEFINE_PROP(_n, _s, _f, qdev_prop_string, char*)
  #define DEFINE_PROP_NETDEV(_n, _s, _f) \
 -DEFINE_PROP(_n, _s, _f, qdev_prop_netdev, NetClientState*)
 +DEFINE_PROP(_n, _s, _f, qdev_prop_netdev, NICPeers)
  #define DEFINE_PROP_VLAN(_n, _s, _f) \
 -DEFINE_PROP(_n, _s, _f, qdev_prop_vlan,

Re: [Qemu-devel] [PATCH 2/2] qemu-kvm/pci-assign: 64 bits bar emulation

2012-11-03 Thread Blue Swirl

On Fri, Nov 2, 2012 at 5:38 AM, Xudong Hao xudong@intel.com wrote:
 Enable 64 bits bar emulation.

 Signed-off-by: Xudong Hao xudong@intel.com
 ---
  hw/kvm/pci-assign.c |   18 --
  1 files changed, 12 insertions(+), 6 deletions(-)

 diff --git a/hw/kvm/pci-assign.c b/hw/kvm/pci-assign.c
 index 05b93d9..f1f8d1e 100644
 --- a/hw/kvm/pci-assign.c
 +++ b/hw/kvm/pci-assign.c
 @@ -46,6 +46,7 @@
  #define IORESOURCE_IRQ  0x0400
  #define IORESOURCE_DMA  0x0800
  #define IORESOURCE_PREFETCH 0x2000  /* No side effects */
 +#define IORESOURCE_MEM_64   0x0010

  //#define DEVICE_ASSIGNMENT_DEBUG

 @@ -442,9 +443,13 @@ static int assigned_dev_register_regions(PCIRegion 
 *io_regions,

  /* handle memory io regions */
  if (cur_region-type  IORESOURCE_MEM) {
 -int t = cur_region-type  IORESOURCE_PREFETCH
 -? PCI_BASE_ADDRESS_MEM_PREFETCH
 -: PCI_BASE_ADDRESS_SPACE_MEMORY;
 +int t = PCI_BASE_ADDRESS_SPACE_MEMORY;
 +if (cur_region-type  IORESOURCE_PREFETCH) {
 +t |= PCI_BASE_ADDRESS_MEM_PREFETCH;
 +}
 +if (cur_region-type  IORESOURCE_MEM_64) {
 +t |= PCI_BASE_ADDRESS_MEM_TYPE_64;
 +}

  /* map physical memory */
  pci_dev-v_addrs[i].u.r_virtbase = mmap(NULL, cur_region-size,
 @@ -468,8 +473,8 @@ static int assigned_dev_register_regions(PCIRegion 
 *io_regions,
  (cur_region-base_addr  0xFFF);

  if (cur_region-size  0xFFF) {
 -error_report(PCI region %d at address 0x% PRIx64  has 
 - size 0x% PRIx64 , which is not a multiple of 
 
 +error_report(PCI region %d at address 0lx% PRIx64  has 
 + size 0lx% PRIx64 , which is not a multiple 
 of 

Adding 'l' to '0x' prefix does not make sense.

   4K.  You might experience some performance hit 
 
   due to that.,
   i, cur_region-base_addr, cur_region-size);
 @@ -638,7 +643,8 @@ again:
  rp-valid = 0;
  rp-resource_fd = -1;
  size = end - start + 1;
 -flags = IORESOURCE_IO | IORESOURCE_MEM | IORESOURCE_PREFETCH;
 +flags = IORESOURCE_IO | IORESOURCE_MEM | IORESOURCE_PREFETCH
 + | IORESOURCE_MEM_64;
  if (size == 0 || (flags  ~IORESOURCE_PREFETCH) == 0) {
  continue;
  }
 --
 1.5.5


--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH 2/5] kvm: avoid using cpu_single_env

2012-10-28 Thread Blue Swirl

Pass around CPUState instead of using global cpu_single_env.

Signed-off-by: Blue Swirl blauwir...@gmail.com
---
 target-i386/kvm.c |   21 +++--
 1 files changed, 11 insertions(+), 10 deletions(-)

diff --git a/target-i386/kvm.c b/target-i386/kvm.c
index 3aa62b2..3329d5e 100644
--- a/target-i386/kvm.c
+++ b/target-i386/kvm.c
@@ -1909,14 +1909,15 @@ void kvm_arch_remove_all_hw_breakpoints(void)
 
 static CPUWatchpoint hw_watchpoint;
 
-static int kvm_handle_debug(struct kvm_debug_exit_arch *arch_info)
+static int kvm_handle_debug(CPUX86State *env,
+struct kvm_debug_exit_arch *arch_info)
 {
 int ret = 0;
 int n;
 
 if (arch_info-exception == 1) {
 if (arch_info-dr6  (1  14)) {
-if (cpu_single_env-singlestep_enabled) {
+if (env-singlestep_enabled) {
 ret = EXCP_DEBUG;
 }
 } else {
@@ -1928,13 +1929,13 @@ static int kvm_handle_debug(struct kvm_debug_exit_arch 
*arch_info)
 break;
 case 0x1:
 ret = EXCP_DEBUG;
-cpu_single_env-watchpoint_hit = hw_watchpoint;
+env-watchpoint_hit = hw_watchpoint;
 hw_watchpoint.vaddr = hw_breakpoint[n].addr;
 hw_watchpoint.flags = BP_MEM_WRITE;
 break;
 case 0x3:
 ret = EXCP_DEBUG;
-cpu_single_env-watchpoint_hit = hw_watchpoint;
+env-watchpoint_hit = hw_watchpoint;
 hw_watchpoint.vaddr = hw_breakpoint[n].addr;
 hw_watchpoint.flags = BP_MEM_ACCESS;
 break;
@@ -1942,16 +1943,16 @@ static int kvm_handle_debug(struct kvm_debug_exit_arch 
*arch_info)
 }
 }
 }
-} else if (kvm_find_sw_breakpoint(cpu_single_env, arch_info-pc)) {
+} else if (kvm_find_sw_breakpoint(env, arch_info-pc)) {
 ret = EXCP_DEBUG;
 }
 if (ret == 0) {
-cpu_synchronize_state(cpu_single_env);
-assert(cpu_single_env-exception_injected == -1);
+cpu_synchronize_state(env);
+assert(env-exception_injected == -1);
 
 /* pass to guest */
-cpu_single_env-exception_injected = arch_info-exception;
-cpu_single_env-has_error_code = 0;
+env-exception_injected = arch_info-exception;
+env-has_error_code = 0;
 }
 
 return ret;
@@ -2034,7 +2035,7 @@ int kvm_arch_handle_exit(CPUX86State *env, struct kvm_run 
*run)
 break;
 case KVM_EXIT_DEBUG:
 DPRINTF(kvm_exit_debug\n);
-ret = kvm_handle_debug(run-debug.arch);
+ret = kvm_handle_debug(env, run-debug.arch);
 break;
 default:
 fprintf(stderr, KVM: unknown exit reason %d\n, run-exit_reason);
-- 
1.7.2.5

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [RFC PATCH v3 06/19] Implement -dimm command line option

2012-10-19 Thread Blue Swirl

On Thu, Oct 18, 2012 at 12:33 PM, Avi Kivity a...@redhat.com wrote:
 On 10/18/2012 11:27 AM, Vasilis Liaskovitis wrote:
 On Wed, Oct 17, 2012 at 12:03:51PM +0200, Avi Kivity wrote:
 On 10/17/2012 11:19 AM, Vasilis Liaskovitis wrote:
 
  I don't think so, but probably there's a limit of DIMMs that real
  controllers have, something like 8 max.
 
  In the case of i440fx specifically, do you mean that we should model the 
  DRB
  (Dram row boundary registers in section 3.2.19 of the i440fx spec) ?
 
  The i440fx DRB registers only supports up to 8 DRAM rows (let's say 1 row
  maps 1-1 to a DimmDevice for this discussion) and only supports up to 2GB 
  of
  memory afaict (bit 31 and above is ignored).
 
  I 'd rather not model this part of the i440fx - having only 8 DIMMs seems 
  too
  restrictive. The rest of the patchset supports up to 255 DIMMs so it 
  would be a
  waste imho to model an old pc memory controller that only supports 8 
  DIMMs.
 
  There was also an old discussion about i440fx modeling here:
  https://lists.nongnu.org/archive/html/qemu-devel/2011-07/msg02705.html
  the general direction was that i440fx is too old and we don't want to 
  precisely
  emulate the DRB registers, since they lack flexibility.
 
  Possible solutions:
 
  1) is there a newer and more flexible chipset that we could model?

 Look for q35 on this list.

 thanks, I 'll take a look. It sounds like the other options below are more
 straightforward now, but let me know if you prefer q35 integration as a 
 priority.

 At least validate that what you're doing fits with how q35 works.


 
  We could for example model:
  - an 8-bit non-cumulative register for each DIMM, denoting how many
  128MB chunks it contains. This allows 32GB for each DIMM, and with 255 
  DIMMs we
  describe a bit less than 8TB. These registers require 255 bytes.
  - a 16-bit cumulative register for each DIMM again for 128MB chunks. This 
  allows
  us to describe 8TB of memory (but the registers take up double the space, 
  because
  they describe cumulative memory amounts)

 There is no reason to save space.  Why not have two 64-bit registers per
 DIMM, one describing the size and the other the base address, both in
 bytes?  Use a few low order bits for control.

 Do we want this generic scheme above to be tied into the i440fx/pc machine?

 Yes.  q35 should work according to its own specifications.

 Or have it as a separate generic memory bus / pmc usable by others (e.g. in
 hw/dimm.c)?
 The 64-bit values you describe are already part of DimmDevice properties, but
 they are not hardware registers described as part of a chipset.

 In terms of control bits, did you want to mimic some other chipset 
 registers? -
 any examples would be useful.

 I don't have any real requirements.  Just make it simple and easily
 accessible to ACPI code.



 
  3) let everything be handled/abstracted by dimmbus - the chipset DRB 
  modelling
  is not done (at least for i440fx, other machines could). This is the 
  least precise
  in terms of emulation. On the other hand, if we are not really trying to 
  emulate
  the real (too restrictive) hardware, does it matter?

 We could emulate base memory using the chipset, and extra memory using
 the scheme above.  This allows guests that are tied to the chipset to
 work, and guests that have more awareness (seabios) to use the extra
 features.

 But if we use the real i440fx pmc DRBs for base memory, this means base 
 memory
 would be = 2GB, right?

 Sounds like we 'd need to change the DRBs anyway to describe useful amounts 
 of
 base memory (e.g. 512MB chunks and check against address lines [36:29] can
 describe base memory up to 64GB, though that's still limiting for very large
 VMs). But we'd be diverting from the real hardware again.

 Then there's no point.  Modelling real hardware allows guests written to
 work against that hardware to function correctly.  If you diverge, they
 won't.

The guest is also unlikely to want to reprogram the memory controller.



 Then we can model base memory with tweaked i440fx pmc's DRB registers - we
 could only use DRB[0] (one DIMM describing all of base memory) or more.

 DIMMs would be allowed to be hotplugged in the generic mem-controller scheme 
 only
 (unless it makes sense to allow hotplug in the remaining pmc DRBs and
 start using the generic scheme once we run out of emulated DRBs)


 440fx seems a lost cause, so we can go wild and just implement pv dimms.

Maybe. But what would a PV DIMM be? Do we need any DIMM-like
granularity at all? Instead, the guest could be told to use a list of
RAM regions with arbitrary start and end addresses. Isn't ballooning
also related?

  For q35 I'd like to stay within the spec.

That may not last forever when machines have terabytes of memory.


 --
 error compiling committee.c: too many arguments to function
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo

Re: [RFC PATCH v3 06/19] Implement -dimm command line option

2012-10-13 Thread Blue Swirl

On Tue, Oct 9, 2012 at 5:04 PM, Vasilis Liaskovitis
vasilis.liaskovi...@profitbricks.com wrote:
 Hi,

 sorry for the delayed answer.

 On Sat, Sep 29, 2012 at 11:13:04AM +, Blue Swirl wrote:
 
  The -dimm option is supposed to specify the dimm/memory layout, and not 
  create
  any devices.
 
  If we don't want this new option, I have a question:
 
  A -device/device_add means we create a new qdev device at startup or as a
  hotplug operation respectively. So, the semantics of
  -device dimm,id=dimm0,size=512M,node=0,populated=on are clear to me.
 
  What does -device dimm,populated=off mean from a qdev perspective? There 
  are 2
  alternatives:
 
  - The device is created on the dimmbus, but is not used/populated yet. 
  Then the
  activation/acpi-hotplug of the dimm may require a separate command (we 
  used to have
  dimm_add in versions  3). device_add handling always hotplugs a new 
  qdev
  device, so this wouldn't fit this usecase, because the device already 
  exists. In
  this case, the actual acpi hotplug operation is decoupled from qdev 
  device
  creation.

 The bus exists but the devices do not, device_add would add DIMMs to
 the bus. This matches PCI bus created by the host bridge and PCI
 device hotplug.

 A more complex setup would be dimm bus, dimm slot devices and DIMM
 devices. The intermediate slot device would contain one DIMM device if
 plugged.

 interesting, I haven't thought about this alternative. It does sound overly
 complex, but a dimmslot / dimmdevice splitup could consolidate hotplug 
 semantic
 differences between populated=on/off. Something similar to the dimmslot device
 is already present in v3 (dimmcfg structure), but it's not a qdev visible 
 device.
 I 'd rather avoid the complication, but i might revisit this idea.

The memory controller could also be able to enable and disable slots
independently of their population state.



 
  - The dimmdevice is not created when -device dimm,populated=off (this 
  would
  require some ugly checking in normal -device argument handling). Only the 
  dimm
  layout is saved. The hotplug is triggered from a normal device_add later. 
  So in
  this case, the acpi hotplug happens at the same time as the qdev hotplug.
 
  Do you see a simpler alternative without introducing a new option?
 
  Using the -dimm option follows the second semantic and avoids changing 
  the -device
  semantics. Dimm layout description is decoupled from dimmdevice creation, 
  and qdev
  hotplug coincides with acpi hotplug.

 Maybe even the dimmbus device shouldn't exist by itself after all, or
 it should be pretty much invisible to users. On real HW, the memory
 controller or south bridge handles the memory. For i440fx, it's part
 of the same chipset. So I think we should just add qdev properties to
 i440fx to specify the sizes, nodes etc. Then i440fx should create the
 dimmbus device unconditionally using the properties. The default
 properties should create a sane configuration, otherwise -global
 i440fx.dimm_size=512M etc. could be used. Then the bus would be
 populated as before or with device_add.

 hmm the problem with using only i440fx properties, is that size/nodes look
 dimm specific to me, not chipset-memcontroller specific. Unless we only allow
 uniform size dimms. Is it possible to have a dynamic list of sizes/nodes 
 pairs as
 properties of a qdev device?

I don't think so, but probably there's a limit of DIMMs that real
controllers have, something like 8 max.


 Also if there is no dimmbus, and instead we have only links from i440fx to 
 dimm-devices,
 would the current qdev hotplug API be enough?

I'd just disable hotplug if there is no dimmbus (ISA PC?).


 I am currently leaning towards this: i440fx unconditionally creates the 
 dimmbus. Users
 don't have to specify the bus (i assume this is what you mean by dimmbus 
 should
 be invisible to the users)

 We only use -device dimm to describe dimms. With -device 
 dimm,populated=off, only
 the dimm config layout will be saved in the dimmbus. The hotplug is triggered 
 from a normal
 device_add later (same as pci hotplug).

OK.


 thanks,

 - Vasilis
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [Qemu-devel] [RFC v2 2/6] ARM: KVM: Add support for KVM on ARM architecture

2012-10-13 Thread Blue Swirl

On Wed, Oct 10, 2012 at 3:07 PM, Peter Maydell peter.mayd...@linaro.org wrote:
 From: Christoffer Dall cd...@cs.columbia.edu

 Add basic support for KVM on ARM architecture.

 Signed-off-by: Christoffer Dall cd...@cs.columbia.edu
 [PMM: Minor tweaks and code cleanup, switch to ONE_REG]
 Signed-off-by: Peter Maydell peter.mayd...@linaro.org
 ---
  hw/arm_pic.c |   28 
  target-arm/Makefile.objs |1 +
  target-arm/cpu.h |1 +
  target-arm/helper.c  |2 +-
  target-arm/kvm.c |  328 
 ++
  5 files changed, 359 insertions(+), 1 deletion(-)
  create mode 100644 target-arm/kvm.c

 diff --git a/hw/arm_pic.c b/hw/arm_pic.c
 index ffb4d41..72272dc 100644
 --- a/hw/arm_pic.c
 +++ b/hw/arm_pic.c
 @@ -9,6 +9,7 @@

  #include hw.h
  #include arm-misc.h
 +#include kvm.h

  /* Input 0 is IRQ and input 1 is FIQ.  */
  static void arm_pic_cpu_handler(void *opaque, int irq, int level)
 @@ -34,7 +35,34 @@ static void arm_pic_cpu_handler(void *opaque, int irq, int 
 level)
  }
  }

 +#ifdef CONFIG_KVM
 +static void kvm_arm_pic_cpu_handler(void *opaque, int irq, int level)
 +{
 +ARMCPU *cpu = opaque;
 +CPUARMState *env = cpu-env;
 +int kvm_irq = KVM_ARM_IRQ_TYPE_CPU  KVM_ARM_IRQ_TYPE_SHIFT;
 +
 +switch (irq) {
 +case ARM_PIC_CPU_IRQ:
 +kvm_irq |= KVM_ARM_IRQ_CPU_IRQ;
 +break;
 +case ARM_PIC_CPU_FIQ:
 +kvm_irq |= KVM_ARM_IRQ_CPU_FIQ;
 +break;
 +default:
 +hw_error(kvm_arm_pic_cpu_handler: Bad interrupt line %d\n, irq);
 +}
 +kvm_irq |= env-cpu_index  KVM_ARM_IRQ_VCPU_SHIFT;
 +kvm_set_irq(kvm_state, kvm_irq, level ? 1 : 0);
 +}
 +#endif
 +
  qemu_irq *arm_pic_init_cpu(ARMCPU *cpu)
  {
 +#ifdef CONFIG_KVM
 +if (kvm_enabled()) {
 +return qemu_allocate_irqs(kvm_arm_pic_cpu_handler, cpu, 2);
 +}
 +#endif
  return qemu_allocate_irqs(arm_pic_cpu_handler, cpu, 2);
  }
 diff --git a/target-arm/Makefile.objs b/target-arm/Makefile.objs
 index b6f1a9e..d89b57c 100644
 --- a/target-arm/Makefile.objs
 +++ b/target-arm/Makefile.objs
 @@ -1,4 +1,5 @@
  obj-y += arm-semi.o
  obj-$(CONFIG_SOFTMMU) += machine.o
 +obj-$(CONFIG_KVM) += kvm.o
  obj-y += translate.o op_helper.o helper.o cpu.o
  obj-y += neon_helper.o iwmmxt_helper.o
 diff --git a/target-arm/cpu.h b/target-arm/cpu.h
 index ff4de10..8c7e574 100644
 --- a/target-arm/cpu.h
 +++ b/target-arm/cpu.h
 @@ -236,6 +236,7 @@ ARMCPU *cpu_arm_init(const char *cpu_model);
  void arm_translate_init(void);
  int cpu_arm_exec(CPUARMState *s);
  void do_interrupt(CPUARMState *);
 +int bank_number(CPUARMState *env, int mode);
  void switch_mode(CPUARMState *, int);
  uint32_t do_arm_semihosting(CPUARMState *env);

 diff --git a/target-arm/helper.c b/target-arm/helper.c
 index 58340bd..f62f3da 100644
 --- a/target-arm/helper.c
 +++ b/target-arm/helper.c
 @@ -1619,7 +1619,7 @@ uint32_t HELPER(get_r13_banked)(CPUARMState *env, 
 uint32_t mode)
  #else

  /* Map CPU modes onto saved register banks.  */
 -static inline int bank_number(CPUARMState *env, int mode)
 +int bank_number(CPUARMState *env, int mode)
  {
  switch (mode) {
  case ARM_CPU_MODE_USR:
 diff --git a/target-arm/kvm.c b/target-arm/kvm.c
 new file mode 100644
 index 000..fee60e1
 --- /dev/null
 +++ b/target-arm/kvm.c
 @@ -0,0 +1,328 @@
 +/*
 + * ARM implementation of KVM hooks
 + *
 + * Copyright Christoffer Dall 2009-2010
 + *
 + * This work is licensed under the terms of the GNU GPL, version 2 or later.
 + * See the COPYING file in the top-level directory.
 + *
 + */
 +
 +#include stdio.h
 +#include sys/types.h
 +#include sys/ioctl.h
 +#include sys/mman.h
 +
 +#include linux/kvm.h
 +
 +#include qemu-common.h
 +#include qemu-timer.h
 +#include sysemu.h
 +#include kvm.h
 +#include cpu.h
 +#include device_tree.h

Is this used?

 +#include hw/arm-misc.h
 +
 +const KVMCapabilityInfo kvm_arch_required_capabilities[] = {

'static'. In fact, 'static' is not used at all in this file; it could
probably be used a lot more.

 +KVM_CAP_LAST_INFO
 +};
 +
 +int kvm_arch_init(KVMState *s)
 +{
 +/* For ARM interrupt delivery is always asynchronous,
 + * whether we are using an in-kernel VGIC or not.
 + */
 +kvm_async_interrupts_allowed = true;
 +return 0;
 +}
 +
 +int kvm_arch_init_vcpu(CPUARMState *env)
 +{
 +struct kvm_vcpu_init init;
 +
 +init.target = KVM_ARM_TARGET_CORTEX_A15;
 +memset(init.features, 0, sizeof(init.features));
 +return kvm_vcpu_ioctl(env, KVM_ARM_VCPU_INIT, init);
 +}
 +
 +struct reg {

Reg or other CamelCase version and a typedef, please.

 +uint64_t id;
 +int offset;
 +};
 +
 +#define COREREG(KERNELNAME, QEMUFIELD)   \
 +{\
 +KVM_REG_ARM | KVM_REG_SIZE_U32 | \
 +KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(KERNELNAME), \
 +offsetof(CPUARMState, QEMUFIELD)

Re: [PATCH v6 3/4] vfio: vfio-pci device assignment driver

2012-10-05 Thread Blue Swirl

On Fri, Oct 5, 2012 at 5:11 PM, Alex Williamson
alex.william...@redhat.com wrote:
 On Fri, 2012-10-05 at 16:54 +, Blue Swirl wrote:
 On Wed, Sep 26, 2012 at 5:19 PM, Alex Williamson
 alex.william...@redhat.com wrote:
  +
  +typedef struct QEMU_PACKED VFIOIRQSetFD {
  +struct vfio_irq_set irq_set;
  +int32_t fd;
  +} VFIOIRQSetFD;

 I'm now getting this error from Clang:

 /src/qemu/hw/vfio_pci.c:126:25: error: field 'irq_set' with variable
 sized type 'struct vfio_irq_set' not at the end of a struct or class
 is a GNU extension [-Werror,-Wgnu]
 struct vfio_irq_set irq_set;

 Does the kernel really use the fd field, isn't it implicit from the
 ioctl fd or are they different?

 The kernel side is defined as:

 struct vfio_irq_set {
 __u32   argsz;
 __u32   flags;
 __u32   index;
 __u32   start;
 __u32   count;
 __u8data[];
 };

Then the kernel only expects vfio_irq_set structure, not VFIOIRQSetFD,
so you should use irq_set_fd.irq_set instead of irq_set_fd for the
ioctl(). Then VFIOIRQSetFD can be rearranged to have fd field first,
also QEMU_PACKED is not necessary.


 Where data is the start of a variable sized array.  The data type of the
 array depends on the flags.  The purpose of VFIOIRQSetFD is simply to
 make a data type that I don't need to dynamically allocate.  You can
 find other cases for MSI and MSIX where we don't know the array size and
 do malloc the whole structure.  For this interrupt type we know there's
 only one entry.  If there's a better way to do this, let me know.  VFIO
 is only available on Linux hosts, so I have no particular reason to
 avoid GNU extensions.

  +
  +static int vfio_enable_intx(VFIODevice *vdev)
  +{
  +VFIOIRQSetFD irq_set_fd = {
  +.irq_set = {
  +.argsz = sizeof(irq_set_fd),
  +.flags = VFIO_IRQ_SET_DATA_EVENTFD | 
  VFIO_IRQ_SET_ACTION_TRIGGER,
  +.index = VFIO_PCI_INTX_IRQ_INDEX,
  +.start = 0,
  +.count = 1,
  +},

 Here the field is not even initialized.

 It's initialized later...

  +};
  +uint8_t pin = vfio_pci_read_config(vdev-pdev, PCI_INTERRUPT_PIN, 1);
  +int ret;
  +
  +if (vdev-intx.disabled || !pin) {
  +return 0;
  +}
  +
  +vfio_disable_interrupts(vdev);
  +
  +vdev-intx.pin = pin - 1; /* Pin A (1) - irq[0] */
  +ret = event_notifier_init(vdev-intx.interrupt, 0);
  +if (ret) {
  +error_report(vfio: Error: event_notifier_init failed\n);
  +return ret;
  +}
  +
  +irq_set_fd.fd = event_notifier_get_fd(vdev-intx.interrupt);

 Here.

 Thanks,
 Alex

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH v6 3/4] vfio: vfio-pci device assignment driver

2012-10-05 Thread Blue Swirl

On Fri, Oct 5, 2012 at 5:33 PM, Alex Williamson
alex.william...@redhat.com wrote:
 On Fri, 2012-10-05 at 17:22 +, Blue Swirl wrote:
 On Fri, Oct 5, 2012 at 5:11 PM, Alex Williamson
 alex.william...@redhat.com wrote:
  On Fri, 2012-10-05 at 16:54 +, Blue Swirl wrote:
  On Wed, Sep 26, 2012 at 5:19 PM, Alex Williamson
  alex.william...@redhat.com wrote:
   +
   +typedef struct QEMU_PACKED VFIOIRQSetFD {
   +struct vfio_irq_set irq_set;
   +int32_t fd;
   +} VFIOIRQSetFD;
 
  I'm now getting this error from Clang:
 
  /src/qemu/hw/vfio_pci.c:126:25: error: field 'irq_set' with variable
  sized type 'struct vfio_irq_set' not at the end of a struct or class
  is a GNU extension [-Werror,-Wgnu]
  struct vfio_irq_set irq_set;
 
  Does the kernel really use the fd field, isn't it implicit from the
  ioctl fd or are they different?
 
  The kernel side is defined as:
 
  struct vfio_irq_set {
  __u32   argsz;
  __u32   flags;
  __u32   index;
  __u32   start;
  __u32   count;
  __u8data[];
  };

 Then the kernel only expects vfio_irq_set structure, not VFIOIRQSetFD,
 so you should use irq_set_fd.irq_set instead of irq_set_fd for the
 ioctl(). Then VFIOIRQSetFD can be rearranged to have fd field first,
 also QEMU_PACKED is not necessary.

 Sorry, I was unclear.  The kernel sees fd as data[0], that's the point
 of the structure, so re-arranging it makes it useless.  Thanks,

I see. The example in GCC shows how to statically initialize flexible
array members properly but it does not seem to work:
http://gcc.gnu.org/onlinedocs/gcc/Zero-Length.html

Also, Clang does not like that syntax either.

Maybe it's best to use g_malloc with room for the extra int.


 Alex

  Where data is the start of a variable sized array.  The data type of the
  array depends on the flags.  The purpose of VFIOIRQSetFD is simply to
  make a data type that I don't need to dynamically allocate.  You can
  find other cases for MSI and MSIX where we don't know the array size and
  do malloc the whole structure.  For this interrupt type we know there's
  only one entry.  If there's a better way to do this, let me know.  VFIO
  is only available on Linux hosts, so I have no particular reason to
  avoid GNU extensions.
 
   +
   +static int vfio_enable_intx(VFIODevice *vdev)
   +{
   +VFIOIRQSetFD irq_set_fd = {
   +.irq_set = {
   +.argsz = sizeof(irq_set_fd),
   +.flags = VFIO_IRQ_SET_DATA_EVENTFD | 
   VFIO_IRQ_SET_ACTION_TRIGGER,
   +.index = VFIO_PCI_INTX_IRQ_INDEX,
   +.start = 0,
   +.count = 1,
   +},
 
  Here the field is not even initialized.
 
  It's initialized later...
 
   +};
   +uint8_t pin = vfio_pci_read_config(vdev-pdev, PCI_INTERRUPT_PIN, 
   1);
   +int ret;
   +
   +if (vdev-intx.disabled || !pin) {
   +return 0;
   +}
   +
   +vfio_disable_interrupts(vdev);
   +
   +vdev-intx.pin = pin - 1; /* Pin A (1) - irq[0] */
   +ret = event_notifier_init(vdev-intx.interrupt, 0);
   +if (ret) {
   +error_report(vfio: Error: event_notifier_init failed\n);
   +return ret;
   +}
   +
   +irq_set_fd.fd = event_notifier_get_fd(vdev-intx.interrupt);
 
  Here.
 
  Thanks,
  Alex
 



--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH] kvm: Set default accelerator to kvm if the host supports it

2012-10-03 Thread Blue Swirl

On Mon, Oct 1, 2012 at 4:20 PM, Anthony Liguori anth...@codemonkey.ws wrote:
 Jan Kiszka jan.kis...@siemens.com writes:

 If we built a target for a host that supports KVM in principle, set the
 default accelerator to KVM as well. This also means that QEMU
 will fail to start if KVM support turns out to be unavailable at
 runtime.

 Signed-off-by: Jan Kiszka jan.kis...@siemens.com
 ---
  kvm-all.c  |1 +
  kvm-stub.c |1 +
  kvm.h  |1 +
  vl.c   |4 ++--
  4 files changed, 5 insertions(+), 2 deletions(-)

 diff --git a/kvm-all.c b/kvm-all.c
 index 92a7137..4d5f86c 100644
 --- a/kvm-all.c
 +++ b/kvm-all.c
 @@ -103,6 +103,7 @@ struct KVMState
  #endif
  };

 +bool kvm_configured = true;
  KVMState *kvm_state;
  bool kvm_kernel_irqchip;
  bool kvm_async_interrupts_allowed;
 diff --git a/kvm-stub.c b/kvm-stub.c
 index 3c52eb5..86a6451 100644
 --- a/kvm-stub.c
 +++ b/kvm-stub.c
 @@ -17,6 +17,7 @@
  #include gdbstub.h
  #include kvm.h

 +bool kvm_configured;
  KVMState *kvm_state;
  bool kvm_kernel_irqchip;
  bool kvm_async_interrupts_allowed;
 diff --git a/kvm.h b/kvm.h
 index dea2998..9936e5f 100644
 --- a/kvm.h
 +++ b/kvm.h
 @@ -22,6 +22,7 @@
  #include linux/kvm.h
  #endif

 +extern bool kvm_configured;
  extern int kvm_allowed;
  extern bool kvm_kernel_irqchip;
  extern bool kvm_async_interrupts_allowed;
 diff --git a/vl.c b/vl.c
 index 8d305ca..f557bd1 100644
 --- a/vl.c
 +++ b/vl.c
 @@ -2215,8 +2215,8 @@ static int configure_accelerator(void)
  }

  if (p == NULL) {
 -/* Use the default accelerator, tcg */
 -p = tcg;
 +/* The default accelerator depends on the availability of KVM. */
 +p = kvm_configured ? kvm : tcg;
  }

 How about making this an arch_init() function call and then using a #if
 defined(KVM_CONFIG) in arch_init.c?

 I hate to introduce another global variable if we can avoid it...

 Otherwise:

 Acked-by: Anthony Liguori aligu...@us.ibm.com

 Blue/Aurelien, any objections?

No, maybe a message could be printed that says that the default has
changed, for a few releases.


 Regards,

 Anthony Liguori


  while (!accel_initialised  *p != '\0') {
 --
 1.7.3.4
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [RFC PATCH v3 06/19] Implement -dimm command line option

2012-09-29 Thread Blue Swirl

On Mon, Sep 24, 2012 at 10:42 AM, Vasilis Liaskovitis
vasilis.liaskovi...@profitbricks.com wrote:
 On Sat, Sep 22, 2012 at 01:46:57PM +, Blue Swirl wrote:
 On Fri, Sep 21, 2012 at 11:17 AM, Vasilis Liaskovitis
 vasilis.liaskovi...@profitbricks.com wrote:
  Example:
  -dimm id=dimm0,size=512M,node=0,populated=off

 There should not be a need to introduce a new top level option,
 instead you should just use -device, like
 -device dimm,base=0,id=dimm0,size=512M,node=0,populated=off

 That would also specify the start address.

 What is base? the start address? I think the start address should be 
 calculated by the
 chipset / board, not by the user.

Yes.


 The -dimm option is supposed to specify the dimm/memory layout, and not 
 create
 any devices.

 If we don't want this new option, I have a question:

 A -device/device_add means we create a new qdev device at startup or as a
 hotplug operation respectively. So, the semantics of
 -device dimm,id=dimm0,size=512M,node=0,populated=on are clear to me.

 What does -device dimm,populated=off mean from a qdev perspective? There 
 are 2
 alternatives:

 - The device is created on the dimmbus, but is not used/populated yet. Then 
 the
 activation/acpi-hotplug of the dimm may require a separate command (we used 
 to have
 dimm_add in versions  3). device_add handling always hotplugs a new qdev
 device, so this wouldn't fit this usecase, because the device already exists. 
 In
 this case, the actual acpi hotplug operation is decoupled from qdev device
 creation.

The bus exists but the devices do not, device_add would add DIMMs to
the bus. This matches PCI bus created by the host bridge and PCI
device hotplug.

A more complex setup would be dimm bus, dimm slot devices and DIMM
devices. The intermediate slot device would contain one DIMM device if
plugged.


 - The dimmdevice is not created when -device dimm,populated=off (this would
 require some ugly checking in normal -device argument handling). Only the dimm
 layout is saved. The hotplug is triggered from a normal device_add later. So 
 in
 this case, the acpi hotplug happens at the same time as the qdev hotplug.

 Do you see a simpler alternative without introducing a new option?

 Using the -dimm option follows the second semantic and avoids changing the 
 -device
 semantics. Dimm layout description is decoupled from dimmdevice creation, and 
 qdev
 hotplug coincides with acpi hotplug.

Maybe even the dimmbus device shouldn't exist by itself after all, or
it should be pretty much invisible to users. On real HW, the memory
controller or south bridge handles the memory. For i440fx, it's part
of the same chipset. So I think we should just add qdev properties to
i440fx to specify the sizes, nodes etc. Then i440fx should create the
dimmbus device unconditionally using the properties. The default
properties should create a sane configuration, otherwise -global
i440fx.dimm_size=512M etc. could be used. Then the bus would be
populated as before or with device_add.


 thanks,

 - Vasilis
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [RFC PATCH v3 08/19] pc: calculate dimm physical addresses and adjust memory map

2012-09-29 Thread Blue Swirl

On Mon, Sep 24, 2012 at 3:27 PM, Vasilis Liaskovitis
vasilis.liaskovi...@profitbricks.com wrote:
 On Sat, Sep 22, 2012 at 02:15:28PM +, Blue Swirl wrote:
  +
  +/* Function to configure memory offsets of hotpluggable dimms */
  +
  +target_phys_addr_t pc_set_hp_memory_offset(uint64_t size)
  +{
  +target_phys_addr_t ret;
  +
  +/* on first call, initialize ram_hp_offset */
  +if (!ram_hp_offset) {
  +if (ram_size = PCI_HOLE_START ) {
  +ram_hp_offset = 0x1LL + (ram_size - PCI_HOLE_START);
  +} else {
  +ram_hp_offset = ram_size;
  +}
  +}
  +
  +if (ram_hp_offset = 0x1LL) {
  +ret = ram_hp_offset;
  +above_4g_hp_mem_size += size;
  +ram_hp_offset += size;
  +}
  +/* if dimm fits before pci hole, append it normally */
  +else if (ram_hp_offset + size = PCI_HOLE_START) {

 } else if ...

  +ret = ram_hp_offset;
  +below_4g_hp_mem_size += size;
  +ram_hp_offset += size;
  +}
  +/* otherwise place it above 4GB */
  +else {

 } else {

  +ret = 0x1LL;
  +above_4g_hp_mem_size += size;
  +ram_hp_offset = 0x1LL + size;
  +}
  +
  +return ret;
  +}

 But the function and use of lots of global variables is ugly. The dimm
 devices should be just created in piix_pci.c (i440fx) directly with
 correct offsets and sizes, so all  below_4g_mem_size etc. calculations
 should be moved there. That would implement the PMC part of i440fx.

 For ISA PC, probably the board should create the DIMMs since there may
 not be a memory controller. The 4G logic does not make sense there
 anyway.

 What about moving the implementation to pc_piix.c?
 Initial RAM and pci windows are already calculated in pc_init1, and then 
 passed
 to i440fx_init. The memory bus could be attached to i440fx for pci-enabled pc
 and to isabus-bridge for isa-pc (isa-pc not tested yet).

I'd move the calculations also to i440fx, it (PMC) determines the
memory configuration on real HW too.


 Something like the following:

 ---
  hw/pc.h  |1 +
  hw/pc_piix.c |   57 +++--
  2 files changed, 52 insertions(+), 6 deletions(-)

 diff --git a/hw/pc.h b/hw/pc.h
 index e4db071..d6cc43b 100644
 --- a/hw/pc.h
 +++ b/hw/pc.h
 @@ -10,6 +10,7 @@
  #include memory.h
  #include ioapic.h

 +#define PCI_HOLE_START 0xe000
  /* PC-style peripherals (also used by other machines).  */

  /* serial.c */
 diff --git a/hw/pc_piix.c b/hw/pc_piix.c
 index 88ff041..17db95a 100644
 --- a/hw/pc_piix.c
 +++ b/hw/pc_piix.c
 @@ -43,6 +43,7 @@
  #include xen.h
  #include memory.h
  #include exec-memory.h
 +#include dimm.h
  #ifdef CONFIG_XEN
  #  include xen/hvm/hvm_info_table.h
  #endif
 @@ -52,6 +53,8 @@
  static const int ide_iobase[MAX_IDE_BUS] = { 0x1f0, 0x170 };
  static const int ide_iobase2[MAX_IDE_BUS] = { 0x3f6, 0x376 };
  static const int ide_irq[MAX_IDE_BUS] = { 14, 15 };
 +static ram_addr_t below_4g_hp_mem_size = 0;
 +static ram_addr_t above_4g_hp_mem_size = 0;

  static void kvm_piix3_setup_irq_routing(bool pci_enabled)
  {
 @@ -117,6 +120,41 @@ static void ioapic_init(GSIState *gsi_state)
  }
  }

 +static target_phys_addr_t pc_set_hp_memory_offset(uint64_t size)
 +{
 +target_phys_addr_t ret;
 +static ram_addr_t ram_hp_offset = 0;
 +
 +/* on first call, initialize ram_hp_offset */
 +if (!ram_hp_offset) {
 +if (ram_size = PCI_HOLE_START ) {
 +ram_hp_offset = 0x1LL + (ram_size - PCI_HOLE_START);
 +} else {
 +ram_hp_offset = ram_size;
 +}
 +}
 +
 +if (ram_hp_offset = 0x1LL) {
 +ret = ram_hp_offset;
 +above_4g_hp_mem_size += size;
 +ram_hp_offset += size;
 +}
 +/* if dimm fits before pci hole, append it normally */
 +else if (ram_hp_offset + size = PCI_HOLE_START) {
 +ret = ram_hp_offset;
 +below_4g_hp_mem_size += size;
 +ram_hp_offset += size;
 +}
 +/* otherwise place it above 4GB */
 +else {
 +ret = 0x1LL;
 +above_4g_hp_mem_size += size;
 +ram_hp_offset = 0x1LL + size;
 +}
 +
 +return ret;
 +}
 +
  /* PC hardware initialisation */
  static void pc_init1(MemoryRegion *system_memory,
   MemoryRegion *system_io,
 @@ -155,9 +193,9 @@ static void pc_init1(MemoryRegion *system_memory,
  kvmclock_create();
  }

 -if (ram_size = 0xe000 ) {
 -above_4g_mem_size = ram_size - 0xe000;
 -below_4g_mem_size = 0xe000;
 +if (ram_size = PCI_HOLE_START ) {
 +above_4g_mem_size = ram_size - PCI_HOLE_START;
 +below_4g_mem_size = PCI_HOLE_START;
  } else {
  above_4g_mem_size = 0;
  below_4g_mem_size = ram_size;
 @@ -172,6 +210,9 @@ static void pc_init1(MemoryRegion *system_memory,
  rom_memory = system_memory

Re: [Qemu-devel] [RfC PATCH] vga: add mmio bar to standard vga

2012-09-22 Thread Blue Swirl

On Thu, Sep 20, 2012 at 5:43 AM, Gerd Hoffmann kra...@redhat.com wrote:
   Hi,

 +vbe_ioport_write_index(d-vga, 0, index);
 +return vbe_ioport_read_data(d-vga, 0);

 These functions are only available with CONFIG_BOCHS_VBE #defined, so
 this code should be conditional as well.

 But building without CONFIG_BOCHS_VBE is not very useful since it's
 used by the BIOS and there's no display output without it IIRC.

 Well, text mode is still there, but no (by modern standards) useful
 graphics modes, only standard vga ones (i.e. up to 800x600 @ 256 colors
 or something like that).

 I guess it is better to just remove CONFIG_BOCHS_VBE, /me goes prepare a
 patch.

Bochs support could be disabled with a property, but again the value
is probably low.


 cheers,
   Gerd

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [Qemu-devel] [PATCH v5 00/17] Allow changing of Hypervisor CPUIDs.

2012-09-22 Thread Blue Swirl

On Sat, Sep 22, 2012 at 12:13 AM, Don Slutz d...@cloudswitch.com wrote:
Also known as Paravirtualization CPUIDs.

This is primarily done so that the guest will think it is running
under vmware when hypervisor-vendor=vmware is specified as a
property of a cpu.

Please use checkpatch.pl to check for missing braces etc.

This depends on:

http://lists.gnu.org/archive/html/qemu-devel/2012-09/msg01400.html

As far as I know it is #4. It depends on (1) and (2) and (3).

This change is based on:

Microsoft Hypervisor CPUID Leaves:

http://msdn.microsoft.com/en-us/library/windows/hardware/ff542428%28v=vs.85%29.aspx

Linux kernel change starts with:
http://fixunix.com/kernel/538707-use-cpuid-communicate-hypervisor.html
Also:
http://lkml.indiana.edu/hypermail/linux/kernel/1205.0/00100.html

VMware documentation on CPUIDs (Mechanisms to determine if software is
running in a VMware virtual machine):

http://kb.vmware.com/selfservice/microsites/search.do?language=en_US&cmd=displayKC&externalId=1009458

Changes from v4 to v5:
Undo kvm_clock2 change.
Add cpuid_hv_level_set; cpuid_hv_level == 0 is now valid.
Add cpuid_hv_vendor_set; the null string is now valid.
Handle kvm and cpuid_hv_level == 0.
hypervisor-vendor=kvm,hypervisor-level=0 and
hypervisor-level=0,hypervisor-vendor=kvm
now do the same thing.

Changes from v3 to v4:
Added CPUID_HV_LEVEL_HYPERV, CPUID_HV_LEVEL_KVM.
Added CPUID_HV_VENDOR_HYPERV.
Added hyperv as known hypervisor-vendor.
Allow hypervisor-level to be 0.

Changes from v2 to v3:
Clean post to qemu-devel.

Changes from v1 to v2:

1) Added 1/4 from
http://lists.gnu.org/archive/html/qemu-devel/2012-08/msg05153.html

Because Fred is changing jobs and so will not be pushing to get
this in. It needed to be rebased, And I needed it to complete the
testing of this change.

2) Added 2/4 because of the re-work I needed a way to clear all KVM bits,

3) The rework of v1. Make it fit into the object model re-work of cpu.c for
x86.

4) Added 3/4 -- The split out of the code that is not needed for accel=kvm.

Changes from v2 to v3:

Marcelo Tosatti:
It's one big patch, better split in logically correlated patches
(with better changelog). This would help reviewers.

So split 3 and 4 into 3 to 17. More info in change log.
No code change.

Don Slutz (17):
target-i386: Allow tsc-frequency to be larger then 2.147G
target-i386: Add missing kvm bits.
target-i386: Add Hypervisor level.
target-i386: Add cpu object access routines for Hypervisor level.
target-i386: Add cpu object access routines for Hypervisor level.
target-i386: Use Hypervisor level in -machine pc,accel=kvm.
target-i386: Use Hypervisor level in -machine pc,accel=tcg.
target-i386: Add Hypervisor vendor.
target-i386: Add cpu object access routines for Hypervisor vendor.
target-i386: Use Hypervisor vendor in -machine pc,accel=kvm.
target-i386: Use Hypervisor vendor in -machine pc,accel=tcg.
target-i386: Add some known names to Hypervisor vendor.
target-i386: Add optional Hypervisor leaf extra.
target-i386: Add cpu object access routines for Hypervisor leaf
extra.
target-i386: Add setting of Hypervisor leaf extra for known vmare4.
target-i386: Use Hypervisor leaf extra in -machine pc,accel=kvm.
target-i386: Use Hypervisor leaf extra in -machine pc,accel=tcg.

target-i386/cpu.c | 285
-
target-i386/cpu.h | 34 +++
target-i386/kvm.c | 33 +-
3 files changed, 341 insertions(+), 11 deletions(-)

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html

Re: [RFC PATCH v3 06/19] Implement -dimm command line option

2012-09-22 Thread Blue Swirl

On Fri, Sep 21, 2012 at 11:17 AM, Vasilis Liaskovitis
vasilis.liaskovi...@profitbricks.com wrote:
 Example:
 -dimm id=dimm0,size=512M,node=0,populated=off

There should not be a need to introduce a new top level option,
instead you should just use -device, like
-device dimm,base=0,id=dimm0,size=512M,node=0,populated=off

That would also specify the start address.

 will define a 512M memory slot belonging to numa node 0.

 When populated=on, a DimmDevice is created and hot-plugged at system 
 startup.

 Signed-off-by: Vasilis Liaskovitis vasilis.liaskovi...@profitbricks.com
 ---
  hw/Makefile.objs |2 +-
  qemu-config.c|   25 +
  qemu-options.hx  |5 +
  sysemu.h |1 +
  vl.c |   50 ++
  5 files changed, 82 insertions(+), 1 deletions(-)

 diff --git a/hw/Makefile.objs b/hw/Makefile.objs
 index 6dfebd2..8c5c39a 100644
 --- a/hw/Makefile.objs
 +++ b/hw/Makefile.objs
 @@ -26,7 +26,7 @@ hw-obj-$(CONFIG_I8254) += i8254_common.o i8254.o
  hw-obj-$(CONFIG_PCSPK) += pcspk.o
  hw-obj-$(CONFIG_PCKBD) += pckbd.o
  hw-obj-$(CONFIG_FDC) += fdc.o
 -hw-obj-$(CONFIG_ACPI) += acpi.o acpi_piix4.o
 +hw-obj-$(CONFIG_ACPI) += acpi.o acpi_piix4.o dimm.o
  hw-obj-$(CONFIG_APM) += pm_smbus.o apm.o
  hw-obj-$(CONFIG_DMA) += dma.o
  hw-obj-$(CONFIG_I82374) += i82374.o
 diff --git a/qemu-config.c b/qemu-config.c
 index eba977e..4022d64 100644
 --- a/qemu-config.c
 +++ b/qemu-config.c
 @@ -646,6 +646,30 @@ QemuOptsList qemu_boot_opts = {
  },
  };

 +static QemuOptsList qemu_dimm_opts = {
 +.name = dimm,
 +.head = QTAILQ_HEAD_INITIALIZER(qemu_dimm_opts.head),
 +.desc = {
 +{
 +.name = id,
 +.type = QEMU_OPT_STRING,
 +.help = id of this dimm device,
 +},{
 +.name = size,
 +.type = QEMU_OPT_SIZE,
 +.help = memory size for this dimm,
 +},{
 +.name = populated,
 +.type = QEMU_OPT_BOOL,
 +.help = populated for this dimm,
 +},{
 +.name = node,
 +.type = QEMU_OPT_NUMBER,
 +.help = NUMA node number (i.e. proximity) for this dimm,
 +},
 +{ /* end of list */ }
 +},
 +};
  static QemuOptsList *vm_config_groups[32] = {
  qemu_drive_opts,
  qemu_chardev_opts,
 @@ -662,6 +686,7 @@ static QemuOptsList *vm_config_groups[32] = {
  qemu_boot_opts,
  qemu_iscsi_opts,
  qemu_sandbox_opts,
 +qemu_dimm_opts,
  NULL,
  };

 diff --git a/qemu-options.hx b/qemu-options.hx
 index 804a2d1..3687722 100644
 --- a/qemu-options.hx
 +++ b/qemu-options.hx
 @@ -2842,3 +2842,8 @@ HXCOMM This is the last statement. Insert new options 
 before this line!
  STEXI
  @end table
  ETEXI
 +
 +DEF(dimm, HAS_ARG, QEMU_OPTION_dimm,
 +-dimm id=dimmid,size=sz,node=nd,populated=on|off\n
 +specify memory dimm device with name dimmid, size sz on node nd,
 +QEMU_ARCH_ALL)
 diff --git a/sysemu.h b/sysemu.h
 index 65552ac..7baf9c9 100644
 --- a/sysemu.h
 +++ b/sysemu.h
 @@ -139,6 +139,7 @@ extern QEMUClock *rtc_clock;
  extern int nb_numa_nodes;
  extern uint64_t node_mem[MAX_NODES];
  extern unsigned long *node_cpumask[MAX_NODES];
 +extern int nb_hp_dimms;

  #define MAX_OPTION_ROMS 16
  typedef struct QEMUOptionRom {
 diff --git a/vl.c b/vl.c
 index 7c577fa..af1745c 100644
 --- a/vl.c
 +++ b/vl.c
 @@ -126,6 +126,7 @@ int main(int argc, char **argv)
  #include hw/xen.h
  #include hw/qdev.h
  #include hw/loader.h
 +#include hw/dimm.h
  #include bt-host.h
  #include net.h
  #include net/slirp.h
 @@ -248,6 +249,7 @@ QTAILQ_HEAD(, FWBootEntry) fw_boot_order = 
 QTAILQ_HEAD_INITIALIZER(fw_boot_order
  int nb_numa_nodes;
  uint64_t node_mem[MAX_NODES];
  unsigned long *node_cpumask[MAX_NODES];
 +int nb_hp_dimms;

This counter (if needed) should be private to dimm.c.


  uint8_t qemu_uuid[16];

 @@ -530,6 +532,37 @@ static void configure_rtc_date_offset(const char 
 *startdate, int legacy)
  }
  }

 +static void configure_dimm(QemuOpts *opts)
 +{
 +const char *id;
 +uint64_t size, node;
 +bool populated;
 +QemuOpts *devopts;
 +char buf[256];
 +if (nb_hp_dimms == MAX_DIMMS) {

Why should there be any limit on the number of DIMMs? Please use lists
etc. to avoid restrictions.

 +fprintf(stderr, qemu: maximum number of DIMMs (%d) exceeded\n,
 +MAX_DIMMS);
 +exit(1);
 +}
 +id = qemu_opts_id(opts);
 +size = qemu_opt_get_size(opts, size, DEFAULT_DIMMSIZE);
 +populated = qemu_opt_get_bool(opts, populated, 0);
 +node = qemu_opt_get_number(opts, node, 0);
 +
 +dimm_config_create((char*)id, size, node, nb_hp_dimms, 0);
 +
 +if (populated) {
 +devopts = qemu_opts_create(qemu_find_opts(device), id, 0, NULL);
 +qemu_opt_set(devopts, driver, dimm);
 +snprintf(buf, sizeof(buf), %lu, size);
 +qemu_opt_set(devopts, size, buf);
 +

Re: [RFC PATCH v3 07/19] acpi_piix4: Implement memory device hotplug registers

2012-09-22 Thread Blue Swirl

On Fri, Sep 21, 2012 at 11:17 AM, Vasilis Liaskovitis
vasilis.liaskovi...@profitbricks.com wrote:
 A 32-byte register is used to present up to 256 hotplug-able memory devices
 to BIOS and OSPM. Hot-add and hot-remove functions trigger an ACPI hotplug
 event through these. Only reads are allowed from these registers.

 An ACPI hot-remove event, however, needs to wait for OSPM to eject the device.
 We use a single-byte register to know when OSPM has called the _EJ function
 for a particular dimm. A write to this byte will depopulate the respective 
 dimm.
 Only writes are allowed to this byte.

 v1-v2:
 mems_sts address moved from 0xaf20 to 0xaf80 (to accommodate more space for
 cpu-hotplugging in the future).
 _EJ array is reduced to a single byte.
 Add documentation in docs/specs/acpi_hotplug.txt

 v2-v3:
 minor name changes

 Signed-off-by: Vasilis Liaskovitis vasilis.liaskovi...@profitbricks.com
 ---
  docs/specs/acpi_hotplug.txt |   22 +
  hw/acpi_piix4.c |   73 --
  2 files changed, 91 insertions(+), 4 deletions(-)
  create mode 100644 docs/specs/acpi_hotplug.txt

 diff --git a/docs/specs/acpi_hotplug.txt b/docs/specs/acpi_hotplug.txt
 new file mode 100644
 index 000..cf86242
 --- /dev/null
 +++ b/docs/specs/acpi_hotplug.txt
 @@ -0,0 +1,22 @@
 +QEMU-ACPI BIOS hotplug interface
 +--
 +This document describes the interface between QEMU and the ACPI BIOS for 
 non-PCI
 +space. For the PCI interface please look at docs/specs/acpi_pci_hotplug.txt
 +
 +QEMU-ACPI BIOS memory hotplug interface
 +--
 +
 +Memory Dimm status array (IO port 0xaf80-0xaf9f, 1-byte access):
 +---
 +Dimm hot-plug notification pending. One bit per slot.
 +
 +Read by ACPI BIOS GPE.3 handler to notify OS of memory hot-add or hot-remove
 +events.  Read-only.
 +
 +Memory Dimm ejection success notification (IO port 0xafa0, 1-byte access):
 +---
 +Dimm hot-remove _EJ0 notification. Byte value indicates Dimm slot that was
 +ejected.
 +
 +Written by ACPI memory device _EJ0 method to notify qemu of successfull
 +hot-removal.  Write-only.
 diff --git a/hw/acpi_piix4.c b/hw/acpi_piix4.c
 index c56220b..8776669 100644
 --- a/hw/acpi_piix4.c
 +++ b/hw/acpi_piix4.c
 @@ -28,6 +28,8 @@
  #include range.h
  #include ioport.h
  #include fw_cfg.h
 +#include sysbus.h
 +#include dimm.h

  //#define DEBUG

 @@ -45,9 +47,15 @@
  #define PCI_DOWN_BASE 0xae04
  #define PCI_EJ_BASE 0xae08
  #define PCI_RMV_BASE 0xae0c
 +#define MEM_BASE 0xaf80
 +#define MEM_EJ_BASE 0xafa0

 +#define PIIX4_MEM_HOTPLUG_STATUS 8
  #define PIIX4_PCI_HOTPLUG_STATUS 2

 +struct gpe_regs {

GPERegs

 +uint8_t mems_sts[DIMM_BITMAP_BYTES];
 +};
  struct pci_status {
  uint32_t up; /* deprecated, maintained for migration compatibility */
  uint32_t down;
 @@ -69,6 +77,7 @@ typedef struct PIIX4PMState {
  Notifier machine_ready;

  /* for pci hotplug */
 +struct gpe_regs gperegs;
  struct pci_status pci0_status;
  uint32_t pci0_hotplug_enable;
  uint32_t pci0_slot_device_present;
 @@ -93,8 +102,8 @@ static void pm_update_sci(PIIX4PMState *s)
 ACPI_BITMASK_POWER_BUTTON_ENABLE |
 ACPI_BITMASK_GLOBAL_LOCK_ENABLE |
 ACPI_BITMASK_TIMER_ENABLE)) != 0) ||
 -(((s-ar.gpe.sts[0]  s-ar.gpe.en[0])
 -   PIIX4_PCI_HOTPLUG_STATUS) != 0);
 +(((s-ar.gpe.sts[0]  s-ar.gpe.en[0]) 
 +  (PIIX4_PCI_HOTPLUG_STATUS | PIIX4_MEM_HOTPLUG_STATUS)) != 0);

  qemu_set_irq(s-irq, sci_level);
  /* schedule a timer interruption if needed */
 @@ -499,7 +508,16 @@ type_init(piix4_pm_register_types)
  static uint32_t gpe_readb(void *opaque, uint32_t addr)
  {
  PIIX4PMState *s = opaque;
 -uint32_t val = acpi_gpe_ioport_readb(s-ar, addr);
 +uint32_t val = 0;
 +struct gpe_regs *g = s-gperegs;
 +
 +switch (addr) {
 +case MEM_BASE ... MEM_BASE+DIMM_BITMAP_BYTES:
 +val = g-mems_sts[addr - MEM_BASE];
 +break;
 +default:
 +val = acpi_gpe_ioport_readb(s-ar, addr);
 +}

  PIIX4_DPRINTF(gpe read %x == %x\n, addr, val);
  return val;
 @@ -509,7 +527,13 @@ static void gpe_writeb(void *opaque, uint32_t addr, 
 uint32_t val)
  {
  PIIX4PMState *s = opaque;

 -acpi_gpe_ioport_writeb(s-ar, addr, val);
 +switch (addr) {
 +case MEM_EJ_BASE:
 +dimm_notify(val, DIMM_REMOVE_SUCCESS);
 +break;
 +default:
 +acpi_gpe_ioport_writeb(s-ar, addr, val);
 +}
  pm_update_sci(s);

  PIIX4_DPRINTF(gpe write %x == %d\n, addr, val);
 @@ -560,9 +584,11 @@ static uint32_t pcirmv_read(void *opaque, uint32_t addr)

  static int piix4_device_hotplug(DeviceState *qdev, PCIDevice *dev,

Re: [RFC PATCH v3 08/19] pc: calculate dimm physical addresses and adjust memory map

2012-09-22 Thread Blue Swirl

On Fri, Sep 21, 2012 at 11:17 AM, Vasilis Liaskovitis
vasilis.liaskovi...@profitbricks.com wrote:
 Dimm physical address offsets are calculated automatically and memory map is
 adjusted accordingly. If a DIMM can fit before the PCI_HOLE_START (currently
 0xe000), it will be added normally, otherwise its physical address will be
 above 4GB.

 Also create memory bus on i440fx-pcihost device.

 Signed-off-by: Vasilis Liaskovitis vasilis.liaskovi...@profitbricks.com
 ---
  hw/pc.c  |   41 +
  hw/pc.h  |6 ++
  hw/pc_piix.c |   20 ++--
  vl.c |1 +
  4 files changed, 62 insertions(+), 6 deletions(-)

 diff --git a/hw/pc.c b/hw/pc.c
 index 112739a..2c9664d 100644
 --- a/hw/pc.c
 +++ b/hw/pc.c
 @@ -52,6 +52,7 @@
  #include arch_init.h
  #include bitmap.h
  #include vga-pci.h
 +#include dimm.h

  /* output Bochs bios info messages */
  //#define DEBUG_BIOS
 @@ -93,6 +94,9 @@ struct e820_table {
  static struct e820_table e820_table;
  struct hpet_fw_config hpet_cfg = {.count = UINT8_MAX};

 +ram_addr_t below_4g_hp_mem_size = 0;
 +ram_addr_t above_4g_hp_mem_size = 0;
 +extern target_phys_addr_t ram_hp_offset;

extern declarations belong in headers only.

  void gsi_handler(void *opaque, int n, int level)
  {
  GSIState *s = opaque;
 @@ -1160,3 +1164,40 @@ void pc_pci_device_init(PCIBus *pci_bus)
  pci_create_simple(pci_bus, -1, lsi53c895a);
  }
  }
 +
 +
 +/* Function to configure memory offsets of hotpluggable dimms */
 +
 +target_phys_addr_t pc_set_hp_memory_offset(uint64_t size)
 +{
 +target_phys_addr_t ret;
 +
 +/* on first call, initialize ram_hp_offset */
 +if (!ram_hp_offset) {
 +if (ram_size = PCI_HOLE_START ) {
 +ram_hp_offset = 0x1LL + (ram_size - PCI_HOLE_START);
 +} else {
 +ram_hp_offset = ram_size;
 +}
 +}
 +
 +if (ram_hp_offset = 0x1LL) {
 +ret = ram_hp_offset;
 +above_4g_hp_mem_size += size;
 +ram_hp_offset += size;
 +}
 +/* if dimm fits before pci hole, append it normally */
 +else if (ram_hp_offset + size = PCI_HOLE_START) {

} else if ...

 +ret = ram_hp_offset;
 +below_4g_hp_mem_size += size;
 +ram_hp_offset += size;
 +}
 +/* otherwise place it above 4GB */
 +else {

} else {

 +ret = 0x100000000LL;
 +above_4g_hp_mem_size += size;
 +ram_hp_offset = 0x100000000LL + size;
 +}
 +
 +return ret;
 +}

But the function and use of lots of global variables is ugly. The dimm
devices should be just created in piix_pci.c (i440fx) directly with
correct offsets and sizes, so all  below_4g_mem_size etc. calculations
should be moved there. That would implement the PMC part of i440fx.

For ISA PC, probably the board should create the DIMMs since there may
not be a memory controller. The 4G logic does not make sense there
anyway.

 diff --git a/hw/pc.h b/hw/pc.h
 index e4db071..f3304fc 100644
 --- a/hw/pc.h
 +++ b/hw/pc.h
 @@ -10,6 +10,7 @@
  #include memory.h
  #include ioapic.h

 +#define PCI_HOLE_START 0xe0000000
  /* PC-style peripherals (also used by other machines).  */

  /* serial.c */
 @@ -214,6 +215,11 @@ static inline bool isa_ne2000_init(ISABus *bus, int 
 base, int irq, NICInfo *nd)
  /* pc_sysfw.c */
  void pc_system_firmware_init(MemoryRegion *rom_memory);

 +/* memory hotplug */
 +target_phys_addr_t pc_set_hp_memory_offset(uint64_t size);
 +extern ram_addr_t below_4g_hp_mem_size;
 +extern ram_addr_t above_4g_hp_mem_size;
 +
  /* e820 types */
  #define E820_RAM1
  #define E820_RESERVED   2
 diff --git a/hw/pc_piix.c b/hw/pc_piix.c
 index 88ff041..d1fd276 100644
 --- a/hw/pc_piix.c
 +++ b/hw/pc_piix.c
 @@ -43,6 +43,7 @@
  #include xen.h
  #include memory.h
  #include exec-memory.h
 +#include dimm.h
  #ifdef CONFIG_XEN
  #  include xen/hvm/hvm_info_table.h
  #endif
 @@ -155,9 +156,9 @@ static void pc_init1(MemoryRegion *system_memory,
  kvmclock_create();
  }

 -if (ram_size >= 0xe0000000 ) {
 -above_4g_mem_size = ram_size - 0xe0000000;
 -below_4g_mem_size = 0xe0000000;
 +if (ram_size >= PCI_HOLE_START ) {
 +above_4g_mem_size = ram_size - PCI_HOLE_START;
 +below_4g_mem_size = PCI_HOLE_START;
  } else {
  above_4g_mem_size = 0;
  below_4g_mem_size = ram_size;
 @@ -172,6 +173,9 @@ static void pc_init1(MemoryRegion *system_memory,
  rom_memory = system_memory;
  }

 +/* adjust memory map for hotplug dimms */
 +dimm_calc_offsets(pc_set_hp_memory_offset);
 +
  /* allocate ram and load rom/bios */
  if (!xen_enabled()) {
  fw_cfg = pc_memory_init(system_memory,
 @@ -192,9 +196,11 @@ static void pc_init1(MemoryRegion *system_memory,
  if (pci_enabled) {
  pci_bus = i440fx_init(i440fx_state, piix3_devfn, isa_bus, gsi,
system_memory, system_io, ram_size,
 -

Re: [RFC PATCH v3 00/19] ACPI memory hotplug

2012-09-22 Thread Blue Swirl

On Fri, Sep 21, 2012 at 11:17 AM, Vasilis Liaskovitis
vasilis.liaskovi...@profitbricks.com wrote:
 This is v3 of the ACPI memory hotplug functionality. Only x86_64 target is 
 supported
 for now.

 Overview:

 Dimm device layout is modeled with a new qemu command line

 -dimm id=name,size=sz,node=pxm,populated=on|off

 The starting physical address for all dimms is calculated automatically from
 top of memory, skipping the pci hole at [PCI_HOLE_START, 4G).
 Node is defining numa proximity for this dimm. When not defined it defaults
 to zero.
 -dimm id=dimm0,size=512M,node=0,populated=off
 will define a 512M memory slot belonging to numa node 0.

 Dimms are added or removed with normal device_add, device_del operations:
 Hot-add syntax: device_add dimm,id=mydimm0
 Hot-remove syntax: dimm_del dimm,id=mydimm0

 Changes v2-v3

 - qdev integration. Dimms are attached to a dimmbus. The dimmbus is a child
   of i440fx device in the pc machine. Hot-add and hot-remove are done with 
 normal
   device_add / device_del operations on the dimmbus. New commands dimm_add 
 and
   dimm_del are obsolete. (In previous versions, dimms were always present 
 on the
   qdev tree, and dimm_add/del simply meant allocating or deallocating memory 
 for
   the devices. This version actually does hot-operations on the qdev tree)
 - Add _PS3 method to allow OSPM-induced hot operations.
 - pci-window calculation in Seabios takes dimms into account(for both 32-bit 
 and
   64-bit windows)
 - rename new qmp commands: query-memory-total and query-memory-hotplug
 - balloon driver can see the hotplugged memory

 Changes v1-v2

 - memory map is automatically calculated for hotplug dimms. Dimms are added 
 from
 top-of-memory skipping the pci hole at [PCI_HOLE_START, 4G).
 - Renamed from -memslot to -dimm. Commands changed to dimm_add, 
 dimm_del.
 - Seabios ejection array reduced to a byte. Use extraction macros for dimm 
 ssdt.
 - additional SRAT paravirt info does not break previous SRAT fw_cfg layout.
 - Documentation of new acpi_piix4 registers and paravirt data.
 - add ACPI _OST support for _OST enabled guests. This allows qemu to receive
 notification for success / failure of memory hot-add and hot-remove 
 operations.
 Guest needs to support _OST (https://lkml.org/lkml/2012/6/25/321)
 - add monitor info command to report total guest memory (initial + hot-added)
 - add command line options and monitor commands for batch dimm
 creation/population (obsolete from v3 onwards)

 Issues:

 - A main blocker issue is windows guest functionality. The patchset does not 
 work for
 windows currently. My guess is the windows pnpmem driver does not like the
 seabios dimm device implementation (or the seabios dimm implementation is not
 fully ACPI-compliant). If someone can review the seabios patches or has any
 ideas to debug this, let me know.

 Testing on win2012 server RC or windows2008 consumer prerelease. When adding a
 DIMM, the device shows up in DeviceManager but does not work.
 Relevant messages:

  This device cannot start. (Code 10)
 Device configured(memory.inf) (UserPnP eventID 400)
 Device installed (memory.inf) ACPI/PNP0C80\2daba3ff1 was configured
 Device not started(PNPMEM) (Kernel-PnP eventID 411, kernelID)
 Device ACPI\PNP0C80\2daba3ff1 had a problem starting Driver Name: memory.inf
 (c:\Windows\system32\DRIVERS\pnpmem.sys 6.2.8400 winmain_win8rc))
 Memory range:0x80000000 - 0x90000000 (Initial memory of VM is 2GB. The 
 hotplugged DIMM
  was a 256MB with physical address range starting at 2GB )
 Conflicting device list: No conflicts. 

 Adding a 2nd or more dimms causes a crash (PNP_DETECTED_FATAL_ERROR with blue
 screen of death) and makes windows reboot. After this, the VM keeps rebooting 
 with
 ACPI_BIOS_ERROR. The VM refuses to boot anymore once a 2nd (or more) extra 
 dimm is
 plugged-in.

 - Is the dimmbus the correct way to go about integrating into qdev/qom? In a 
 v1
 comment, Anthony mentioned attaching dimms directly to an i440fx device as
 children. Is this possible without a bus?

 - Live migration works as long as the dimm layout (-dimm command line args) 
 are
 identical at the source and destination qemu command line. Patch 10/19
 creates the DimmDevice that corresponds to the unknown incoming ramblock.
 Ramblocks are migrated before qdev VMStates are migrated (the DimmDevice 
 structure
 currently does not define a VMStateDescription). So the DimmDevice is handled
 differently than other devices. If this is not acceptable, any suggestions on
 how should it be reworked?

 - Hot-operation notification lists need to be added to migration state.

 Please review. Could people state which other issues they consider blocker for
 including this upstream?

Please check the patches with checkpatch.pl, there are plenty of
missing braces and uses of __FUNCTION__ etc.

I also have other comments to specific patches, especially the
architecture does not look correct.


 Does this patchset need to wait for 1.4 or could

Re: [Qemu-devel] [RfC PATCH] vga: add mmio bar to standard vga

2012-09-19 Thread Blue Swirl

On Tue, Sep 18, 2012 at 9:51 AM, Gerd Hoffmann kra...@redhat.com wrote:
 This patch adds a mmio bar to the qemu standard vga which allows to
 access the standard vga registers and bochs dispi interface registers
 via mmio.

 Cc: Benjamin Herrenschmidt b...@kernel.crashing.org
 Signed-off-by: Gerd Hoffmann kra...@redhat.com
 ---
  hw/vga-pci.c |   97 
 ++
  hw/vga.c |6 ++--
  hw/vga_int.h |6 +++
  3 files changed, 106 insertions(+), 3 deletions(-)

 diff --git a/hw/vga-pci.c b/hw/vga-pci.c
 index 9abbada..e05e2ef 100644
 --- a/hw/vga-pci.c
 +++ b/hw/vga-pci.c
 @@ -30,9 +30,36 @@
  #include qemu-timer.h
  #include loader.h

 +/*
 + * QEMU Standard VGA -- MMIO area spec.
 + *
 + * Using PCI bar #2, keeping #1 free, which leaves the
 + * door open to upgrade bar #0 to 64bit.
 + *
 + * mmio area layout:
 + *   0x0000 - 0x03ff  reserved, for possible virtio extension.
 + *   0x0400 - 0x041f  vga ioports (0x3c0 - 0x3df), remapped 1:1
 + *   0x0500 - 0x0515  bochs dispi interface registers, mapped flat without
 + * index/data ports.  Use (index << 1) as offset for
 + * (16bit) register access.
 + */
 +#define PCI_VGA_IOPORT_OFFSET 0x400
 +#define PCI_VGA_IOPORT_SIZE   (0x3e0 - 0x3c0)
 +#define PCI_VGA_BOCHS_OFFSET  0x500
 +#define PCI_VGA_BOCHS_SIZE(0x0b * 2)
 +#define PCI_VGA_MMIO_SIZE 0x1000
 +
 +enum vga_pci_flags {
 +PCI_VGA_FLAG_ENABLE_MMIO = 1,
 +};
 +
  typedef struct PCIVGAState {
  PCIDevice dev;
  VGACommonState vga;
 +uint32_t flags;
 +MemoryRegion mmio;
 +MemoryRegion ioport;
 +MemoryRegion bochs;
  } PCIVGAState;

  static const VMStateDescription vmstate_vga_pci = {
 @@ -47,6 +74,60 @@ static const VMStateDescription vmstate_vga_pci = {
  }
  };

 +static uint64_t pci_vga_ioport_read(void *ptr, target_phys_addr_t addr,
 +unsigned size)
 +{
 +PCIVGAState *d = ptr;
 +return vga_ioport_read(d-vga, addr);
 +}
 +
 +static void pci_vga_ioport_write(void *ptr, target_phys_addr_t addr,
 + uint64_t val, unsigned size)
 +{
 +PCIVGAState *d = ptr;
 +vga_ioport_write(d-vga, addr, val);
 +}
 +
 +static const MemoryRegionOps pci_vga_ioport_ops = {
 +.read = pci_vga_ioport_read,
 +.write = pci_vga_ioport_write,
 +.valid.min_access_size = 1,
 +.valid.max_access_size = 4,
 +.impl.min_access_size = 1,
 +.impl.max_access_size = 1,
 +.endianness = DEVICE_LITTLE_ENDIAN,
 +};
 +
 +static uint64_t pci_vga_bochs_read(void *ptr, target_phys_addr_t addr,
 +   unsigned size)
 +{
 +PCIVGAState *d = ptr;
 +int index = addr  1;
 +
 +vbe_ioport_write_index(d-vga, 0, index);
 +return vbe_ioport_read_data(d-vga, 0);

These functions are only available with CONFIG_BOCHS_VBE #defined, so
this code should be conditional as well.

But building without CONFIG_BOCHS_VBE is not very useful since it's
used by the BIOS and there's no display output without it IIRC.

 +}
 +
 +static void pci_vga_bochs_write(void *ptr, target_phys_addr_t addr,
 +uint64_t val, unsigned size)
 +{
 +PCIVGAState *d = ptr;
 +int index = addr  1;
 +
 +vbe_ioport_write_index(d-vga, 0, index);
 +vbe_ioport_write_data(d-vga, 0, val);
 +}
 +
 +static const MemoryRegionOps pci_vga_bochs_ops = {
 +.read = pci_vga_bochs_read,
 +.write = pci_vga_bochs_write,
 +.valid.min_access_size = 1,
 +.valid.max_access_size = 4,
 +.impl.min_access_size = 2,
 +.impl.max_access_size = 2,
 +.endianness = DEVICE_LITTLE_ENDIAN,
 +};
 +
  static int pci_vga_initfn(PCIDevice *dev)
  {
   PCIVGAState *d = DO_UPCAST(PCIVGAState, dev, dev);
 @@ -62,6 +143,21 @@ static int pci_vga_initfn(PCIDevice *dev)
   /* XXX: VGA_RAM_SIZE must be a power of two */
   pci_register_bar(d-dev, 0, PCI_BASE_ADDRESS_MEM_PREFETCH, s-vram);

 + /* mmio bar for vga register access */
 + if (d-flags  (1  PCI_VGA_FLAG_ENABLE_MMIO)) {
 + memory_region_init(d-mmio, vga.mmio, 4096);
 + memory_region_init_io(d-ioport, pci_vga_ioport_ops, d,
 +   vga ioports remapped, PCI_VGA_IOPORT_SIZE);
 + memory_region_init_io(d-bochs, pci_vga_bochs_ops, d,
 +   bochs dispi interface, PCI_VGA_BOCHS_SIZE);

Also this region should only be available with CONFIG_BOCHS_VBE.

 +
 + memory_region_add_subregion(d-mmio, PCI_VGA_IOPORT_OFFSET,
 + d-ioport);
 + memory_region_add_subregion(d-mmio, PCI_VGA_BOCHS_OFFSET,
 + d-bochs);
 + pci_register_bar(d-dev, 2, PCI_BASE_ADDRESS_SPACE_MEMORY, 
 d-mmio);
 + }
 +
   if (!dev-rom_bar) {
   /* compatibility with pc-0.13 and older */
   vga_init_vbe(s, pci_address_space(dev));
 @@ -77,6 +173,7 @@ DeviceState

Re: [Qemu-ppc] [Qemu-devel] [PATCH 4/4] kvm: i386: Add classic PCI device assignment

2012-09-08 Thread Blue Swirl

On Thu, Sep 6, 2012 at 3:42 AM, Alexander Graf ag...@suse.de wrote:

 On 05.09.2012, at 15:38, Blue Swirl wrote:

 On Wed, Sep 5, 2012 at 7:22 PM, Anthony Liguori anth...@codemonkey.ws 
 wrote:
 Blue Swirl blauwir...@gmail.com writes:

 On Wed, Sep 5, 2012 at 3:41 PM, Anthony Liguori anth...@codemonkey.ws 
 wrote:
 Avi Kivity a...@redhat.com writes:

 On 09/05/2012 12:00 AM, Anthony Liguori wrote:

 Why? The way this is being submitted I don't see why we should treat
 Jan's patch any different from a patch by IBM or Samsung where we've
 asked folks to fix the license to comply with what I thought was our 
 new
 policy (it does not even contain a from-x-on-GPLv2+ notice).

 Asking is one thing.  Requiring is another.

 I would prefer that people submitted GPLv2+, but I don't think it should
 be a hard requirement.  It means, among other things, that we cannot
 accept most code that originates from the Linux kernel.

 We could extend this to require unless there is a reason to grant an
 exception if we wanted to (not saying I know whether we want to or
 not).

 I don't want QEMU to be GPLv3.  I don't like the terms of the GPLv3.

 I don't mind GPLv2+, if people want to share code from QEMU in GPLv3
 projects, GPLv2+ enables that.

 The advantage of 100% GPLv2+ (or other GPLv3 compatible) would be that
 QEMU could share code from GPLv3 projects, specifically latest
 binutils. Reinventing a disassembler for ever growing x86 assembly is
 no fun.

 But we can't share code with Linux (like for virtio).

 It's a tradeoff between reimplementing disassembler without using
 binutils vs. reimplementing virtio without using Linux. Both have
 their problems and both are growing areas. Disassembler is a bit
 smaller and the basic function does not ever change.


 Yes, the GPLv3 sucks and FSF screwed up massively not making it v2
 compatible.

 I sort of agree. They had their reasons, of course. Too bad binutils
 licensing is fully controlled by FSF, for us it would be enough if
 they had some sort of dual licensing scheme (GPLv3 + BSD for example)
 in place.

 What do the BSD guys do here? They want to have a disassembler too that works 
 across all different sorts of architectures, no?

There's at least GDB and DDD. The DDB kernel debugger contains a
disassembler for several architectures:
http://fxr.watson.org/fxr/ident?v=NETBSDi=db_disasm

At least cris, lm32, microblaze, unicore32 and s390x are still missing
and I don't know if sh3 equals sh4. For some of those, maybe current
code from old binutils will be good enough forever.

It looks like the most recent change for x86 is from 2009 and there's
no support for even MMX, so it does not look like a very promising way
to handle the x86 instruction set growth.



 Alex

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH v3 4/4] kvm: i386: Add classic PCI device assignment

2012-09-08 Thread Blue Swirl

On Thu, Sep 6, 2012 at 4:06 PM, Andreas Färber afaer...@suse.de wrote:
 Am 06.09.2012 10:44, schrieb Jan Kiszka:
 On 2012-08-30 20:30, Jan Kiszka wrote:
 This adds PCI device assignment for i386 targets using the classic KVM
 interfaces. This version is 100% identical to what is being maintained
 in qemu-kvm for several years and is supported by libvirt as well. It is
 expected to remain relevant for another couple of years until kernels
 without full-features and performance-wise equivalent VFIO support are
 obsolete.

 A refactoring to-do that should be done in-tree is to model MSI and
 MSI-X support via the generic PCI layer, similar to what VFIO is already
 doing for MSI-X. This should improve the correctness and clean up the
 code from duplicate logic.

 Signed-off-by: Jan Kiszka jan.kis...@siemens.com
 ---

 Changes in v3:
  - addressed comment by Peter (changed device name to kvm-pci-assign +
alias)
  - addressed (most) comments by Michael
  - fixed INT pin regression

 Does someone _disagree_ that there are no open (and reasonably solvable)
 issues and that this can now be merged through uq/master?

 My implicit suggestion was to add a notice that new patch contributions
 to the file from date yyyy-mm-dd on would be declared GPLv2+, as Paolo
 has done elsewhere. That would limit the amount of people to ask for a
 potential relicensing attempt.

+1


 For the record, Anthony explained on IRC that the code originated from
 Xen originally and thus qemu-kvm.git does not contain the full history
 anyway and that pulling in the Mercurial file history and replaying the
 KVM history on top was too difficult, therefore this patch with a single
 SoB by Jan.

 Andreas

 --
 SUSE LINUX Products GmbH, Maxfeldstr. 5, 90409 Nürnberg, Germany
 GF: Jeff Hawn, Jennifer Guild, Felix Imendörffer; HRB 16746 AG Nürnberg
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [Qemu-devel] [PATCH 4/4] kvm: i386: Add classic PCI device assignment

2012-09-08 Thread Blue Swirl

On Thu, Sep 6, 2012 at 8:44 AM, Avi Kivity a...@redhat.com wrote:
 On 09/05/2012 10:04 PM, Blue Swirl wrote:

 Reinventing a disassembler for ever growing x86 assembly is
 no fun.

 We can try linking to a disassembler library.  I use udis86 to
 disassemble instructions in kvm tracepoints
 (http://udis86.git.sourceforge.net/git/gitweb.cgi?p=udis86/udis86;a=shortlog),
 it's maintained but not heavily so.

I think commonality with KVM would be preferred. The library looks
neat and based on changelog, more actively developed than BSD DDB.


 Of course for non-x86 we'd need to continue using binutils; this is
 about copying code vs. libraries, not about licensing.

For most architectures, pre-GPLv3 binutils is good enough since the
instruction set does not change anymore. Maybe only PPC and Sparc64
still change besides x86. New CPU types more recent than 2007 will
have problems.



 --
 error compiling committee.c: too many arguments to function
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [Qemu-ppc] [Qemu-devel] [PATCH 4/4] kvm: i386: Add classic PCI device assignment

2012-09-08 Thread Blue Swirl

On Sat, Sep 8, 2012 at 9:28 AM, Alexander Graf ag...@suse.de wrote:


 On 08.09.2012, at 10:06, Blue Swirl blauwir...@gmail.com wrote:

 On Thu, Sep 6, 2012 at 8:44 AM, Avi Kivity a...@redhat.com wrote:
 On 09/05/2012 10:04 PM, Blue Swirl wrote:

 Reinventing a disassembler for ever growing x86 assembly is
 no fun.

 We can try linking to a disassembler library.  I use udis86 to
 disassemble instructions in kvm tracepoints
 (http://udis86.git.sourceforge.net/git/gitweb.cgi?p=udis86/udis86;a=shortlog),
 it's maintained but not heavily so.

 I think commonality with KVM would be preferred. The library looks
 neat and based on changelog, more actively developed than BSD DDB.


 Of course for non-x86 we'd need to continue using binutils; this is
 about copying code vs. libraries, not about licensing.

 For most architectures, pre-GPLv3 binutils is good enough since the
 instruction set does not change anymore. Maybe only PPC and Sparc64
 still change besides x86. New CPU types more recent than 2007 will
 have problems.

 Alternatively we could try to run the disassembler in a different process, 
 right?

For qemu.log this would be doable and even improve performance since
only binary data would be transferred.

But for monitor disassembly command x/i it may be too clumsy. There's
some overlap with GDB support, so maybe we could deprecate monitor
disassembly.


 Alex




 --
 error compiling committee.c: too many arguments to function

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [Qemu-ppc] [Qemu-devel] [PATCH 4/4] kvm: i386: Add classic PCI device assignment

2012-09-08 Thread Blue Swirl

On Sat, Sep 8, 2012 at 12:13 PM, Alexander Graf ag...@suse.de wrote:


 On 08.09.2012, at 12:16, Blue Swirl blauwir...@gmail.com wrote:

 On Sat, Sep 8, 2012 at 9:28 AM, Alexander Graf ag...@suse.de wrote:


 On 08.09.2012, at 10:06, Blue Swirl blauwir...@gmail.com wrote:

 On Thu, Sep 6, 2012 at 8:44 AM, Avi Kivity a...@redhat.com wrote:
 On 09/05/2012 10:04 PM, Blue Swirl wrote:

 Reinventing a disassembler for ever growing x86 assembly is
 no fun.

 We can try linking to a disassembler library.  I use udis86 to
 disassemble instructions in kvm tracepoints
 (http://udis86.git.sourceforge.net/git/gitweb.cgi?p=udis86/udis86;a=shortlog),
 it's maintained but not heavily so.

 I think commonality with KVM would be preferred. The library looks
 neat and based on changelog, more actively developed than BSD DDB.


 Of course for non-x86 we'd need to continue using binutils; this is
 about copying code vs. libraries, not about licensing.

 For most architectures, pre-GPLv3 binutils is good enough since the
 instruction set does not change anymore. Maybe only PPC and Sparc64
 still change besides x86. New CPU types more recent than 2007 will
 have problems.

 Alternatively we could try to run the disassembler in a different process, 
 right?

 For qemu.log this would be doable and even improve performance since
 only binary data would be transferred.

 But for monitor disassembly command x/i it may be too clumsy.

 Why would it be clumsy? We'd have to make sure we are communicating 
 synchronously with the daemon, but apart from that it shouldn't be too 
 different from the log, no?

The log file should be written as binary which the disassembly tool
could read. The daemon would probably slow down execution back to
original speed since it would be writing ASCII, though this mode could
be supported too.


 There's
 some overlap with GDB support, so maybe we could deprecate monitor
 disassembly.

 I really like the way the monitor goes through special v-p lookup, as it's a 
 lot easier to debug...

GDB could be taught new tricks, but that would not help users with old dogs.


 Alex



 Alex




 --
 error compiling committee.c: too many arguments to function

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [Qemu-devel] [PATCH 4/4] kvm: i386: Add classic PCI device assignment

2012-09-05 Thread Blue Swirl

On Wed, Sep 5, 2012 at 3:41 PM, Anthony Liguori anth...@codemonkey.ws wrote:
 Avi Kivity a...@redhat.com writes:

 On 09/05/2012 12:00 AM, Anthony Liguori wrote:

 Why? The way this is being submitted I don't see why we should treat
 Jan's patch any different from a patch by IBM or Samsung where we've
 asked folks to fix the license to comply with what I thought was our new
 policy (it does not even contain a from-x-on-GPLv2+ notice).

 Asking is one thing.  Requiring is another.

 I would prefer that people submitted GPLv2+, but I don't think it should
 be a hard requirement.  It means, among other things, that we cannot
 accept most code that originates from the Linux kernel.

 We could extend this to require unless there is a reason to grant an
 exception if we wanted to (not saying I know whether we want to or
 not).

 I don't want QEMU to be GPLv3.  I don't like the terms of the GPLv3.

 I don't mind GPLv2+, if people want to share code from QEMU in GPLv3
 projects, GPLv2+ enables that.

The advantage of 100% GPLv2+ (or other GPLv3 compatible) would be that
QEMU could share code from GPLv3 projects, specifically latest
binutils. Reinventing a disassembler for ever growing x86 assembly is
no fun.


 But if new code is coming in and happens to be under GPLv2, that just
 means that the contribution cannot be used outside of QEMU in a GPLv3
 project.  That's fine and that's a decision for the submitter to make.

This policy means that we are locked in with GPLv2.


 Regards,

 Anthony Liguori



 --
 error compiling committee.c: too many arguments to function
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [Qemu-devel] [PATCH 4/4] kvm: i386: Add classic PCI device assignment

2012-09-05 Thread Blue Swirl

On Tue, Sep 4, 2012 at 9:28 PM, Michael S. Tsirkin m...@redhat.com wrote:
 On Tue, Sep 04, 2012 at 07:27:32PM +, Blue Swirl wrote:
 On Tue, Sep 4, 2012 at 8:32 AM, Avi Kivity a...@redhat.com wrote:
  On 09/03/2012 10:32 PM, Blue Swirl wrote:
  On Mon, Sep 3, 2012 at 4:14 PM, Avi Kivity a...@redhat.com wrote:
  On 08/29/2012 11:27 AM, Markus Armbruster wrote:
 
  I don't see a point in making contributors avoid non-problems that might
  conceivably become trivial problems some day.  Especially when there's
  no automated help with the avoiding.
 
  -Wpointer-arith
 
  +1
 
  FWIW, I'm not in favour of enabling it, just pointing out that it
  exists.  In general I prefer avoiding unnecessary use of extensions, but
  in this case the extension is trivial and improves readability.

 Void pointers are not as type safe as uint8_t pointers.

 casts are even worse.

 There's also
 little difference in readability between those in my opinion.

 here too, casts are worse for readability.

I agree they are bad on both counts, but in most cases it is
possible to use different types consistently (like uint8_t * or char *
instead of void *) without adding casts.


 
 
  --
  error compiling committee.c: too many arguments to function
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [Qemu-devel] [PATCH 4/4] kvm: i386: Add classic PCI device assignment

2012-09-05 Thread Blue Swirl

On Wed, Sep 5, 2012 at 7:22 PM, Anthony Liguori anth...@codemonkey.ws wrote:
 Blue Swirl blauwir...@gmail.com writes:

 On Wed, Sep 5, 2012 at 3:41 PM, Anthony Liguori anth...@codemonkey.ws 
 wrote:
 Avi Kivity a...@redhat.com writes:

 On 09/05/2012 12:00 AM, Anthony Liguori wrote:

 Why? The way this is being submitted I don't see why we should treat
 Jan's patch any different from a patch by IBM or Samsung where we've
 asked folks to fix the license to comply with what I thought was our new
 policy (it does not even contain a from-x-on-GPLv2+ notice).

 Asking is one thing.  Requiring is another.

 I would prefer that people submitted GPLv2+, but I don't think it should
 be a hard requirement.  It means, among other things, that we cannot
 accept most code that originates from the Linux kernel.

 We could extend this to require unless there is a reason to grant an
 exception if we wanted to (not saying I know whether we want to or
 not).

 I don't want QEMU to be GPLv3.  I don't like the terms of the GPLv3.

 I don't mind GPLv2+, if people want to share code from QEMU in GPLv3
 projects, GPLv2+ enables that.

 The advantage of 100% GPLv2+ (or other GPLv3 compatible) would be that
 QEMU could share code from GPLv3 projects, specifically latest
 binutils. Reinventing a disassembler for ever growing x86 assembly is
 no fun.

 But we can't share code with Linux (like for virtio).

It's a tradeoff between reimplementing disassembler without using
binutils vs. reimplementing virtio without using Linux. Both have
their problems and both are growing areas. Disassembler is a bit
smaller and the basic function does not ever change.


 Yes, the GPLv3 sucks and FSF screwed up massively not making it v2
 compatible.

I sort of agree. They had their reasons, of course. Too bad binutils
licensing is fully controlled by FSF, for us it would be enough if
they had some sort of dual licensing scheme (GPLv3 + BSD for example)
in place.


 Regards,

 Anthony Liguori



 But if new code is coming in and happens to be under GPLv2, that just
 means that the contribution cannot be used outside of QEMU in a GPLv3
 project.  That's fine and that's a decision for the submitter to make.

 This policy means that we are locked in with GPLv2.


 Regards,

 Anthony Liguori



 --
 error compiling committee.c: too many arguments to function
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [Qemu-devel] [PATCH 4/4] kvm: i386: Add classic PCI device assignment

2012-09-05 Thread Blue Swirl

On Wed, Sep 5, 2012 at 7:24 PM, Eric Blake ebl...@redhat.com wrote:
 On 09/05/2012 01:04 PM, Blue Swirl wrote:
 I don't mind GPLv2+, if people want to share code from QEMU in GPLv3
 projects, GPLv2+ enables that.

 The advantage of 100% GPLv2+ (or other GPLv3 compatible) would be that
 QEMU could share code from GPLv3 projects, specifically latest
 binutils. Reinventing a disassembler for ever growing x86 assembly is
 no fun.

 Not quite right.

 If qemu is 100% GPLv2+ and binutils is GPLv3+, then binutils can borrow
 code from qemu and the result is that binutils is still GPLv3+; but in
 the converse direction, if qemu borrows code from binutils then qemu is
 no longer 100% GPLv2+ but becomes GPLv3+ by tainting.

I don't see how this disagrees with what I wrote. GPLv2+ QEMU sharing
code from GPLv3 would of course become GPLv3.


 That is, requesting GPLv2+ allows qemu code to be reused elsewhere, but
 does not help qemu import external code that is not already GPLv2+.

Unless we demanded relicensing to GPLv2+ for all GPLv2 QEMU code and
forbid new GPLv2 entries.




 But if new code is coming in and happens to be under GPLv2, that just
 means that the contribution cannot be used outside of QEMU in a GPLv3
 project.  That's fine and that's a decision for the submitter to make.

 This policy means that we are locked in with GPLv2.

 I'm afraid we're already locked at GPLv2 (and not GPLv2+), for good or
 for bad.

 --
 Eric Blake   ebl...@redhat.com+1-919-301-3266
 Libvirt virtualization library http://libvirt.org

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [Qemu-devel] [PATCH 4/4] kvm: i386: Add classic PCI device assignment

2012-09-04 Thread Blue Swirl

On Tue, Sep 4, 2012 at 8:32 AM, Avi Kivity a...@redhat.com wrote:
 On 09/03/2012 10:32 PM, Blue Swirl wrote:
 On Mon, Sep 3, 2012 at 4:14 PM, Avi Kivity a...@redhat.com wrote:
 On 08/29/2012 11:27 AM, Markus Armbruster wrote:

 I don't see a point in making contributors avoid non-problems that might
 conceivably become trivial problems some day.  Especially when there's
 no automated help with the avoiding.

 -Wpointer-arith

 +1

 FWIW, I'm not in favour of enabling it, just pointing out that it
 exists.  In general I prefer avoiding unnecessary use of extensions, but
 in this case the extension is trivial and improves readability.

Void pointers are not as type safe as uint8_t pointers. There's also
little difference in readability between those in my opinion.



 --
 error compiling committee.c: too many arguments to function
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [Qemu-devel] [PATCH 4/4] kvm: i386: Add classic PCI device assignment

2012-09-03 Thread Blue Swirl

On Mon, Sep 3, 2012 at 4:14 PM, Avi Kivity a...@redhat.com wrote:
 On 08/29/2012 11:27 AM, Markus Armbruster wrote:

 I don't see a point in making contributors avoid non-problems that might
 conceivably become trivial problems some day.  Especially when there's
 no automated help with the avoiding.

 -Wpointer-arith

+1




 --
 error compiling committee.c: too many arguments to function
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [Qemu-devel] [PATCH 4/4] kvm: i386: Add classic PCI device assignment

2012-09-01 Thread Blue Swirl

On Tue, Aug 28, 2012 at 9:51 PM, Anthony Liguori anth...@codemonkey.ws wrote:
 Blue Swirl blauwir...@gmail.com writes:

 On Tue, Aug 28, 2012 at 7:31 PM, Anthony Liguori anth...@codemonkey.ws 
 wrote:
 Blue Swirl blauwir...@gmail.com writes:

 On Tue, Aug 28, 2012 at 5:28 PM, Michael S. Tsirkin m...@redhat.com 
 wrote:
 On Tue, Aug 28, 2012 at 05:01:55PM +, Blue Swirl wrote:
 On Tue, Aug 28, 2012 at 7:35 AM, Michael Tokarev m...@tls.msk.ru wrote:
  On 27.08.2012 22:56, Blue Swirl wrote:
  []
  +static uint32_t slow_bar_readb(void *opaque, target_phys_addr_t 
  addr)
  +{
  +AssignedDevRegion *d = opaque;
  +uint8_t *in = d-u.r_virtbase + addr;
 
  Don't perform arithmetic with void pointers.
 
  There are a few places in common qemu code which does this for a very
  long time.  So I guess it is safe now.

 It's a non-standard GCC extension.

 So?  We use many other GCC extensions. grep for typeof.

 Dependencies should not be introduced trivially. In this case, it's
 pretty easy to avoid void pointer arithmetic as Jan's next version
 shows.

 The standard is vague with respect to void pointer arithmetic.  Most compilers
 allow it.  A very good analysis of the standard can be found below.

 http://stackoverflow.com/questions/3523145/pointer-arithmetic-for-void-pointer-in-c

 The analysis would seem to show that arithmetic may be acceptable, but
 it doesn't say that void pointers must be treated like char pointers.
 In my view, this would make sense:

 char *cptr;
 void *vptr;

 Since
 cptr++;
 is equivalent to
 cptr = (char *)((uintptr_t)cptr + sizeof(*cptr));

 therefore

 vptr++;
 should be equivalent to
 vptr = (void *)((uintptr_t)vptr + sizeof(*vptr));
 That is, vptr++ should be equivalent to vptr += 0 because sizeof(void)
 should be 0 if allowed.

 sizeof(void) == 1

 With GCC at least.

It's not valid C (0 is just how I think it should be if allowed). Also
GCC can reject it even with std=gnu89 (default, C89 with GNU
extensions):
$ cat void.c
unsigned long x = sizeof(void);
$ gcc -pedantic void.c -c
void.c:1: warning: invalid application of 'sizeof' to a void type

 Regards,

 Anthony Liguori


 Regards,

 Anthony Liguori



 Is there a work in progress to build GCC with visual studio?
 If yes what are the chances KVM device assignment
 will work on windows?

 IIRC there was really a project to use KVM on Windows and another
 project to build QEMU with MSVC.


 Look QEMU codebase is what it is. Unless you rework all existing
 code to conform to your taste, I do not see why you NACK valid new code
 unless it conforms to same.

 Yes, I'd be happy to fix the style with huge patches at once. But our
 fearless leader does not agree, so we are stuck with the codebase
 being what it is until it is fixed one step at a time.


 
  /mjt
 --
 To unsubscribe from this list: send the line unsubscribe kvm in
 the body of a message to majord...@vger.kernel.org
 More majordomo info at  http://vger.kernel.org/majordomo-info.html
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [Qemu-devel] [PATCH 4/4] kvm: i386: Add classic PCI device assignment

2012-08-28 Thread Blue Swirl

On Tue, Aug 28, 2012 at 7:35 AM, Michael Tokarev m...@tls.msk.ru wrote:
 On 27.08.2012 22:56, Blue Swirl wrote:
 []
 +static uint32_t slow_bar_readb(void *opaque, target_phys_addr_t addr)
 +{
 +AssignedDevRegion *d = opaque;
 +uint8_t *in = d->u.r_virtbase + addr;

 Don't perform arithmetic with void pointers.

 There are a few places in common qemu code which does this for a very
 long time.  So I guess it is safe now.

It's a non-standard GCC extension.


 /mjt
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [Qemu-devel] [PATCHv3 3/4] cpuid: disable pv eoi for 1.1 and older compat types

2012-08-28 Thread Blue Swirl

On Tue, Aug 28, 2012 at 1:22 PM, Michael S. Tsirkin m...@redhat.com wrote:
 In preparation for adding PV EOI support, disable PV EOI by default for
 1.1 and older machine types, to avoid CPUID changing during migration.

 PV EOI can still be enabled/disabled by specifying it explicitly.
 Enable for 1.1
 -M pc-1.1 -cpu kvm64,+kvm_pv_eoi
 Disable for 1.2
 -M pc-1.2 -cpu kvm64,-kvm_pv_eoi

 Signed-off-by: Michael S. Tsirkin m...@redhat.com
 ---
  hw/Makefile.objs  |  2 +-
  hw/cpu_flags.c| 32 
  hw/cpu_flags.h|  9 +
  hw/pc_piix.c  |  2 ++
  target-i386/cpu.c |  8 
  5 files changed, 52 insertions(+), 1 deletion(-)
  create mode 100644 hw/cpu_flags.c
  create mode 100644 hw/cpu_flags.h

 diff --git a/hw/Makefile.objs b/hw/Makefile.objs
 index 850b87b..3f2532a 100644
 --- a/hw/Makefile.objs
 +++ b/hw/Makefile.objs
 @@ -1,5 +1,5 @@
  hw-obj-y = usb/ ide/
 -hw-obj-y += loader.o
 +hw-obj-y += loader.o cpu_flags.o
  hw-obj-$(CONFIG_VIRTIO) += virtio-console.o
  hw-obj-$(CONFIG_VIRTIO_PCI) += virtio-pci.o
  hw-obj-y += fw_cfg.o
 diff --git a/hw/cpu_flags.c b/hw/cpu_flags.c
 new file mode 100644
 index 000..d821d8c
 --- /dev/null
 +++ b/hw/cpu_flags.c
 @@ -0,0 +1,32 @@
 +/*
 + * CPU compatibility flags.
 + *
 + * Copyright (c) 2012 Red Hat Inc.
 + * Author: Michael S. Tsirkin.
 + *
 + * This program is free software; you can redistribute it and/or modify
 + * it under the terms of the GNU General Public License as published by
 + * the Free Software Foundation; either version 2 of the License, or
 + * (at your option) any later version.
 + *
 + * This program is distributed in the hope that it will be useful,
 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 + * GNU General Public License for more details.
 + *
 + * You should have received a copy of the GNU General Public License along
 + * with this program; if not, see http://www.gnu.org/licenses/.
 + */
 +#include "hw/cpu_flags.h"
 +
 +static bool _kvm_pv_eoi_disabled;

NACK. I find your lack of compliance disturbing.

 +
 +void disable_kvm_pv_eoi(void)
 +{
 +   _kvm_pv_eoi_disabled = true;
 +}
 +
 +bool kvm_pv_eoi_disabled(void)
 +{
 +   return _kvm_pv_eoi_disabled;
 +}
 diff --git a/hw/cpu_flags.h b/hw/cpu_flags.h
 new file mode 100644
 index 000..05777b6
 --- /dev/null
 +++ b/hw/cpu_flags.h
 @@ -0,0 +1,9 @@
 +#ifndef HW_CPU_FLAGS_H
 +#define HW_CPU_FLAGS_H
 +
 +#include <stdbool.h>
 +
 +void disable_kvm_pv_eoi(void);
 +bool kvm_pv_eoi_disabled(void);
 +
 +#endif
 diff --git a/hw/pc_piix.c b/hw/pc_piix.c
 index 008d42f..bdbceda 100644
 --- a/hw/pc_piix.c
 +++ b/hw/pc_piix.c
 @@ -46,6 +46,7 @@
  #ifdef CONFIG_XEN
  #  include xen/hvm/hvm_info_table.h
  #endif
 +#include cpu_flags.h

  #define MAX_IDE_BUS 2

 @@ -371,6 +372,7 @@ static QEMUMachine pc_machine_v1_2 = {

  static void pc_machine_v1_1_compat(void)
  {
 +disable_kvm_pv_eoi();
  }

  static void pc_init_pci_v1_1(ram_addr_t ram_size,
 diff --git a/target-i386/cpu.c b/target-i386/cpu.c
 index 120a2e3..0d02fd1 100644
 --- a/target-i386/cpu.c
 +++ b/target-i386/cpu.c
 @@ -23,6 +23,7 @@

  #include cpu.h
  #include kvm.h
 +#include asm/kvm_para.h

  #include qemu-option.h
  #include qemu-config.h
 @@ -33,6 +34,7 @@
  #include hyperv.h

  #include hw/hw.h
 +#include hw/cpu_flags.h

  /* feature flags taken from Intel Processor Identification and the CPUID
   * Instruction and AMD's CPUID Specification.  In cases of disagreement
 @@ -889,6 +891,12 @@ static int cpu_x86_find_by_name(x86_def_t *x86_cpu_def, 
 const char *cpu_model)

  plus_kvm_features = ~0; /* not supported bits will be filtered out later 
 */

 +/* Disable PV EOI for old machine types.
 + * Feature flags can still override. */
 +if (kvm_pv_eoi_disabled()) {
 +plus_kvm_features = ~(0x1 << KVM_FEATURE_PV_EOI);
 +}
 +
  add_flagname_to_bitmaps(hypervisor, plus_features,
  plus_ext_features, plus_ext2_features, plus_ext3_features,
  plus_kvm_features, plus_svm_features);
 --
 MST


--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [Qemu-devel] [PATCHv3 3/4] cpuid: disable pv eoi for 1.1 and older compat types

2012-08-28 Thread Blue Swirl

On Tue, Aug 28, 2012 at 5:22 PM, Michael S. Tsirkin m...@redhat.com wrote:
 On Tue, Aug 28, 2012 at 05:05:25PM +, Blue Swirl wrote:
  +static bool _kvm_pv_eoi_disabled;

 NACK. I find your lack of compliance disturbing.

 Compliance with what? Could you please add some
 motivation for the NACK?

You did not respect my review comments.


 --
 MST
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [Qemu-devel] [PATCH 4/4] kvm: i386: Add classic PCI device assignment

2012-08-28 Thread Blue Swirl

On Tue, Aug 28, 2012 at 5:28 PM, Michael S. Tsirkin m...@redhat.com wrote:
 On Tue, Aug 28, 2012 at 05:01:55PM +, Blue Swirl wrote:
 On Tue, Aug 28, 2012 at 7:35 AM, Michael Tokarev m...@tls.msk.ru wrote:
  On 27.08.2012 22:56, Blue Swirl wrote:
  []
  +static uint32_t slow_bar_readb(void *opaque, target_phys_addr_t addr)
  +{
  +AssignedDevRegion *d = opaque;
  +uint8_t *in = d->u.r_virtbase + addr;
 
  Don't perform arithmetic with void pointers.
 
  There are a few places in common qemu code which does this for a very
  long time.  So I guess it is safe now.

 It's a non-standard GCC extension.

 So?  We use many other GCC extensions. grep for typeof.

Dependencies should not be introduced trivially. In this case, it's
pretty easy to avoid void pointer arithmetic as Jan's next version shows.


 Is there a work in progress to build GCC with visual studio?
 If yes what are the chances KVM device assignment
 will work on windows?

IIRC there was really a project to use KVM on Windows and another
project to build QEMU with MSVC.


 Look QEMU codebase is what it is. Unless you rework all existing
 code to conform to your taste, I do not see why you NACK valid new code
 unless it conforms to same.

Yes, I'd be happy to fix the style with huge patches at once. But our
fearless leader does not agree, so we are stuck with the codebase
being what it is until it is fixed one step at a time.


 
  /mjt
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [Qemu-devel] [PATCH 4/4] kvm: i386: Add classic PCI device assignment

2012-08-28 Thread Blue Swirl

On Tue, Aug 28, 2012 at 7:31 PM, Anthony Liguori anth...@codemonkey.ws wrote:
 Blue Swirl blauwir...@gmail.com writes:

 On Tue, Aug 28, 2012 at 5:28 PM, Michael S. Tsirkin m...@redhat.com wrote:
 On Tue, Aug 28, 2012 at 05:01:55PM +, Blue Swirl wrote:
 On Tue, Aug 28, 2012 at 7:35 AM, Michael Tokarev m...@tls.msk.ru wrote:
  On 27.08.2012 22:56, Blue Swirl wrote:
  []
  +static uint32_t slow_bar_readb(void *opaque, target_phys_addr_t addr)
  +{
  +AssignedDevRegion *d = opaque;
  +uint8_t *in = d->u.r_virtbase + addr;
 
  Don't perform arithmetic with void pointers.
 
  There are a few places in common qemu code which does this for a very
  long time.  So I guess it is safe now.

 It's a non-standard GCC extension.

 So?  We use many other GCC extensions. grep for typeof.

 Dependencies should not be introduced trivially. In this case, it's
 pretty easy to avoid void pointer arithmetic as Jan's next version
 shows.

 The standard is vague with respect to void arithmetic.  Most compilers
 allow it.  A very good analysis of the standard can be found below.

 http://stackoverflow.com/questions/3523145/pointer-arithmetic-for-void-pointer-in-c

The analysis would seem to show that arithmetic may be acceptable, but
it doesn't say that void pointers must be treated like char pointers.
In my view, this would make sense:

char *cptr;
void *vptr;

Since
cptr++;
is equivalent to
cptr = (char *)((uintptr_t)cptr + sizeof(*cptr));

therefore

vptr++;
should be equivalent to
vptr = (void *)((uintptr_t)vptr + sizeof(*vptr));

That is, vptr++ should be equivalent to vptr += 0 because sizeof(void)
should be 0 if allowed.


 BTW: can we please stop arguing about C standards.  If we currently are
 using something in QEMU that's supported by clang and GCC, it's fine and
 we ought to continue using it.

 The reserved names actually did bite us when porting to a new platform.
 But the only requirement for C extensions ought to be reasonable support
 in GCC and clang.

 I don't care at all about supporting proprietary compilers.

We also don't have crowds banging doors with their money bags with a
need for such support.


 Regards,

 Anthony Liguori



 Is there a work in progress to build GCC with visual studio?
 If yes what are the chances KVM device assignment
 will work on windows?

 IIRC there was really a project to use KVM on Windows and another
 project to build QEMU with MSVC.


 Look QEMU codebase is what it is. Unless you rework all existing
 code to conform to your taste, I do not see why you NACK valid new code
 unless it conforms to same.

 Yes, I'd be happy to fix the style with huge patches at once. But our
 fearless leader does not agree, so we are stuck with the codebase
 being what it is until it is fixed one step at a time.


 
  /mjt
 --
 To unsubscribe from this list: send the line unsubscribe kvm in
 the body of a message to majord...@vger.kernel.org
 More majordomo info at  http://vger.kernel.org/majordomo-info.html
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [Qemu-devel] [PATCHv2 3/4] cpuid: disable pv eoi for 1.1 and older compat types

2012-08-27 Thread Blue Swirl

On Mon, Aug 27, 2012 at 12:20 PM, Michael S. Tsirkin m...@redhat.com wrote:
 In preparation for adding PV EOI support, disable PV EOI by default for
 1.1 and older machine types, to avoid CPUID changing during migration.

 PV EOI can still be enabled/disabled by specifying it explicitly.
 Enable for 1.1
 -M pc-1.1 -cpu kvm64,+kvm_pv_eoi
 Disable for 1.2
 -M pc-1.2 -cpu kvm64,-kvm_pv_eoi

 Signed-off-by: Michael S. Tsirkin m...@redhat.com
 ---
  hw/Makefile.objs  |  2 +-
  hw/cpu_flags.c| 32 
  hw/cpu_flags.h|  9 +
  hw/pc_piix.c  |  2 ++
  target-i386/cpu.c |  8 
  5 files changed, 52 insertions(+), 1 deletion(-)
  create mode 100644 hw/cpu_flags.c
  create mode 100644 hw/cpu_flags.h

 diff --git a/hw/Makefile.objs b/hw/Makefile.objs
 index 850b87b..3f2532a 100644
 --- a/hw/Makefile.objs
 +++ b/hw/Makefile.objs
 @@ -1,5 +1,5 @@
  hw-obj-y = usb/ ide/
 -hw-obj-y += loader.o
 +hw-obj-y += loader.o cpu_flags.o
  hw-obj-$(CONFIG_VIRTIO) += virtio-console.o
  hw-obj-$(CONFIG_VIRTIO_PCI) += virtio-pci.o
  hw-obj-y += fw_cfg.o
 diff --git a/hw/cpu_flags.c b/hw/cpu_flags.c
 new file mode 100644
 index 000..2422d20
 --- /dev/null
 +++ b/hw/cpu_flags.c
 @@ -0,0 +1,32 @@
 +/*
 + * CPU compatibility flags.
 + *
 + * Copyright (c) 2012 Red Hat Inc.
 + * Author: Michael S. Tsirkin.
 + *
 + * This program is free software; you can redistribute it and/or modify
 + * it under the terms of the GNU General Public License as published by
 + * the Free Software Foundation; either version 2 of the License, or
 + * (at your option) any later version.
 + *
 + * This program is distributed in the hope that it will be useful,
 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 + * GNU General Public License for more details.
 + *
 + * You should have received a copy of the GNU General Public License along
 + * with this program; if not, see http://www.gnu.org/licenses/.
 + */
 +#include hw/cpu_flags.h
 +
 +static bool __kvm_pv_eoi_disabled;

Don't use identifiers with leading underscores.

 +
 +void disable_kvm_pv_eoi(void)
 +{
 +   __kvm_pv_eoi_disabled = true;
 +}
 +
 +bool kvm_pv_eoi_disabled(void)
 +{
 +   return __kvm_pv_eoi_disabled;
 +}
 diff --git a/hw/cpu_flags.h b/hw/cpu_flags.h
 new file mode 100644
 index 000..05777b6
 --- /dev/null
 +++ b/hw/cpu_flags.h
 @@ -0,0 +1,9 @@
 +#ifndef HW_CPU_FLAGS_H
 +#define HW_CPU_FLAGS_H
 +
 +#include stdbool.h
 +
 +void disable_kvm_pv_eoi(void);
 +bool kvm_pv_eoi_disabled(void);
 +
 +#endif
 diff --git a/hw/pc_piix.c b/hw/pc_piix.c
 index 008d42f..bdbceda 100644
 --- a/hw/pc_piix.c
 +++ b/hw/pc_piix.c
 @@ -46,6 +46,7 @@
  #ifdef CONFIG_XEN
  #  include xen/hvm/hvm_info_table.h
  #endif
 +#include cpu_flags.h

  #define MAX_IDE_BUS 2

 @@ -371,6 +372,7 @@ static QEMUMachine pc_machine_v1_2 = {

  static void pc_machine_v1_1_compat(void)
  {
 +disable_kvm_pv_eoi();
  }

  static void pc_init_pci_v1_1(ram_addr_t ram_size,
 diff --git a/target-i386/cpu.c b/target-i386/cpu.c
 index 120a2e3..0d02fd1 100644
 --- a/target-i386/cpu.c
 +++ b/target-i386/cpu.c
 @@ -23,6 +23,7 @@

  #include cpu.h
  #include kvm.h
 +#include asm/kvm_para.h

  #include qemu-option.h
  #include qemu-config.h
 @@ -33,6 +34,7 @@
  #include hyperv.h

  #include hw/hw.h
 +#include hw/cpu_flags.h

  /* feature flags taken from Intel Processor Identification and the CPUID
   * Instruction and AMD's CPUID Specification.  In cases of disagreement
 @@ -889,6 +891,12 @@ static int cpu_x86_find_by_name(x86_def_t *x86_cpu_def, 
 const char *cpu_model)

  plus_kvm_features = ~0; /* not supported bits will be filtered out later 
 */

 +/* Disable PV EOI for old machine types.
 + * Feature flags can still override. */
 +if (kvm_pv_eoi_disabled()) {
 +plus_kvm_features = ~(0x1 << KVM_FEATURE_PV_EOI);
 +}
 +
  add_flagname_to_bitmaps(hypervisor, plus_features,
  plus_ext_features, plus_ext2_features, plus_ext3_features,
  plus_kvm_features, plus_svm_features);
 --
 MST


--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [Qemu-devel] [PATCH 4/4] kvm: i386: Add classic PCI device assignment

2012-08-27 Thread Blue Swirl

On Mon, Aug 27, 2012 at 7:01 PM, Michael S. Tsirkin m...@redhat.com wrote:
 On Mon, Aug 27, 2012 at 06:56:38PM +, Blue Swirl wrote:
  +static uint32_t slow_bar_readb(void *opaque, target_phys_addr_t addr)
  +{
  +AssignedDevRegion *d = opaque;
  +uint8_t *in = d->u.r_virtbase + addr;

 Don't perform arithmetic with void pointers.

 Why not?
 We require gcc and it's a documented extension there.

We don't require GCC, Clang can be used for some targets already.
Though it supports this non-standard extension too.

It's a bad idea to introduce dependencies where it's not necessary.

In this case it's not much effort to add the identifier for the struct
and in fact the only benefit ever is that the lazy coder saves a few
key presses.
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [Qemu-devel] [PATCHv2 3/4] cpuid: disable pv eoi for 1.1 and older compat types

2012-08-27 Thread Blue Swirl

On Mon, Aug 27, 2012 at 7:06 PM, Michael S. Tsirkin m...@redhat.com wrote:
 On Mon, Aug 27, 2012 at 06:58:29PM +, Blue Swirl wrote:
 On Mon, Aug 27, 2012 at 12:20 PM, Michael S. Tsirkin m...@redhat.com wrote:
  In preparation for adding PV EOI support, disable PV EOI by default for
  1.1 and older machine types, to avoid CPUID changing during migration.
 
  PV EOI can still be enabled/disabled by specifying it explicitly.
  Enable for 1.1
  -M pc-1.1 -cpu kvm64,+kvm_pv_eoi
  Disable for 1.2
  -M pc-1.2 -cpu kvm64,-kvm_pv_eoi
 
  Signed-off-by: Michael S. Tsirkin m...@redhat.com
  ---
   hw/Makefile.objs  |  2 +-
   hw/cpu_flags.c| 32 
   hw/cpu_flags.h|  9 +
   hw/pc_piix.c  |  2 ++
   target-i386/cpu.c |  8 
   5 files changed, 52 insertions(+), 1 deletion(-)
   create mode 100644 hw/cpu_flags.c
   create mode 100644 hw/cpu_flags.h
 
  diff --git a/hw/Makefile.objs b/hw/Makefile.objs
  index 850b87b..3f2532a 100644
  --- a/hw/Makefile.objs
  +++ b/hw/Makefile.objs
  @@ -1,5 +1,5 @@
   hw-obj-y = usb/ ide/
  -hw-obj-y += loader.o
  +hw-obj-y += loader.o cpu_flags.o
   hw-obj-$(CONFIG_VIRTIO) += virtio-console.o
   hw-obj-$(CONFIG_VIRTIO_PCI) += virtio-pci.o
   hw-obj-y += fw_cfg.o
  diff --git a/hw/cpu_flags.c b/hw/cpu_flags.c
  new file mode 100644
  index 000..2422d20
  --- /dev/null
  +++ b/hw/cpu_flags.c
  @@ -0,0 +1,32 @@
  +/*
  + * CPU compatibility flags.
  + *
  + * Copyright (c) 2012 Red Hat Inc.
  + * Author: Michael S. Tsirkin.
  + *
  + * This program is free software; you can redistribute it and/or modify
  + * it under the terms of the GNU General Public License as published by
  + * the Free Software Foundation; either version 2 of the License, or
  + * (at your option) any later version.
  + *
  + * This program is distributed in the hope that it will be useful,
  + * but WITHOUT ANY WARRANTY; without even the implied warranty of
  + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  + * GNU General Public License for more details.
  + *
  + * You should have received a copy of the GNU General Public License along
  + * with this program; if not, see http://www.gnu.org/licenses/.
  + */
  +#include hw/cpu_flags.h
  +
  +static bool __kvm_pv_eoi_disabled;

 Don't use identifiers with leading underscores.

 C99 spec says 
 Any other predefined macro names
 shall begin with a leading underscore followed by an uppercase letter or
 a second underscore.
 

 what are chances of compiler predefining macro __kvm_pv_eoi_disabled?

Why do you even consider that since it's trivially easy to use
something else? If a standard (and HACKING in our case) specifies
something, why do you want to fight it?


 But OK, will rename _kvm_pv_eoi_disabled.
 _ + lower case is guaranteed OK.

No, just use kvm_pv_eoi_disabled, the underscore is useless.



  +
  +void disable_kvm_pv_eoi(void)
  +{
  +   __kvm_pv_eoi_disabled = true;
  +}
  +
  +bool kvm_pv_eoi_disabled(void)
  +{
  +   return __kvm_pv_eoi_disabled;
  +}
  diff --git a/hw/cpu_flags.h b/hw/cpu_flags.h
  new file mode 100644
  index 000..05777b6
  --- /dev/null
  +++ b/hw/cpu_flags.h
  @@ -0,0 +1,9 @@
  +#ifndef HW_CPU_FLAGS_H
  +#define HW_CPU_FLAGS_H
  +
  +#include stdbool.h
  +
  +void disable_kvm_pv_eoi(void);
  +bool kvm_pv_eoi_disabled(void);
  +
  +#endif
  diff --git a/hw/pc_piix.c b/hw/pc_piix.c
  index 008d42f..bdbceda 100644
  --- a/hw/pc_piix.c
  +++ b/hw/pc_piix.c
  @@ -46,6 +46,7 @@
   #ifdef CONFIG_XEN
   #  include xen/hvm/hvm_info_table.h
   #endif
  +#include cpu_flags.h
 
   #define MAX_IDE_BUS 2
 
  @@ -371,6 +372,7 @@ static QEMUMachine pc_machine_v1_2 = {
 
   static void pc_machine_v1_1_compat(void)
   {
  +disable_kvm_pv_eoi();
   }
 
   static void pc_init_pci_v1_1(ram_addr_t ram_size,
  diff --git a/target-i386/cpu.c b/target-i386/cpu.c
  index 120a2e3..0d02fd1 100644
  --- a/target-i386/cpu.c
  +++ b/target-i386/cpu.c
  @@ -23,6 +23,7 @@
 
   #include cpu.h
   #include kvm.h
  +#include asm/kvm_para.h
 
   #include qemu-option.h
   #include qemu-config.h
  @@ -33,6 +34,7 @@
   #include hyperv.h
 
   #include hw/hw.h
  +#include hw/cpu_flags.h
 
   /* feature flags taken from Intel Processor Identification and the CPUID
* Instruction and AMD's CPUID Specification.  In cases of disagreement
  @@ -889,6 +891,12 @@ static int cpu_x86_find_by_name(x86_def_t 
  *x86_cpu_def, const char *cpu_model)
 
   plus_kvm_features = ~0; /* not supported bits will be filtered out 
  later */
 
  +/* Disable PV EOI for old machine types.
  + * Feature flags can still override. */
  +if (kvm_pv_eoi_disabled()) {
  +plus_kvm_features = ~(0x1 << KVM_FEATURE_PV_EOI);
  +}
  +
   add_flagname_to_bitmaps(hypervisor, plus_features,
   plus_ext_features, plus_ext2_features, plus_ext3_features,
   plus_kvm_features, plus_svm_features);
  --
  MST
 
 
--
To unsubscribe from this list: send

Re: [Qemu-devel] [PATCHv2 3/4] cpuid: disable pv eoi for 1.1 and older compat types

2012-08-27 Thread Blue Swirl

On Mon, Aug 27, 2012 at 7:24 PM, Michael S. Tsirkin m...@redhat.com wrote:
 On Mon, Aug 27, 2012 at 07:12:27PM +, Blue Swirl wrote:
 On Mon, Aug 27, 2012 at 7:06 PM, Michael S. Tsirkin m...@redhat.com wrote:
  On Mon, Aug 27, 2012 at 06:58:29PM +, Blue Swirl wrote:
  On Mon, Aug 27, 2012 at 12:20 PM, Michael S. Tsirkin m...@redhat.com 
  wrote:
   In preparation for adding PV EOI support, disable PV EOI by default for
   1.1 and older machine types, to avoid CPUID changing during migration.
  
   PV EOI can still be enabled/disabled by specifying it explicitly.
   Enable for 1.1
   -M pc-1.1 -cpu kvm64,+kvm_pv_eoi
   Disable for 1.2
   -M pc-1.2 -cpu kvm64,-kvm_pv_eoi
  
   Signed-off-by: Michael S. Tsirkin m...@redhat.com
   ---
hw/Makefile.objs  |  2 +-
hw/cpu_flags.c| 32 
hw/cpu_flags.h|  9 +
hw/pc_piix.c  |  2 ++
target-i386/cpu.c |  8 
5 files changed, 52 insertions(+), 1 deletion(-)
create mode 100644 hw/cpu_flags.c
create mode 100644 hw/cpu_flags.h
  
   diff --git a/hw/Makefile.objs b/hw/Makefile.objs
   index 850b87b..3f2532a 100644
   --- a/hw/Makefile.objs
   +++ b/hw/Makefile.objs
   @@ -1,5 +1,5 @@
hw-obj-y = usb/ ide/
   -hw-obj-y += loader.o
   +hw-obj-y += loader.o cpu_flags.o
hw-obj-$(CONFIG_VIRTIO) += virtio-console.o
hw-obj-$(CONFIG_VIRTIO_PCI) += virtio-pci.o
hw-obj-y += fw_cfg.o
   diff --git a/hw/cpu_flags.c b/hw/cpu_flags.c
   new file mode 100644
   index 000..2422d20
   --- /dev/null
   +++ b/hw/cpu_flags.c
   @@ -0,0 +1,32 @@
   +/*
   + * CPU compatibility flags.
   + *
   + * Copyright (c) 2012 Red Hat Inc.
   + * Author: Michael S. Tsirkin.
   + *
   + * This program is free software; you can redistribute it and/or modify
   + * it under the terms of the GNU General Public License as published by
   + * the Free Software Foundation; either version 2 of the License, or
   + * (at your option) any later version.
   + *
   + * This program is distributed in the hope that it will be useful,
   + * but WITHOUT ANY WARRANTY; without even the implied warranty of
   + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   + * GNU General Public License for more details.
   + *
   + * You should have received a copy of the GNU General Public License 
   along
   + * with this program; if not, see http://www.gnu.org/licenses/.
   + */
   +#include hw/cpu_flags.h
   +
   +static bool __kvm_pv_eoi_disabled;
 
  Don't use identifiers with leading underscores.
 
  C99 spec says 
  Any other predefined macro names
  shall begin with a leading underscore followed by an uppercase letter or
  a second underscore.
  
 
  what are chances of compiler predefining macro __kvm_pv_eoi_disabled?

 Why do you even consider that since it's trivially easy to use
 something else? If a standard (and HACKING in our case) specifies
 something, why do you want to fight it?

 I missed this in HACKING, you are right:

 2.4. Reserved namespaces in C and POSIX
 Underscore capital, double underscore, and underscore 't' suffixes
 should be avoided.

 so _kvm_pv_eoi_disabled is ok __kvm_pv_eoi_disabled is not.
 Will fix.

No leading underscores. They are not used in QEMU.


 
  But OK, will rename _kvm_pv_eoi_disabled.
  _ + lower case is guaranteed OK.

 No, just use kvm_pv_eoi_disabled, the underscore is useless.

 It isn't useless, this avoids conflict with function name.
 _ says it's an internal variable used to implement kvm_pv_eoi_disabled
 in a very clear way.

Sure, but there is an infinite number of ways of making the identifiers
unique. Using leading underscores is a sure way to eventually conflict with
the compiler, linker, libc, POSIX etc. Don't do it.

Where's your imagination, can't you invent any other prefix or suffix?


 --
 MST
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [Qemu-devel] [PATCH v8 5/6] introduce a new qom device to deal with panicked event

2012-08-25 Thread Blue Swirl

On Wed, Aug 22, 2012 at 7:30 AM, Wen Congyang we...@cn.fujitsu.com wrote:
 At 08/09/2012 03:01 AM, Blue Swirl Wrote:
 On Wed, Aug 8, 2012 at 2:47 AM, Wen Congyang we...@cn.fujitsu.com wrote:
 If the target is x86/x86_64, the guest's kernel will write 0x01 to the
 port KVM_PV_EVENT_PORT when it is panicked. This patch introduces a new
 qom device kvm_pv_ioport to listen this I/O port, and deal with panicked
 event according to panicked_action's value. The possible actions are:
 1. emit QEVENT_GUEST_PANICKED only
 2. emit QEVENT_GUEST_PANICKED and pause the guest
 3. emit QEVENT_GUEST_PANICKED and poweroff the guest
 4. emit QEVENT_GUEST_PANICKED and reset the guest

 I/O ports do not work for some targets (for example: s390). You
 can implement another qom device, and include its code into pv_event.c
 for such a target.

 Note: if we emit QEVENT_GUEST_PANICKED only, and the management
 application does not receive this event(the management may not
 run when the event is emitted), the management won't know the
 guest is panicked.

 Signed-off-by: Wen Congyang we...@cn.fujitsu.com
 ---
  hw/kvm/Makefile.objs |2 +-
  hw/kvm/pv_event.c|  109 
 ++
  hw/kvm/pv_ioport.c   |   93 ++
  hw/pc_piix.c |9 
  kvm.h|2 +
  5 files changed, 214 insertions(+), 1 deletions(-)
  create mode 100644 hw/kvm/pv_event.c
  create mode 100644 hw/kvm/pv_ioport.c

 diff --git a/hw/kvm/Makefile.objs b/hw/kvm/Makefile.objs
 index 226497a..23e3b30 100644
 --- a/hw/kvm/Makefile.objs
 +++ b/hw/kvm/Makefile.objs
 @@ -1 +1 @@
 -obj-$(CONFIG_KVM) += clock.o apic.o i8259.o ioapic.o i8254.o
 +obj-$(CONFIG_KVM) += clock.o apic.o i8259.o ioapic.o i8254.o pv_event.o
 diff --git a/hw/kvm/pv_event.c b/hw/kvm/pv_event.c
 new file mode 100644
 index 000..8897237
 --- /dev/null
 +++ b/hw/kvm/pv_event.c
 @@ -0,0 +1,109 @@
 +/*
 + * QEMU KVM support, paravirtual event device
 + *
 + * Copyright Fujitsu, Corp. 2012
 + *
 + * Authors:
 + * Wen Congyang we...@cn.fujitsu.com
 + *
 + * This work is licensed under the terms of the GNU GPL, version 2 or 
 later.
 + * See the COPYING file in the top-level directory.
 + *
 + */
 +
 +#include linux/kvm_para.h
 +#include asm/kvm_para.h
 +#include qobject.h
 +#include qjson.h
 +#include monitor.h
 +#include sysemu.h
 +#include kvm.h
 +
 +/* Possible values for action parameter. */
 +#define PANICKED_REPORT 1   /* emit QEVENT_GUEST_PANICKED only */
 +#define PANICKED_PAUSE  2   /* emit QEVENT_GUEST_PANICKED and pause VM 
 */
 +#define PANICKED_POWEROFF   3   /* emit QEVENT_GUEST_PANICKED and quit VM 
 */
 +#define PANICKED_RESET  4   /* emit QEVENT_GUEST_PANICKED and reset VM 
 */
 +
 +#define PV_EVENT_DRIVER kvm_pv_event
 +
 +struct pv_event_action {

 PVEventAction

 +char *panicked_action;
 +int panicked_action_value;
 +};
 +
 +#define DEFINE_PV_EVENT_PROPERTIES(_state, _conf)   \
 +DEFINE_PROP_STRING(panicked_action, _state, _conf.panicked_action)
 +
 +static void panicked_mon_event(const char *action)
 +{
 +QObject *data;
 +
 +data = qobject_from_jsonf("{ 'action': %s }", action);
 +monitor_protocol_event(QEVENT_GUEST_PANICKED, data);
 +qobject_decref(data);
 +}
 +
 +static void panicked_perform_action(uint32_t panicked_action)
 +{
 +switch (panicked_action) {
 +case PANICKED_REPORT:
 +panicked_mon_event("report");
 +break;
 +
 +case PANICKED_PAUSE:
 +panicked_mon_event("pause");
 +vm_stop(RUN_STATE_GUEST_PANICKED);
 +break;
 +
 +case PANICKED_POWEROFF:
 +panicked_mon_event("poweroff");
 +qemu_system_shutdown_request();
 +break;

 Misses a line break unlike other cases.

 +case PANICKED_RESET:
 +panicked_mon_event("reset");
 +qemu_system_reset_request();
 +break;
 +}
 +}
 +
 +static uint64_t supported_event(void)
 +{
 +return 1 << KVM_PV_FEATURE_PANICKED;
 +}
 +
 +static void handle_event(int event, struct pv_event_action *conf)
 +{
 +if (event == KVM_PV_EVENT_PANICKED) {
 +panicked_perform_action(conf->panicked_action_value);
 +}
 +}
 +
 +static int pv_event_init(struct pv_event_action *conf)
 +{
 +if (!conf->panicked_action) {
 +conf->panicked_action_value = PANICKED_REPORT;
 +} else if (strcasecmp(conf->panicked_action, "none") == 0) {
 +conf->panicked_action_value = PANICKED_REPORT;
 +} else if (strcasecmp(conf->panicked_action, "pause") == 0) {
 +conf->panicked_action_value = PANICKED_PAUSE;
 +} else if (strcasecmp(conf->panicked_action, "poweroff") == 0) {
 +conf->panicked_action_value = PANICKED_POWEROFF;
 +} else if (strcasecmp(conf->panicked_action, "reset") == 0) {
 +conf->panicked_action_value = PANICKED_RESET;
 +} else {
 +return -1;
 +}
 +
 +return 0;
 +}
 +
 +#if defined(KVM_PV_EVENT_PORT)
 +
 +#include pv_ioport.c

 I'd rather

Re: [Qemu-devel] [RFC-v2 1/6] msix: Work-around for vhost-scsi with KVM in-kernel MSI injection

2012-08-13 Thread Blue Swirl

On Mon, Aug 13, 2012 at 8:35 AM, Nicholas A. Bellinger
n...@linux-iscsi.org wrote:
 From: Nicholas Bellinger n...@linux-iscsi.org

 This is required to get past the following assert with:

 commit 1523ed9e1d46b0b54540049d491475ccac7e6421
 Author: Jan Kiszka jan.kis...@siemens.com
 Date:   Thu May 17 10:32:39 2012 -0300

 virtio/vhost: Add support for KVM in-kernel MSI injection

 Cc: Stefan Hajnoczi stefa...@linux.vnet.ibm.com
 Cc: Jan Kiszka jan.kis...@siemens.com
 Cc: Paolo Bonzini pbonz...@redhat.com
 Cc: Anthony Liguori aligu...@us.ibm.com
 Signed-off-by: Nicholas Bellinger n...@linux-iscsi.org
 ---
  hw/msix.c |3 +++
  1 files changed, 3 insertions(+), 0 deletions(-)

 diff --git a/hw/msix.c b/hw/msix.c
 index 800fc32..c1e6dc3 100644
 --- a/hw/msix.c
 +++ b/hw/msix.c
 @@ -544,6 +544,9 @@ void msix_unset_vector_notifiers(PCIDevice *dev)
  {
  int vector;

 +if (!dev->msix_vector_use_notifier && !dev->msix_vector_release_notifier)
 +return;

Missing braces, please read CODING_STYLE.

 +
  assert(dev->msix_vector_use_notifier &&
 dev->msix_vector_release_notifier);

 --
 1.7.2.5


--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [Qemu-devel] [RFC-v2 3/6] vhost-scsi: add -vhost-scsi host device for use with tcm-vhost

2012-08-13 Thread Blue Swirl

On Mon, Aug 13, 2012 at 8:35 AM, Nicholas A. Bellinger
n...@linux-iscsi.org wrote:
 From: Stefan Hajnoczi stefa...@linux.vnet.ibm.com

 This patch adds a new type of host device that drives the vhost_scsi
 device.  The syntax to add vhost-scsi is:

   qemu -vhost-scsi id=vhost-scsi0,wwpn=...,tpgt=123

 The virtio-scsi emulated device will make use of vhost-scsi to process
 virtio-scsi requests inside the kernel and hand them to the in-kernel
 SCSI target stack using the tcm_vhost fabric driver.

 The tcm_vhost driver was merged into the upstream linux kernel for 3.6-rc2,
 and the commit can be found here:

 http://git.kernel.org/?p=linux/kernel/git/torvalds/linux.git;a=commitdiff;h=057cbf49a1f08297

 Changelog v1 - v2:

 - Expose ABI version via VHOST_SCSI_GET_ABI_VERSION + use Rev 0 as
   starting point for v3.6-rc code (Stefan + ALiguori + nab)
 - Fix upstream qemu conflict in hw/qdev-properties.c
 - Make GET_ABI_VERSION use int (nab + mst)
 - Fix vhost-scsi case labels in configure (reported by paolo)
 - Convert qdev_prop_vhost_scsi to use -get() + -set() following
   qdev_prop_netdev (reported by paolo)
 - Fix typo in qemu-options.hx definition of vhost-scsi (reported by paolo)

 Changelog v0 - v1:

 - Add VHOST_SCSI_SET_ENDPOINT call (stefan)
 - Enable vhost notifiers for multiple queues (Zhi)
 - clear vhost-scsi endpoint on stopped (Zhi)
 - Add CONFIG_VHOST_SCSI for QEMU build configure (nab)
 - Rename vhost_vring_target - vhost_scsi_target (mst + nab)
 - Add support for VHOST_SCSI_GET_ABI_VERSION ioctl (aliguori + nab)

 Cc: Stefan Hajnoczi stefa...@linux.vnet.ibm.com
 Cc: Zhi Yong Wu wu...@linux.vnet.ibm.com
 Cc: Anthony Liguori aligu...@us.ibm.com
 Cc: Paolo Bonzini pbonz...@redhat.com
 Cc: Michael S. Tsirkin m...@redhat.com
 Signed-off-by: Nicholas Bellinger n...@linux-iscsi.org
 ---
  configure|   10 +++
  hw/Makefile.objs |1 +
  hw/qdev-properties.c |   40 
  hw/qdev.h|3 +
  hw/vhost-scsi.c  |  170 
 ++
  hw/vhost-scsi.h  |   50 +++
  qemu-common.h|1 +
  qemu-config.c|   16 +
  qemu-options.hx  |4 +
  vl.c |   18 +
  10 files changed, 313 insertions(+), 0 deletions(-)
  create mode 100644 hw/vhost-scsi.c
  create mode 100644 hw/vhost-scsi.h

 diff --git a/configure b/configure
 index f0dbc03..1f03202 100755
 --- a/configure
 +++ b/configure
 @@ -168,6 +168,7 @@ libattr=
  xfs=

  vhost_net=no
 +vhost_scsi=no
  kvm=no
  gprof=no
  debug_tcg=no
 @@ -513,6 +514,7 @@ Haiku)
usb=linux
kvm=yes
vhost_net=yes
 +  vhost_scsi=yes
if [ $cpu = i386 -o $cpu = x86_64 ] ; then
  audio_possible_drivers=$audio_possible_drivers fmod
fi
 @@ -818,6 +820,10 @@ for opt do
;;
--enable-vhost-net) vhost_net=yes
;;
 +  --disable-vhost-scsi) vhost_scsi=no
 +  ;;
 +  --enable-vhost-scsi) vhost_scsi=yes
 +  ;;
--disable-opengl) opengl=no
;;
--enable-opengl) opengl=yes
 @@ -3116,6 +3122,7 @@ echo posix_madvise $posix_madvise
  echo uuid support  $uuid
  echo libcap-ng support $cap_ng
  echo vhost-net support $vhost_net
 +echo vhost-scsi support $vhost_scsi
  echo Trace backend $trace_backend
  echo Trace output file $trace_file-pid
  echo spice support $spice
 @@ -3828,6 +3835,9 @@ case $target_arch2 in
if test $vhost_net = yes ; then
  echo CONFIG_VHOST_NET=y  $config_target_mak
fi
 +  if test $vhost_scsi = yes ; then
 +echo CONFIG_VHOST_SCSI=y  $config_target_mak
 +  fi
  fi
  esac
  case $target_arch2 in
 diff --git a/hw/Makefile.objs b/hw/Makefile.objs
 index 3ba5dd0..6ab75ec 100644
 --- a/hw/Makefile.objs
 +++ b/hw/Makefile.objs
 @@ -169,6 +169,7 @@ obj-$(CONFIG_VIRTIO) += virtio.o virtio-blk.o 
 virtio-balloon.o virtio-net.o
  obj-$(CONFIG_VIRTIO) += virtio-serial-bus.o virtio-scsi.o
  obj-$(CONFIG_SOFTMMU) += vhost_net.o
  obj-$(CONFIG_VHOST_NET) += vhost.o
 +obj-$(CONFIG_VHOST_SCSI) += vhost-scsi.o
  obj-$(CONFIG_REALLY_VIRTFS) += 9pfs/
  obj-$(CONFIG_NO_PCI) += pci-stub.o
  obj-$(CONFIG_VGA) += vga.o
 diff --git a/hw/qdev-properties.c b/hw/qdev-properties.c
 index 8aca0d4..0266266 100644
 --- a/hw/qdev-properties.c
 +++ b/hw/qdev-properties.c
 @@ -4,6 +4,7 @@
  #include blockdev.h
  #include hw/block-common.h
  #include net/hub.h
 +#include vhost-scsi.h

  void *qdev_get_prop_ptr(DeviceState *dev, Property *prop)
  {
 @@ -696,6 +697,45 @@ PropertyInfo qdev_prop_vlan = {
  .set   = set_vlan,
  };

 +/* --- vhost-scsi --- */
 +
 +static int parse_vhost_scsi_dev(DeviceState *dev, const char *str, void 
 **ptr)
 +{
 +   VHostSCSI *p;
 +
 +   p = find_vhost_scsi(str);
 +   if (p == NULL)
 +   return -ENOENT;

Braces, please.

 +
 +   *ptr = p;
 +   return 0;
 +}
 +
 +static const char *print_vhost_scsi_dev(void *ptr)
 +{
 +VHostSCSI *p = ptr;
 +
 +return (p) ? vhost_scsi_get_id(p) : null;
 +}
 +
 +static void

Re: [PATCH 0/3] VFIO-based PCI device assignment for QEMU 1.2

2012-08-13 Thread Blue Swirl

On Mon, Aug 13, 2012 at 7:33 PM, Anthony Liguori aligu...@us.ibm.com wrote:
 Alex Williamson alex.william...@redhat.com writes:

 On Mon, 2012-08-13 at 08:27 -0500, Anthony Liguori wrote:
 Alex Williamson alex.william...@redhat.com writes:

  VFIO kernel support was just merged into Linux, so I'd like to
  formally propose inclusion of the QEMU vfio-pci driver for
  QEMU 1.2.  Included here is support for x86 PCI device assignment.
  PCI INTx is not yet enabled, but devices making use of either MSI
  or MSI-X work.  The level irqfd and eoifd support I've proposed
  for KVM enable an accelerated patch for this through KVM.  I'd
  like to get this base driver in first and enable the remaining
  support in-tree.
 
  I've split this version up a little from the RFC to make it a bit
  easier to review.  Review comments from Blue Swirl and Avi are
  already incorporated, including Avi's requests to simplify both
  the PCI BAR mapping and unmapping paths.

 Hi Alex,

 Thanks for pushing this forward!  Hopefully this will finally kill off
 qemu-kvm.git for good.

 I think this series is going to have to wait for 1.3 to open up.  We
 have a very short release window for this release and I'd feel a lot
 more comfortable having such a significant feature spend some time in
 the development cycle getting testing/review.

 I'd like to see a few Reviewed-by's too for this series before it goes
 in.  I expect they won't be hard to get but I also expect it will take a
 few more revisions of this series to get there.

 That's disappointing, but I can understand your reluctance.  Blue Swirl
 reviewed the RFC and could perhaps add a Reviewed-by.  Alexey has been
 working on the POWER port and I'm sure could provide a Reviewed-by.  We
 also have a few early adopters that are already making use of this code.
 Towards accepting it, the driver is entirely self contained, there's
 really no risk to the rest of qemu.  The only missing functionality is
 legacy interrupt support.  Perhaps there's a compromise where this
 driver could be considered a tech preview in 1.2 (x-vfio-pci?).
 Thanks,

 Yeah, if a few people were willing to at least give an Acked-by by
 Wednesday, I'd be okay taking this in a preview or something like
 that.

Acked-by: Blue Swirl blauwir...@gmail.com


 I wouldn't bother renaming it or anything like that.  We can just
 declare in the release notes that it's an experimental feature and may
 eat your lunch while you're not looking.

 Regards,

 Anthony Liguori


 Alex

 --
 To unsubscribe from this list: send the line unsubscribe kvm in
 the body of a message to majord...@vger.kernel.org
 More majordomo info at  http://vger.kernel.org/majordomo-info.html

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH 04/15] memory: MemoryRegion topology must be stable when updating

2012-08-09 Thread Blue Swirl

On Thu, Aug 9, 2012 at 7:28 AM, liu ping fan qemul...@gmail.com wrote:
 On Thu, Aug 9, 2012 at 3:17 AM, Blue Swirl blauwir...@gmail.com wrote:
 On Wed, Aug 8, 2012 at 6:25 AM, Liu Ping Fan qemul...@gmail.com wrote:
 From: Liu Ping Fan pingf...@linux.vnet.ibm.com

 Using mem_map_lock to protect among updaters. So we can get the intact
 snapshot of mem topology -- FlatView  radix-tree.

 Signed-off-by: Liu Ping Fan pingf...@linux.vnet.ibm.com
 ---
  exec.c   |3 +++
  memory.c |   22 ++
  memory.h |2 ++
  3 files changed, 27 insertions(+), 0 deletions(-)

 diff --git a/exec.c b/exec.c
 index 8244d54..0e29ef9 100644
 --- a/exec.c
 +++ b/exec.c
 @@ -210,6 +210,8 @@ static unsigned phys_map_nodes_nb, 
 phys_map_nodes_nb_alloc;
 The bottom level has pointers to MemoryRegionSections.  */
  static PhysPageEntry phys_map = { .ptr = PHYS_MAP_NODE_NIL, .is_leaf = 0 };

 +QemuMutex mem_map_lock;
 +
  static void io_mem_init(void);
  static void memory_map_init(void);

 @@ -637,6 +639,7 @@ void cpu_exec_init_all(void)
  #if !defined(CONFIG_USER_ONLY)
  memory_map_init();
  io_mem_init();
 +qemu_mutex_init(mem_map_lock);

 I'd move this and the mutex to memory.c since there are no other uses.
 The mutex could be static then.

 But the init entry is in exec.c, not memory.c.

The memory subsystem does not have an init function of its own; this can
be the start of it.


 Regards,
 pingfan

  #endif
  }

 diff --git a/memory.c b/memory.c
 index aab4a31..5986532 100644
 --- a/memory.c
 +++ b/memory.c
 @@ -761,7 +761,9 @@ void memory_region_transaction_commit(void)
  assert(memory_region_transaction_depth);
  --memory_region_transaction_depth;
  if (!memory_region_transaction_depth  memory_region_update_pending) {
 +qemu_mutex_lock(mem_map_lock);
  memory_region_update_topology(NULL);
 +qemu_mutex_unlock(mem_map_lock);
  }
  }

 @@ -1069,8 +1071,10 @@ void memory_region_set_log(MemoryRegion *mr, bool 
 log, unsigned client)
  {
  uint8_t mask = 1  client;

 +qemu_mutex_lock(mem_map_lock);
  mr-dirty_log_mask = (mr-dirty_log_mask  ~mask) | (log * mask);
  memory_region_update_topology(mr);
 +qemu_mutex_unlock(mem_map_lock);
  }

  bool memory_region_get_dirty(MemoryRegion *mr, target_phys_addr_t addr,
 @@ -1103,8 +1107,10 @@ void memory_region_sync_dirty_bitmap(MemoryRegion 
 *mr)
  void memory_region_set_readonly(MemoryRegion *mr, bool readonly)
  {
  if (mr-readonly != readonly) {
 +qemu_mutex_lock(mem_map_lock);
  mr-readonly = readonly;
  memory_region_update_topology(mr);
 +qemu_mutex_unlock(mem_map_lock);
  }
  }

 @@ -1112,7 +1118,9 @@ void 
 memory_region_rom_device_set_readable(MemoryRegion *mr, bool readable)
  {
  if (mr-readable != readable) {
  mr-readable = readable;
 +qemu_mutex_lock(mem_map_lock);
  memory_region_update_topology(mr);
 +qemu_mutex_unlock(mem_map_lock);
  }
  }

 @@ -1206,6 +1214,7 @@ void memory_region_add_eventfd(MemoryRegion *mr,
  };
  unsigned i;

 +qemu_mutex_lock(mem_map_lock);
  for (i = 0; i  mr-ioeventfd_nb; ++i) {
  if (memory_region_ioeventfd_before(mrfd, mr-ioeventfds[i])) {
  break;
 @@ -1218,6 +1227,7 @@ void memory_region_add_eventfd(MemoryRegion *mr,
  sizeof(*mr-ioeventfds) * (mr-ioeventfd_nb-1 - i));
  mr-ioeventfds[i] = mrfd;
  memory_region_update_topology(mr);
 +qemu_mutex_unlock(mem_map_lock);
  }

  void memory_region_del_eventfd(MemoryRegion *mr,
 @@ -1236,6 +1246,7 @@ void memory_region_del_eventfd(MemoryRegion *mr,
  };
  unsigned i;

 +qemu_mutex_lock(mem_map_lock);
  for (i = 0; i  mr-ioeventfd_nb; ++i) {
  if (memory_region_ioeventfd_equal(mrfd, mr-ioeventfds[i])) {
  break;
 @@ -1248,6 +1259,7 @@ void memory_region_del_eventfd(MemoryRegion *mr,
  mr-ioeventfds = g_realloc(mr-ioeventfds,
sizeof(*mr-ioeventfds)*mr-ioeventfd_nb 
 + 1);
  memory_region_update_topology(mr);
 +qemu_mutex_unlock(mem_map_lock);
  }

  static void memory_region_add_subregion_common(MemoryRegion *mr,
 @@ -1259,6 +1271,8 @@ static void 
 memory_region_add_subregion_common(MemoryRegion *mr,
  assert(!subregion-parent);
  subregion-parent = mr;
  subregion-addr = offset;
 +
 +qemu_mutex_lock(mem_map_lock);
  QTAILQ_FOREACH(other, mr-subregions, subregions_link) {
  if (subregion-may_overlap || other-may_overlap) {
  continue;
 @@ -1289,6 +1303,7 @@ static void 
 memory_region_add_subregion_common(MemoryRegion *mr,
  QTAILQ_INSERT_TAIL(mr-subregions, subregion, subregions_link);
  done:
  memory_region_update_topology(mr);
 +qemu_mutex_unlock(mem_map_lock);
  }


 @@ -1316,8 +1331,11 @@ void memory_region_del_subregion(MemoryRegion *mr,
  {
  assert(subregion-parent == mr);
  subregion-parent = NULL

Re: [Qemu-devel] [PATCH v8 5/6] introduce a new qom device to deal with panicked event

2012-08-08 Thread Blue Swirl

On Wed, Aug 8, 2012 at 2:47 AM, Wen Congyang we...@cn.fujitsu.com wrote:
 If the target is x86/x86_64, the guest's kernel will write 0x01 to the
 port KVM_PV_EVENT_PORT when it is panicked. This patch introduces a new
 qom device kvm_pv_ioport to listen this I/O port, and deal with panicked
 event according to panicked_action's value. The possible actions are:
 1. emit QEVENT_GUEST_PANICKED only
 2. emit QEVENT_GUEST_PANICKED and pause the guest
 3. emit QEVENT_GUEST_PANICKED and poweroff the guest
 4. emit QEVENT_GUEST_PANICKED and reset the guest

 I/O ports do not work for some targets (for example: s390). You
 can implement another qom device and include its code in pv_event.c
 for such a target.

 Note: if we emit QEVENT_GUEST_PANICKED only, and the management
 application does not receive this event(the management may not
 run when the event is emitted), the management won't know the
 guest is panicked.

 Signed-off-by: Wen Congyang we...@cn.fujitsu.com
 ---
  hw/kvm/Makefile.objs |2 +-
  hw/kvm/pv_event.c|  109 
 ++
  hw/kvm/pv_ioport.c   |   93 ++
  hw/pc_piix.c |9 
  kvm.h|2 +
  5 files changed, 214 insertions(+), 1 deletions(-)
  create mode 100644 hw/kvm/pv_event.c
  create mode 100644 hw/kvm/pv_ioport.c

 diff --git a/hw/kvm/Makefile.objs b/hw/kvm/Makefile.objs
 index 226497a..23e3b30 100644
 --- a/hw/kvm/Makefile.objs
 +++ b/hw/kvm/Makefile.objs
 @@ -1 +1 @@
 -obj-$(CONFIG_KVM) += clock.o apic.o i8259.o ioapic.o i8254.o
 +obj-$(CONFIG_KVM) += clock.o apic.o i8259.o ioapic.o i8254.o pv_event.o
 diff --git a/hw/kvm/pv_event.c b/hw/kvm/pv_event.c
 new file mode 100644
 index 000..8897237
 --- /dev/null
 +++ b/hw/kvm/pv_event.c
 @@ -0,0 +1,109 @@
 +/*
 + * QEMU KVM support, paravirtual event device
 + *
 + * Copyright Fujitsu, Corp. 2012
 + *
 + * Authors:
 + * Wen Congyang we...@cn.fujitsu.com
 + *
 + * This work is licensed under the terms of the GNU GPL, version 2 or later.
 + * See the COPYING file in the top-level directory.
 + *
 + */
 +
 +#include linux/kvm_para.h
 +#include asm/kvm_para.h
 +#include qobject.h
 +#include qjson.h
 +#include monitor.h
 +#include sysemu.h
 +#include kvm.h
 +
 +/* Possible values for action parameter. */
 +#define PANICKED_REPORT 1   /* emit QEVENT_GUEST_PANICKED only */
 +#define PANICKED_PAUSE  2   /* emit QEVENT_GUEST_PANICKED and pause VM */
 +#define PANICKED_POWEROFF   3   /* emit QEVENT_GUEST_PANICKED and quit VM */
 +#define PANICKED_RESET  4   /* emit QEVENT_GUEST_PANICKED and reset VM */
 +
 +#define PV_EVENT_DRIVER kvm_pv_event
 +
 +struct pv_event_action {

PVEventAction

 +char *panicked_action;
 +int panicked_action_value;
 +};
 +
 +#define DEFINE_PV_EVENT_PROPERTIES(_state, _conf)   \
 +DEFINE_PROP_STRING(panicked_action, _state, _conf.panicked_action)
 +
 +static void panicked_mon_event(const char *action)
 +{
 +QObject *data;
 +
 +data = qobject_from_jsonf({ 'action': %s }, action);
 +monitor_protocol_event(QEVENT_GUEST_PANICKED, data);
 +qobject_decref(data);
 +}
 +
 +static void panicked_perform_action(uint32_t panicked_action)
 +{
 +switch (panicked_action) {
 +case PANICKED_REPORT:
 +panicked_mon_event(report);
 +break;
 +
 +case PANICKED_PAUSE:
 +panicked_mon_event(pause);
 +vm_stop(RUN_STATE_GUEST_PANICKED);
 +break;
 +
 +case PANICKED_POWEROFF:
 +panicked_mon_event(poweroff);
 +qemu_system_shutdown_request();
 +break;

A blank line is missing here, unlike after the other cases.

 +case PANICKED_RESET:
 +panicked_mon_event(reset);
 +qemu_system_reset_request();
 +break;
 +}
 +}
 +
 +static uint64_t supported_event(void)
 +{
 +return 1  KVM_PV_FEATURE_PANICKED;
 +}
 +
 +static void handle_event(int event, struct pv_event_action *conf)
 +{
 +if (event == KVM_PV_EVENT_PANICKED) {
 +panicked_perform_action(conf-panicked_action_value);
 +}
 +}
 +
 +static int pv_event_init(struct pv_event_action *conf)
 +{
 +if (!conf-panicked_action) {
 +conf-panicked_action_value = PANICKED_REPORT;
 +} else if (strcasecmp(conf-panicked_action, none) == 0) {
 +conf-panicked_action_value = PANICKED_REPORT;
 +} else if (strcasecmp(conf-panicked_action, pause) == 0) {
 +conf-panicked_action_value = PANICKED_PAUSE;
 +} else if (strcasecmp(conf-panicked_action, poweroff) == 0) {
 +conf-panicked_action_value = PANICKED_POWEROFF;
 +} else if (strcasecmp(conf-panicked_action, reset) == 0) {
 +conf-panicked_action_value = PANICKED_RESET;
 +} else {
 +return -1;
 +}
 +
 +return 0;
 +}
 +
 +#if defined(KVM_PV_EVENT_PORT)
 +
 +#include pv_ioport.c

I'd rather not include any .c files but insert the contents here directly.

 +
 +#else
 +void kvm_pv_event_init(void *opaque)
 +{
 +}
 +#endif
 diff

Re: [Qemu-devel] [PATCH 3/5] s390: Add new channel I/O based virtio transport.

2012-08-08 Thread Blue Swirl

On Wed, Aug 8, 2012 at 8:28 AM, Cornelia Huck cornelia.h...@de.ibm.com wrote:
 On Tue, 7 Aug 2012 20:47:22 +
 Blue Swirl blauwir...@gmail.com wrote:


  diff --git a/hw/s390x/virtio-ccw.c b/hw/s390x/virtio-ccw.c
  new file mode 100644
  index 000..8a90c3a
  --- /dev/null
  +++ b/hw/s390x/virtio-ccw.c
  @@ -0,0 +1,962 @@
  +/*
  + * virtio ccw target implementation
  + *
  + * Copyright 2012 IBM Corp.
  + * Author(s): Cornelia Huck cornelia.h...@de.ibm.com
  + *
  + * This work is licensed under the terms of the GNU GPL, version 2 or (at
  + * your option) any later version. See the COPYING file in the top-level
  + * directory.
  + */
  +
  +#include hw/hw.h
  +#include block.h
  +#include blockdev.h
  +#include sysemu.h
  +#include net.h
  +#include monitor.h
  +#include qemu-thread.h
  +#include ../virtio.h
  +#include ../virtio-serial.h
  +#include ../virtio-net.h
  +#include ../sysbus.h

 hw/virtio... for the above

 OK.

  +#include bitops.h
  +
  +#include ioinst.h
  +#include css.h
  +#include virtio-ccw.h
  +
  +static const TypeInfo virtio_ccw_bus_info = {
  +.name = TYPE_VIRTIO_CCW_BUS,
  +.parent = TYPE_BUS,
  +.instance_size = sizeof(VirtioCcwBus),
  +};
  +
  +static const VirtIOBindings virtio_ccw_bindings;
  +
  +typedef struct sch_entry {
  +SubchDev *sch;
  +QLIST_ENTRY(sch_entry) entry;
  +} sch_entry;

 SubchEntry, see CODING_STYLE. Also other struct and typedef names below.

  +
  +QLIST_HEAD(subch_list, sch_entry);

 static, but please put this to a structure that is passed around instead.

  +
  +typedef struct devno_entry {
  +uint16_t devno;
  +QLIST_ENTRY(devno_entry) entry;
  +} devno_entry;
  +
  +QLIST_HEAD(devno_list, devno_entry);

 Ditto

  +
  +struct subch_set {
  +struct subch_list *s_list[256];
  +struct devno_list *d_list[256];
  +};
  +
  +struct css_set {
  +struct subch_set *set[MAX_SSID + 1];
  +};
  +
  +static struct css_set *channel_subsys[MAX_CSSID + 1];

 OK, will try to come up with some kind of structure for this and
 CamelCasify it.

  +
  +VirtIODevice *virtio_ccw_get_vdev(SubchDev *sch)
  +{
  +VirtIODevice *vdev = NULL;
  +
  +if (sch-driver_data) {
  +vdev = ((VirtioCcwData *)sch-driver_data)-vdev;
  +}
  +return vdev;
  +}
  +

  +VirtioCcwBus *virtio_ccw_bus_init(void)
  +{
  +VirtioCcwBus *bus;
  +BusState *_bus;

 Please avoid identifiers with leading underscores.

 OK.


  +DeviceState *dev;
  +
  +css_set_subch_cb(virtio_ccw_find_subch);
  +
  +/* Create bridge device */
  +dev = qdev_create(NULL, virtio-ccw-bridge);
  +qdev_init_nofail(dev);
  +
  +/* Create bus on bridge device */
  +_bus = qbus_create(TYPE_VIRTIO_CCW_BUS, dev, virtio-ccw);
  +bus = DO_UPCAST(VirtioCcwBus, bus, _bus);
  +
  +/* Enable hotplugging */
  +_bus-allow_hotplug = 1;
  +
  +return bus;
  +}
  +
  +struct vq_info_block {
  +uint64_t queue;
  +uint16_t num;
  +} QEMU_PACKED;
  +
  +struct vq_config_block {
  +uint16_t index;
  +uint16_t num;
  +} QEMU_PACKED;

 Aren't these KVM structures? They should be defined in a KVM header
 file in linux-headers.

 Not really, virtio-ccw isn't tied to kvm.

 I see this more as command blocks that are specific to the control
 unit - like something that would be defined in an attachment
 specification for a classic s390 device (and in the virtio spec in this
 case) and modeled as C structures here.

OK. Then please use CamelCase for these too.




  +case CCW_CMD_WRITE_CONF:
  +if (check_len) {
  +if (ccw-count  data-vdev-config_len) {
  +ret = -EINVAL;
  +break;
  +}
  +}
  +len = MIN(ccw-count, data-vdev-config_len);
  +config = qemu_get_ram_ptr(ccw-cda);

 Please use cpu_physical_memory_read() (or DMA versions) instead of
 this + memcpy().

 Will check.

  +if (!config) {
  +ret = -EFAULT;
  +} else {
  +memcpy(data-vdev-config, config, len);
  +if (data-vdev-set_config) {
  +data-vdev-set_config(data-vdev, data-vdev-config);
  +}
  +sch-curr_status.scsw.count = ccw-count - len;
  +ret = 0;
  +}
  +break;

  +case CCW_CMD_READ_VQ_CONF:
  +if (check_len) {
  +if (ccw-count != sizeof(vq_config)) {
  +ret = -EINVAL;
  +break;
  +}
  +} else if (ccw-count  sizeof(vq_config)) {
  +/* Can't execute command. */
  +ret = -EINVAL;
  +break;
  +}
  +if (!qemu_get_ram_ptr(ccw-cda)) {
  +ret = -EFAULT;
  +} else {
  +vq_config.index = lduw_phys(ccw-cda);

 lduw_{b,l}e_phys()

  +vq_config.num = virtio_queue_get_num(data-vdev, 
  vq_config.index);
  +stw_phys(ccw-cda + sizeof(vq_config.index), vq_config.num);

 stw_

Re: [Qemu-devel] [PATCH 2/5] s390: Virtual channel subsystem support.

2012-08-08 Thread Blue Swirl

On Wed, Aug 8, 2012 at 8:17 AM, Cornelia Huck cornelia.h...@de.ibm.com wrote:
 On Tue, 7 Aug 2012 21:00:59 +
 Blue Swirl blauwir...@gmail.com wrote:


  diff --git a/hw/s390x/css.c b/hw/s390x/css.c
  new file mode 100644
  index 000..7941c44
  --- /dev/null
  +++ b/hw/s390x/css.c
  @@ -0,0 +1,440 @@
  +/*
  + * Channel subsystem base support.
  + *
  + * Copyright 2012 IBM Corp.
  + * Author(s): Cornelia Huck cornelia.h...@de.ibm.com
  + *
  + * This work is licensed under the terms of the GNU GPL, version 2 or (at
  + * your option) any later version. See the COPYING file in the top-level
  + * directory.
  + */
  +
  +#include qemu-thread.h
  +#include qemu-queue.h
  +#include hw/qdev.h
  +#include kvm.h
  +#include cpu.h
  +#include ioinst.h
  +#include css.h
  +
  +struct chp_info {

 CamelCase, please.

 OK.

  +uint8_t in_use;
  +uint8_t type;
  +};
  +
  +static struct chp_info chpids[MAX_CSSID + 1][MAX_CHPID + 1];
  +
  +static css_subch_cb_func css_subch_cb;

 Probably these can be put to a container structure which can be passed 
 around.

 Still trying to come up with a good model for that.



  +case CCW_CMD_SENSE_ID:
  +{
  +uint8_t sense_bytes[256];
  +
  +/* Sense ID information is device specific. */
  +memcpy(sense_bytes, sch-id, sizeof(sense_bytes));
  +if (check_len) {
  +if (ccw-count != sizeof(sense_bytes)) {
  +ret = -EINVAL;
  +break;
  +}
  +}
  +len = MIN(ccw-count, sizeof(sense_bytes));
  +/*
  + * Only indicate 0xff in the first sense byte if we actually
  + * have enough place to store at least bytes 0-3.
  + */
  +if (len = 4) {
  +stb_phys(ccw-cda, 0xff);
  +} else {
  +stb_phys(ccw-cda, 0);
  +}
  +i = 1;
  +for (i = 1; i  len - 1; i++) {
  +stb_phys(ccw-cda + i, sense_bytes[i]);
  +}

 cpu_physical_memory_write()

 Hm, what's wrong with storing byte-by-byte?

cpu_physical_memory_write() could be more efficient; for example, it could
resolve guest addresses only once per page.



  +sch-curr_status.scsw.count = ccw-count - len;
  +ret = 0;
  +break;
  +}
  +case CCW_CMD_TIC:
  +if (sch-last_cmd-cmd_code == CCW_CMD_TIC) {
  +ret = -EINVAL;
  +break;
  +}
  +if (ccw-flags  (CCW_FLAG_CC | CCW_FLAG_DC)) {
  +ret = -EINVAL;
  +break;
  +}
  +sch-channel_prog = qemu_get_ram_ptr(ccw-cda);
  +ret = sch-channel_prog ? -EAGAIN : -EFAULT;
  +break;
  +default:
  +if (sch-ccw_cb) {
  +/* Handle device specific commands. */
  +ret = sch-ccw_cb(sch, ccw);
  +} else {
  +ret = -EOPNOTSUPP;
  +}
  +break;
  +}
  +sch-last_cmd = ccw;
  +if (ret == 0) {
  +if (ccw-flags  CCW_FLAG_CC) {
  +sch-channel_prog += 8;
  +ret = -EAGAIN;
  +}
  +}
  +
  +return ret;

  diff --git a/hw/s390x/css.h b/hw/s390x/css.h
  new file mode 100644
  index 000..b8a95cc
  --- /dev/null
  +++ b/hw/s390x/css.h
  @@ -0,0 +1,62 @@
  +/*
  + * Channel subsystem structures and definitions.
  + *
  + * Copyright 2012 IBM Corp.
  + * Author(s): Cornelia Huck cornelia.h...@de.ibm.com
  + *
  + * This work is licensed under the terms of the GNU GPL, version 2 or (at
  + * your option) any later version. See the COPYING file in the top-level
  + * directory.
  + */
  +
  +#ifndef CSS_H
  +#define CSS_H
  +
  +#include ioinst.h
  +
  +/* Channel subsystem constants. */
  +#define MAX_SCHID 65535
  +#define MAX_SSID 3
  +#define MAX_CSSID 254 /* 255 is reserved */
  +#define MAX_CHPID 255
  +
  +#define MAX_CIWS 8
  +
  +struct senseid {

 SenseID

 OK.

  +/* common part */
  +uint32_t  reserved:8;/* always 0x'FF' */

 The standard syntax calls for 'unsigned' instead of uint32_t for bit
 fields. But bit fields are not very well defined, it's better to avoid
 them.

 Well, the equivalent Linux structure also looks like that :) But I can
 switch this to a uint8_t/uint16_t structure.


  +uint32_t  cu_type:16;/* control unit type */
  +uint32_t  cu_model:8;/* control unit model */
  +uint32_t  dev_type:16;   /* device type */
  +uint32_t  dev_model:8;   /* device model */
  +uint32_t  unused:8;  /* padding byte */
  +/* extended part */
  +uint32_t ciw[MAX_CIWS];  /* variable # of CIWs */
  +};
  +

  diff --git a/target-s390x/ioinst.h b/target-s390x/ioinst.h
  new file mode 100644
  index 000..79628b4
  --- /dev/null
  +++ b/target-s390x/ioinst.h
  @@ -0,0 +1,173 @@
  +/*
  + * S/390 channel I/O instructions
  + *
  + * Copyright 2012 IBM Corp.
  + * Author(s): Cornelia Huck cornelia.h...@de.ibm.com
  + *
  + * This work is licensed under the terms of the GNU GPL, version 2

Re: [PATCH 04/15] memory: MemoryRegion topology must be stable when updating

2012-08-08 Thread Blue Swirl

On Wed, Aug 8, 2012 at 6:25 AM, Liu Ping Fan qemul...@gmail.com wrote:
 From: Liu Ping Fan pingf...@linux.vnet.ibm.com

 Using mem_map_lock to protect among updaters. So we can get the intact
 snapshot of mem topology -- FlatView  radix-tree.

 Signed-off-by: Liu Ping Fan pingf...@linux.vnet.ibm.com
 ---
  exec.c   |3 +++
  memory.c |   22 ++
  memory.h |2 ++
  3 files changed, 27 insertions(+), 0 deletions(-)

 diff --git a/exec.c b/exec.c
 index 8244d54..0e29ef9 100644
 --- a/exec.c
 +++ b/exec.c
 @@ -210,6 +210,8 @@ static unsigned phys_map_nodes_nb, 
 phys_map_nodes_nb_alloc;
 The bottom level has pointers to MemoryRegionSections.  */
  static PhysPageEntry phys_map = { .ptr = PHYS_MAP_NODE_NIL, .is_leaf = 0 };

 +QemuMutex mem_map_lock;
 +
  static void io_mem_init(void);
  static void memory_map_init(void);

 @@ -637,6 +639,7 @@ void cpu_exec_init_all(void)
  #if !defined(CONFIG_USER_ONLY)
  memory_map_init();
  io_mem_init();
 +qemu_mutex_init(mem_map_lock);

I'd move this and the mutex to memory.c since there are no other uses.
The mutex could be static then.

  #endif
  }

 diff --git a/memory.c b/memory.c
 index aab4a31..5986532 100644
 --- a/memory.c
 +++ b/memory.c
 @@ -761,7 +761,9 @@ void memory_region_transaction_commit(void)
  assert(memory_region_transaction_depth);
  --memory_region_transaction_depth;
  if (!memory_region_transaction_depth  memory_region_update_pending) {
 +qemu_mutex_lock(mem_map_lock);
  memory_region_update_topology(NULL);
 +qemu_mutex_unlock(mem_map_lock);
  }
  }

 @@ -1069,8 +1071,10 @@ void memory_region_set_log(MemoryRegion *mr, bool log, 
 unsigned client)
  {
  uint8_t mask = 1  client;

 +qemu_mutex_lock(mem_map_lock);
  mr-dirty_log_mask = (mr-dirty_log_mask  ~mask) | (log * mask);
  memory_region_update_topology(mr);
 +qemu_mutex_unlock(mem_map_lock);
  }

  bool memory_region_get_dirty(MemoryRegion *mr, target_phys_addr_t addr,
 @@ -1103,8 +1107,10 @@ void memory_region_sync_dirty_bitmap(MemoryRegion *mr)
  void memory_region_set_readonly(MemoryRegion *mr, bool readonly)
  {
  if (mr-readonly != readonly) {
 +qemu_mutex_lock(mem_map_lock);
  mr-readonly = readonly;
  memory_region_update_topology(mr);
 +qemu_mutex_unlock(mem_map_lock);
  }
  }

 @@ -1112,7 +1118,9 @@ void memory_region_rom_device_set_readable(MemoryRegion 
 *mr, bool readable)
  {
  if (mr-readable != readable) {
  mr-readable = readable;
 +qemu_mutex_lock(mem_map_lock);
  memory_region_update_topology(mr);
 +qemu_mutex_unlock(mem_map_lock);
  }
  }

 @@ -1206,6 +1214,7 @@ void memory_region_add_eventfd(MemoryRegion *mr,
  };
  unsigned i;

 +qemu_mutex_lock(mem_map_lock);
  for (i = 0; i  mr-ioeventfd_nb; ++i) {
  if (memory_region_ioeventfd_before(mrfd, mr-ioeventfds[i])) {
  break;
 @@ -1218,6 +1227,7 @@ void memory_region_add_eventfd(MemoryRegion *mr,
  sizeof(*mr-ioeventfds) * (mr-ioeventfd_nb-1 - i));
  mr-ioeventfds[i] = mrfd;
  memory_region_update_topology(mr);
 +qemu_mutex_unlock(mem_map_lock);
  }

  void memory_region_del_eventfd(MemoryRegion *mr,
 @@ -1236,6 +1246,7 @@ void memory_region_del_eventfd(MemoryRegion *mr,
  };
  unsigned i;

 +qemu_mutex_lock(mem_map_lock);
  for (i = 0; i  mr-ioeventfd_nb; ++i) {
  if (memory_region_ioeventfd_equal(mrfd, mr-ioeventfds[i])) {
  break;
 @@ -1248,6 +1259,7 @@ void memory_region_del_eventfd(MemoryRegion *mr,
  mr-ioeventfds = g_realloc(mr-ioeventfds,
sizeof(*mr-ioeventfds)*mr-ioeventfd_nb + 
 1);
  memory_region_update_topology(mr);
 +qemu_mutex_unlock(mem_map_lock);
  }

  static void memory_region_add_subregion_common(MemoryRegion *mr,
 @@ -1259,6 +1271,8 @@ static void 
 memory_region_add_subregion_common(MemoryRegion *mr,
  assert(!subregion-parent);
  subregion-parent = mr;
  subregion-addr = offset;
 +
 +qemu_mutex_lock(mem_map_lock);
  QTAILQ_FOREACH(other, mr-subregions, subregions_link) {
  if (subregion-may_overlap || other-may_overlap) {
  continue;
 @@ -1289,6 +1303,7 @@ static void 
 memory_region_add_subregion_common(MemoryRegion *mr,
  QTAILQ_INSERT_TAIL(mr-subregions, subregion, subregions_link);
  done:
  memory_region_update_topology(mr);
 +qemu_mutex_unlock(mem_map_lock);
  }


 @@ -1316,8 +1331,11 @@ void memory_region_del_subregion(MemoryRegion *mr,
  {
  assert(subregion-parent == mr);
  subregion-parent = NULL;
 +
 +qemu_mutex_lock(mem_map_lock);
  QTAILQ_REMOVE(mr-subregions, subregion, subregions_link);
  memory_region_update_topology(mr);
 +qemu_mutex_unlock(mem_map_lock);
  }

  void memory_region_set_enabled(MemoryRegion *mr, bool enabled)
 @@ -1325,8 +1343,10 @@ void memory_region_set_enabled(MemoryRegion *mr,

Re: [PATCH 08/15] memory: introduce PhysMap to present snapshot of toploygy

2012-08-08 Thread Blue Swirl

On Wed, Aug 8, 2012 at 6:25 AM, Liu Ping Fan qemul...@gmail.com wrote:
 From: Liu Ping Fan pingf...@linux.vnet.ibm.com

 PhysMap contain the flatview and radix-tree view, they are snapshot
 of system topology and should be consistent. With PhysMap, we can
 swap the pointer when updating and achieve the atomic.

 Signed-off-by: Liu Ping Fan pingf...@linux.vnet.ibm.com
 ---
  exec.c   |8 
  memory.c |   33 -
  memory.h |   62 
 --
  3 files changed, 60 insertions(+), 43 deletions(-)

 diff --git a/exec.c b/exec.c
 index 0e29ef9..01b91b0 100644
 --- a/exec.c
 +++ b/exec.c
 @@ -156,8 +156,6 @@ typedef struct PageDesc {
  #endif

  /* Size of the L2 (and L3, etc) page tables.  */

Please copy this comment to the header file.

 -#define L2_BITS 10
 -#define L2_SIZE (1  L2_BITS)

  #define P_L2_LEVELS \
  (((TARGET_PHYS_ADDR_SPACE_BITS - TARGET_PAGE_BITS - 1) / L2_BITS) + 1)
 @@ -185,7 +183,6 @@ uintptr_t qemu_host_page_mask;
  static void *l1_map[V_L1_SIZE];

  #if !defined(CONFIG_USER_ONLY)
 -typedef struct PhysPageEntry PhysPageEntry;

  static MemoryRegionSection *phys_sections;
  static unsigned phys_sections_nb, phys_sections_nb_alloc;
 @@ -194,11 +191,6 @@ static uint16_t phys_section_notdirty;
  static uint16_t phys_section_rom;
  static uint16_t phys_section_watch;

 -struct PhysPageEntry {
 -uint16_t is_leaf : 1;
 - /* index into phys_sections (is_leaf) or phys_map_nodes (!is_leaf) */
 -uint16_t ptr : 15;
 -};

  /* Simple allocator for PhysPageEntry nodes */
  static PhysPageEntry (*phys_map_nodes)[L2_SIZE];
 diff --git a/memory.c b/memory.c
 index 2eaa2fc..c7f2cfd 100644
 --- a/memory.c
 +++ b/memory.c
 @@ -31,17 +31,6 @@ static bool global_dirty_log = false;
  static QTAILQ_HEAD(memory_listeners, MemoryListener) memory_listeners
  = QTAILQ_HEAD_INITIALIZER(memory_listeners);

 -typedef struct AddrRange AddrRange;
 -
 -/*
 - * Note using signed integers limits us to physical addresses at most
 - * 63 bits wide.  They are needed for negative offsetting in aliases
 - * (large MemoryRegion::alias_offset).
 - */
 -struct AddrRange {
 -Int128 start;
 -Int128 size;
 -};

  static AddrRange addrrange_make(Int128 start, Int128 size)
  {
 @@ -197,28 +186,6 @@ static bool 
 memory_region_ioeventfd_equal(MemoryRegionIoeventfd a,
   !memory_region_ioeventfd_before(b, a);
  }

 -typedef struct FlatRange FlatRange;
 -typedef struct FlatView FlatView;
 -
 -/* Range of memory in the global map.  Addresses are absolute. */
 -struct FlatRange {
 -MemoryRegion *mr;
 -target_phys_addr_t offset_in_region;
 -AddrRange addr;
 -uint8_t dirty_log_mask;
 -bool readable;
 -bool readonly;
 -};
 -
 -/* Flattened global view of current active memory hierarchy.  Kept in sorted
 - * order.
 - */
 -struct FlatView {
 -FlatRange *ranges;
 -unsigned nr;
 -unsigned nr_allocated;
 -};
 -
  typedef struct AddressSpace AddressSpace;
  typedef struct AddressSpaceOps AddressSpaceOps;

 diff --git a/memory.h b/memory.h
 index 740f018..357edd8 100644
 --- a/memory.h
 +++ b/memory.h
 @@ -29,12 +29,72 @@
  #include qemu-thread.h
  #include qemu/reclaimer.h

 +typedef struct AddrRange AddrRange;
 +typedef struct FlatRange FlatRange;
 +typedef struct FlatView FlatView;
 +typedef struct PhysPageEntry PhysPageEntry;
 +typedef struct PhysMap PhysMap;
 +typedef struct MemoryRegionSection MemoryRegionSection;
  typedef struct MemoryRegionOps MemoryRegionOps;
  typedef struct MemoryRegionLifeOps MemoryRegionLifeOps;
  typedef struct MemoryRegion MemoryRegion;
  typedef struct MemoryRegionPortio MemoryRegionPortio;
  typedef struct MemoryRegionMmio MemoryRegionMmio;

 +/*
 + * Note using signed integers limits us to physical addresses at most
 + * 63 bits wide.  They are needed for negative offsetting in aliases
 + * (large MemoryRegion::alias_offset).
 + */
 +struct AddrRange {
 +Int128 start;
 +Int128 size;
 +};
 +
 +/* Range of memory in the global map.  Addresses are absolute. */
 +struct FlatRange {
 +MemoryRegion *mr;
 +target_phys_addr_t offset_in_region;
 +AddrRange addr;
 +uint8_t dirty_log_mask;
 +bool readable;
 +bool readonly;
 +};
 +
 +/* Flattened global view of current active memory hierarchy.  Kept in sorted
 + * order.
 + */
 +struct FlatView {
 +FlatRange *ranges;
 +unsigned nr;
 +unsigned nr_allocated;
 +};
 +
 +struct PhysPageEntry {
 +uint16_t is_leaf:1;
 + /* index into phys_sections (is_leaf) or phys_map_nodes (!is_leaf) */
 +uint16_t ptr:15;
 +};
 +
 +#define L2_BITS 10
 +#define L2_SIZE (1  L2_BITS)
 +/* This is a multi-level map on the physical address space.
 +   The bottom level has pointers to MemoryRegionSections.  */
 +struct PhysMap {
 +Atomic ref;
 +PhysPageEntry root;
 +PhysPageEntry (*phys_map_nodes)[L2_SIZE];
 +unsigned phys_map_nodes_nb;
 +unsigned

Re: [PATCH 09/15] memory: prepare flatview and radix-tree for rcu style access

2012-08-08 Thread Blue Swirl

On Wed, Aug 8, 2012 at 6:25 AM, Liu Ping Fan qemul...@gmail.com wrote:
 From: Liu Ping Fan pingf...@linux.vnet.ibm.com

 The flatview and the radix-tree view are both protected by a pointer,
 which makes sure that changes to them appear to be atomic.

 An mr accessed through a radix-tree leaf or the flatview will be reclaimed
 once the previous PhysMap is no longer in use.

 Signed-off-by: Liu Ping Fan pingf...@linux.vnet.ibm.com
 ---
  exec.c  |  303 +++---
  hw/vhost.c  |2 +-
  hw/xen_pt.c |2 +-
  kvm-all.c   |2 +-
  memory.c|   92 ++-
  memory.h|9 ++-
  vl.c|1 +
  xen-all.c   |2 +-
  8 files changed, 286 insertions(+), 127 deletions(-)

 diff --git a/exec.c b/exec.c
 index 01b91b0..97addb9 100644
 --- a/exec.c
 +++ b/exec.c
 @@ -24,6 +24,7 @@
  #include sys/mman.h
  #endif

 +#include qemu/atomic.h
  #include qemu-common.h
  #include cpu.h
  #include tcg.h
 @@ -35,6 +36,8 @@
  #include qemu-timer.h
  #include memory.h
  #include exec-memory.h
 +#include qemu-thread.h
 +#include qemu/reclaimer.h
  #if defined(CONFIG_USER_ONLY)
  #include qemu.h
  #if defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
 @@ -184,25 +187,17 @@ static void *l1_map[V_L1_SIZE];

  #if !defined(CONFIG_USER_ONLY)

 -static MemoryRegionSection *phys_sections;
 -static unsigned phys_sections_nb, phys_sections_nb_alloc;
  static uint16_t phys_section_unassigned;
  static uint16_t phys_section_notdirty;
  static uint16_t phys_section_rom;
  static uint16_t phys_section_watch;

 -
 -/* Simple allocator for PhysPageEntry nodes */
 -static PhysPageEntry (*phys_map_nodes)[L2_SIZE];
 -static unsigned phys_map_nodes_nb, phys_map_nodes_nb_alloc;
 -
  #define PHYS_MAP_NODE_NIL (((uint16_t)~0)  1)

 -/* This is a multi-level map on the physical address space.
 -   The bottom level has pointers to MemoryRegionSections.  */
 -static PhysPageEntry phys_map = { .ptr = PHYS_MAP_NODE_NIL, .is_leaf = 0 };
 -
 +static QemuMutex cur_map_lock;
 +static PhysMap *cur_map;
  QemuMutex mem_map_lock;
 +static PhysMap *next_map;

  static void io_mem_init(void);
  static void memory_map_init(void);
 @@ -383,41 +378,38 @@ static inline PageDesc *page_find(tb_page_addr_t index)

  #if !defined(CONFIG_USER_ONLY)

 -static void phys_map_node_reserve(unsigned nodes)
 +static void phys_map_node_reserve(PhysMap *map, unsigned nodes)
  {
 -if (phys_map_nodes_nb + nodes  phys_map_nodes_nb_alloc) {
 +if (map-phys_map_nodes_nb + nodes  map-phys_map_nodes_nb_alloc) {
  typedef PhysPageEntry Node[L2_SIZE];
 -phys_map_nodes_nb_alloc = MAX(phys_map_nodes_nb_alloc * 2, 16);
 -phys_map_nodes_nb_alloc = MAX(phys_map_nodes_nb_alloc,
 -  phys_map_nodes_nb + nodes);
 -phys_map_nodes = g_renew(Node, phys_map_nodes,
 - phys_map_nodes_nb_alloc);
 +map-phys_map_nodes_nb_alloc = MAX(map-phys_map_nodes_nb_alloc * 2,
 +16);
 +map-phys_map_nodes_nb_alloc = MAX(map-phys_map_nodes_nb_alloc,
 +  map-phys_map_nodes_nb + nodes);
 +map-phys_map_nodes = g_renew(Node, map-phys_map_nodes,
 + map-phys_map_nodes_nb_alloc);
  }
  }

 -static uint16_t phys_map_node_alloc(void)
 +static uint16_t phys_map_node_alloc(PhysMap *map)
  {
  unsigned i;
  uint16_t ret;

 -ret = phys_map_nodes_nb++;
 +ret = map-phys_map_nodes_nb++;
  assert(ret != PHYS_MAP_NODE_NIL);
 -assert(ret != phys_map_nodes_nb_alloc);
 +assert(ret != map-phys_map_nodes_nb_alloc);
  for (i = 0; i  L2_SIZE; ++i) {
 -phys_map_nodes[ret][i].is_leaf = 0;
 -phys_map_nodes[ret][i].ptr = PHYS_MAP_NODE_NIL;
 +map-phys_map_nodes[ret][i].is_leaf = 0;
 +map-phys_map_nodes[ret][i].ptr = PHYS_MAP_NODE_NIL;
  }
  return ret;
  }

 -static void phys_map_nodes_reset(void)
 -{
 -phys_map_nodes_nb = 0;
 -}
 -
 -
 -static void phys_page_set_level(PhysPageEntry *lp, target_phys_addr_t *index,
 -target_phys_addr_t *nb, uint16_t leaf,
 +static void phys_page_set_level(PhysMap *map, PhysPageEntry *lp,
 +target_phys_addr_t *index,
 +target_phys_addr_t *nb,
 +uint16_t leaf,
  int level)
  {
  PhysPageEntry *p;
 @@ -425,8 +417,8 @@ static void phys_page_set_level(PhysPageEntry *lp, 
 target_phys_addr_t *index,
  target_phys_addr_t step = (target_phys_addr_t)1  (level * L2_BITS);

  if (!lp-is_leaf  lp-ptr == PHYS_MAP_NODE_NIL) {
 -lp-ptr = phys_map_node_alloc();
 -p = phys_map_nodes[lp-ptr];
 +lp-ptr = phys_map_node_alloc(map);
 +p = map-phys_map_nodes[lp-ptr];
  if (level == 0) {
  for (i = 0; i

Re: [Qemu-devel] [PATCH 2/5] s390: Virtual channel subsystem support.

2012-08-08 Thread Blue Swirl

On Wed, Aug 8, 2012 at 7:34 PM, Peter Maydell peter.mayd...@linaro.org wrote:
 On 8 August 2012 20:16, Blue Swirl blauwir...@gmail.com wrote:
 On Wed, Aug 8, 2012 at 8:17 AM, Cornelia Huck cornelia.h...@de.ibm.com 
 wrote:
 On Tue, 7 Aug 2012 21:00:59 +
 Blue Swirl blauwir...@gmail.com wrote:
 Please use more descriptive names instead of acronyms, for example 
 SubChStatus.

 I'd rather leave these at the well-known scsw, pmcw, etc. names. These
 have been around for decades, and somebody familiar with channel I/O
 will instantly know what a struct scsw is, but will need to look hard
 at the code to figure out the meaning of SubChStatus.

 If they are well-known and have been around for such a long time, are
 there any suitable header files (with compatible licenses) where they
 are defined which could be reused?

 Otherwise, please follow CODING_STYLE.

 I think we should follow CODING_STYLE for capitalisation issues
 but generally if the device's documentation has standard abbreviations
 for register names, structures, etc, etc we should use them. Often
 this code has to be maintained later by somebody else who might not
 be familiar with the general operation of the hardware and who is trying
 to match up the code with whatever the data sheet says. Following the
 naming used in the h/w docs makes that job easier.

Yes. typedef struct SCSW {} SCSW; should be OK too.


 (for instance I took the opportunity of making a bunch of structure
 member names in target-arm line up with the ARM ARM names
 as part of the refactoring that went on a while back.)

 -- PMM
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [Qemu-devel] [PATCH 3/5] s390: Add new channel I/O based virtio transport.

2012-08-07 Thread Blue Swirl

On Tue, Aug 7, 2012 at 2:52 PM, Cornelia Huck cornelia.h...@de.ibm.com wrote:
 Add a new virtio transport that uses channel commands to perform
 virtio operations.

 Add a new machine type s390-ccw that uses this virtio-ccw transport
 and make it the default machine for s390.

 Signed-off-by: Cornelia Huck cornelia.h...@de.ibm.com
 ---
  hw/qdev-monitor.c  |   5 +
  hw/s390-virtio.c   | 268 ++
  hw/s390x/Makefile.objs |   1 +
  hw/s390x/virtio-ccw.c  | 962 
 +
  hw/s390x/virtio-ccw.h  |  77 
  vl.c   |   1 +
  6 files changed, 1243 insertions(+), 71 deletions(-)
  create mode 100644 hw/s390x/virtio-ccw.c
  create mode 100644 hw/s390x/virtio-ccw.h

 diff --git a/hw/qdev-monitor.c b/hw/qdev-monitor.c
 index b22a37a..79f7e6b 100644
 --- a/hw/qdev-monitor.c
 +++ b/hw/qdev-monitor.c
 @@ -42,6 +42,11 @@ static const QDevAlias qdev_alias_table[] = {
  { virtio-blk-s390, virtio-blk, QEMU_ARCH_S390X },
  { virtio-net-s390, virtio-net, QEMU_ARCH_S390X },
  { virtio-serial-s390, virtio-serial, QEMU_ARCH_S390X },
 +{ virtio-blk-ccw, virtio-blk, QEMU_ARCH_S390X },
 +{ virtio-net-ccw, virtio-net, QEMU_ARCH_S390X },
 +{ virtio-serial-ccw, virtio-serial, QEMU_ARCH_S390X },
 +{ virtio-balloon-ccw, virtio-balloon, QEMU_ARCH_S390X },
 +{ virtio-scsi-ccw, virtio-scsi, QEMU_ARCH_S390X },
  { lsi53c895a, lsi },
  { ich9-ahci, ahci },
  { }
 diff --git a/hw/s390-virtio.c b/hw/s390-virtio.c
 index 47eed35..b8bdf80 100644
 --- a/hw/s390-virtio.c
 +++ b/hw/s390-virtio.c
 @@ -30,8 +30,11 @@
  #include hw/sysbus.h
  #include kvm.h
  #include exec-memory.h
 +#include qemu-thread.h

  #include hw/s390-virtio-bus.h
 +#include hw/s390x/css.h
 +#include hw/s390x/virtio-ccw.h

  //#define DEBUG_S390

 @@ -46,6 +49,7 @@
  #define KVM_S390_VIRTIO_NOTIFY  0
  #define KVM_S390_VIRTIO_RESET   1
  #define KVM_S390_VIRTIO_SET_STATUS  2
 +#define KVM_S390_VIRTIO_CCW_NOTIFY  3

  #define KERN_IMAGE_START0x01UL
  #define KERN_PARM_AREA  0x010480UL
 @@ -62,6 +66,7 @@

  static VirtIOS390Bus *s390_bus;
  static S390CPU **ipi_states;
 +VirtioCcwBus *ccw_bus;

  S390CPU *s390_cpu_addr2state(uint16_t cpu_addr)
  {
 @@ -75,15 +80,21 @@ S390CPU *s390_cpu_addr2state(uint16_t cpu_addr)
  int s390_virtio_hypercall(CPUS390XState *env, uint64_t mem, uint64_t 
 hypercall)
  {
  int r = 0, i;
 +int cssid, ssid, schid, m;
 +SubchDev *sch;

  dprintf(KVM hypercall: %ld\n, hypercall);
  switch (hypercall) {
  case KVM_S390_VIRTIO_NOTIFY:
  if (mem  ram_size) {
 -VirtIOS390Device *dev = s390_virtio_bus_find_vring(s390_bus,
 -   mem, i);
 -if (dev) {
 -virtio_queue_notify(dev-vdev, i);
 +if (s390_bus) {
 +VirtIOS390Device *dev = s390_virtio_bus_find_vring(s390_bus,
 +   mem, i);
 +if (dev) {
 +virtio_queue_notify(dev-vdev, i);
 +} else {
 +r = -EINVAL;
 +}
  } else {
  r = -EINVAL;
  }
 @@ -92,28 +103,49 @@ int s390_virtio_hypercall(CPUS390XState *env, uint64_t 
 mem, uint64_t hypercall)
  }
  break;
  case KVM_S390_VIRTIO_RESET:
 -{
 -VirtIOS390Device *dev;
 -
 -dev = s390_virtio_bus_find_mem(s390_bus, mem);
 -virtio_reset(dev-vdev);
 -stb_phys(dev-dev_offs + VIRTIO_DEV_OFFS_STATUS, 0);
 -s390_virtio_device_sync(dev);
 -s390_virtio_reset_idx(dev);
 +if (s390_bus) {
 +VirtIOS390Device *dev;
 +
 +dev = s390_virtio_bus_find_mem(s390_bus, mem);
 +virtio_reset(dev-vdev);
 +stb_phys(dev-dev_offs + VIRTIO_DEV_OFFS_STATUS, 0);
 +s390_virtio_device_sync(dev);
 +s390_virtio_reset_idx(dev);
 +} else {
 +r = -EINVAL;
 +}
  break;
 -}
  case KVM_S390_VIRTIO_SET_STATUS:
 -{
 -VirtIOS390Device *dev;
 +if (s390_bus) {
 +VirtIOS390Device *dev;

 -dev = s390_virtio_bus_find_mem(s390_bus, mem);
 -if (dev) {
 -s390_virtio_device_update_status(dev);
 +dev = s390_virtio_bus_find_mem(s390_bus, mem);
 +if (dev) {
 +s390_virtio_device_update_status(dev);
 +} else {
 +r = -EINVAL;
 +}
  } else {
  r = -EINVAL;
  }
  break;
 -}
 +case KVM_S390_VIRTIO_CCW_NOTIFY:
 +if (ccw_bus) {
 +if (ioinst_disassemble_sch_ident(env-regs[2], m, cssid, ssid,
 + schid)) {
 +r = -EINVAL;
 +} else {
 +

Re: [Qemu-devel] [PATCH 2/5] s390: Virtual channel subsystem support.

2012-08-07 Thread Blue Swirl

On Tue, Aug 7, 2012 at 2:52 PM, Cornelia Huck cornelia.h...@de.ibm.com wrote:
 Provide a mechanism for qemu to provide fully virtual subchannels to
 the guest. In the KVM case, this relies on the kernel's css support.
 The !KVM case is not yet supported.

 Signed-off-by: Cornelia Huck cornelia.h...@de.ibm.com
 ---
  hw/s390x/Makefile.objs |   1 +
  hw/s390x/css.c | 440 
 +
  hw/s390x/css.h |  62 +++
  target-s390x/Makefile.objs |   2 +-
  target-s390x/cpu.h | 108 +++
  target-s390x/ioinst.c  |  38 
  target-s390x/ioinst.h  | 173 ++
  target-s390x/kvm.c | 101 +++
  8 files changed, 924 insertions(+), 1 deletion(-)
  create mode 100644 hw/s390x/css.c
  create mode 100644 hw/s390x/css.h
  create mode 100644 target-s390x/ioinst.c
  create mode 100644 target-s390x/ioinst.h

 diff --git a/hw/s390x/Makefile.objs b/hw/s390x/Makefile.objs
 index dcdcac8..93b41fb 100644
 --- a/hw/s390x/Makefile.objs
 +++ b/hw/s390x/Makefile.objs
 @@ -1,3 +1,4 @@
  obj-y = s390-virtio-bus.o s390-virtio.o

  obj-y := $(addprefix ../,$(obj-y))
 +obj-y += css.o
 diff --git a/hw/s390x/css.c b/hw/s390x/css.c
 new file mode 100644
 index 000..7941c44
 --- /dev/null
 +++ b/hw/s390x/css.c
 @@ -0,0 +1,440 @@
 +/*
 + * Channel subsystem base support.
 + *
 + * Copyright 2012 IBM Corp.
 + * Author(s): Cornelia Huck cornelia.h...@de.ibm.com
 + *
 + * This work is licensed under the terms of the GNU GPL, version 2 or (at
 + * your option) any later version. See the COPYING file in the top-level
 + * directory.
 + */
 +
 +#include qemu-thread.h
 +#include qemu-queue.h
 +#include hw/qdev.h
 +#include kvm.h
 +#include cpu.h
 +#include ioinst.h
 +#include css.h
 +
 +struct chp_info {

CamelCase, please.

 +uint8_t in_use;
 +uint8_t type;
 +};
 +
 +static struct chp_info chpids[MAX_CSSID + 1][MAX_CHPID + 1];
 +
 +static css_subch_cb_func css_subch_cb;

Probably these can be put into a container structure which can be passed around.

 +
 +int css_set_subch_cb(css_subch_cb_func func)
 +{
 +if (func  css_subch_cb) {
 +return -EBUSY;
 +}
 +css_subch_cb = func;
 +return 0;
 +}
 +
 +static void css_inject_io_interrupt(SubchDev *sch, uint8_t func)
 +{
 +s390_io_interrupt(sch-cssid, sch-ssid, sch-schid, 
 sch-curr_status.scsw,
 +  sch-curr_status.pmcw, sch-sense_data, 0,
 +  sch-curr_status.pmcw.isc, 
 sch-curr_status.pmcw.intparm,
 +  func);
 +}
 +
 +void css_conditional_io_interrupt(SubchDev *sch)
 +{
 +s390_io_interrupt(sch-cssid, sch-ssid, sch-schid, 
 sch-curr_status.scsw,
 +  sch-curr_status.pmcw, sch-sense_data, 1,
 +  sch-curr_status.pmcw.isc, 
 sch-curr_status.pmcw.intparm, 0);
 +}
 +
 +static void sch_handle_clear_func(SubchDev *sch)
 +{
 +struct pmcw *p = sch-curr_status.pmcw;
 +struct scsw *s = sch-curr_status.scsw;
 +int path;
 +
 +/* Path management: In our simple css, we always choose the only path. */
 +path = 0x80;
 +
 +/* Reset values prior to 'issueing the clear signal'. */
 +p-lpum = 0;
 +p-pom = 0xff;
 +s-pno = 0;
 +
 +/* We always 'attempt to issue the clear signal', and we always succeed. 
 */
 +sch-orb = NULL;
 +sch-channel_prog = NULL;
 +sch-last_cmd = NULL;
 +s-actl = ~SCSW_ACTL_CLEAR_PEND;
 +s-stctl |= SCSW_STCTL_STATUS_PEND;
 +
 +s-dstat = 0;
 +s-cstat = 0;
 +p-lpum = path;
 +
 +}
 +
 +static void sch_handle_halt_func(SubchDev *sch)
 +{
 +
 +struct pmcw *p = sch-curr_status.pmcw;
 +struct scsw *s = sch-curr_status.scsw;
 +int path;
 +
 +/* Path management: In our simple css, we always choose the only path. */
 +path = 0x80;
 +
 +/* We always 'attempt to issue the halt signal', and we always succeed. 
 */
 +sch-orb = NULL;
 +sch-channel_prog = NULL;
 +sch-last_cmd = NULL;
 +s-actl = ~SCSW_ACTL_HALT_PEND;
 +s-stctl |= SCSW_STCTL_STATUS_PEND;
 +
 +if ((s-actl  (SCSW_ACTL_SUBCH_ACTIVE | SCSW_ACTL_DEVICE_ACTIVE)) ||
 +!((s-actl  SCSW_ACTL_START_PEND) ||
 +  (s-actl  SCSW_ACTL_SUSP))) {
 +s-dstat = SCSW_DSTAT_DEVICE_END;
 +}
 +s-cstat = 0;
 +p-lpum = path;
 +
 +}
 +
 +static int css_interpret_ccw(SubchDev *sch, struct ccw1 *ccw)
 +{
 +int ret;
 +bool check_len;
 +int len;
 +int i;
 +
 +if (!ccw) {
 +return -EIO;
 +}
 +
 +/* Check for invalid command codes. */
 +if ((ccw-cmd_code  0x0f) == 0) {
 +return -EINVAL;
 +}
 +if (((ccw-cmd_code  0x0f) == CCW_CMD_TIC) 
 +((ccw-cmd_code  0xf0) != 0)) {
 +return -EINVAL;
 +}
 +
 +if (ccw-flags  CCW_FLAG_SUSPEND) {
 +return -ERESTART;
 +}
 +
 +check_len = !((ccw-flags  CCW_FLAG_SLI)  !(ccw-flags  
 CCW_FLAG_DC));
 +
 +/* Look at the command. */
 +switch (ccw-cmd_code) {

Re: [Qemu-devel] [PATCH v4] Fixes related to processing of qemu's -numa option

2012-08-04 Thread Blue Swirl

Thanks, applied.

On Tue, Jul 17, 2012 at 4:31 AM, Chegu Vinod chegu_vi...@hp.com wrote:
 Changes since v3:
- using bitmap_set() instead of set_bit() in numa_add() routine.
- removed call to bitmap_zero() since bitmap_new() also zeroes the bitmap.
- Rebased to the latest qemu.

 Changes since v2:
- Using unsigned long * for the node_cpumask[].
- Use bitmap_new() instead of g_malloc0() for allocation.
- Don't rely on max_cpus since it may not be initialized
  before the numa related qemu options are parsed & processed.

 Note: Continuing to use a new constant for allocation of
   the mask (This constant is currently set to 255 since
   with an 8bit APIC ID VCPUs can range from 0-254 in a
   guest. The APIC ID 255 (0xFF) is reserved for broadcast).

 Changes since v1:

- Use bitmap functions that are already in qemu (instead
  of cpu_set_t macro's from sched.h)
- Added a check for endvalue >= max_cpus.
- Fix to address the round-robin assignment when
  cpu's are not explicitly specified.
 ---

 v1:
 --

 The -numa option to qemu is used to create [fake] numa nodes
 and expose them to the guest OS instance.

 There are a couple of issues with the -numa option:

 a) Max VCPU's that can be specified for a guest while using
the qemu's -numa option is 64. Due to a typecasting issue
when the number of VCPUs is > 32 the VCPUs don't show up
under the specified [fake] numa nodes.

 b) KVM currently has support for 160VCPUs per guest. The
qemu's -numa option has only support for upto 64VCPUs
per guest.
 This patch addresses these two issues.

 Below are examples of (a) and (b)

 a) >32 VCPUs are specified with the -numa option:

 /usr/local/bin/qemu-system-x86_64 \
 -enable-kvm \
 71:01:01 \
 -net tap,ifname=tap0,script=no,downscript=no \
 -vnc :4

 ...
 Upstream qemu :
 --

 QEMU 1.1.50 monitor - type 'help' for more information
 (qemu) info numa
 6 nodes
 node 0 cpus: 0 1 2 3 4 5 6 7 8 9 32 33 34 35 36 37 38 39 40 41
 node 0 size: 131072 MB
 node 1 cpus: 10 11 12 13 14 15 16 17 18 19 42 43 44 45 46 47 48 49 50 51
 node 1 size: 131072 MB
 node 2 cpus: 20 21 22 23 24 25 26 27 28 29 52 53 54 55 56 57 58 59
 node 2 size: 131072 MB
 node 3 cpus: 30
 node 3 size: 131072 MB
 node 4 cpus:
 node 4 size: 131072 MB
 node 5 cpus: 31
 node 5 size: 131072 MB

 With the patch applied :
 ---

 QEMU 1.1.50 monitor - type 'help' for more information
 (qemu) info numa
 6 nodes
 node 0 cpus: 0 1 2 3 4 5 6 7 8 9
 node 0 size: 131072 MB
 node 1 cpus: 10 11 12 13 14 15 16 17 18 19
 node 1 size: 131072 MB
 node 2 cpus: 20 21 22 23 24 25 26 27 28 29
 node 2 size: 131072 MB
 node 3 cpus: 30 31 32 33 34 35 36 37 38 39
 node 3 size: 131072 MB
 node 4 cpus: 40 41 42 43 44 45 46 47 48 49
 node 4 size: 131072 MB
 node 5 cpus: 50 51 52 53 54 55 56 57 58 59
 node 5 size: 131072 MB

 b) >64 VCPUs specified with -numa option:

 /usr/local/bin/qemu-system-x86_64 \
 -enable-kvm \
 -cpu 
 Westmere,+rdtscp,+pdpe1gb,+dca,+pdcm,+xtpr,+tm2,+est,+smx,+vmx,+ds_cpl,+monitor,+dtes64,+pclmuldq,+pbe,+tm,+ht,+ss,+acpi,+d-vnc
  :4

 ...

 Upstream qemu :
 --

 only 63 CPUs in NUMA mode supported.
 only 64 CPUs in NUMA mode supported.
 QEMU 1.1.50 monitor - type 'help' for more information
 (qemu) info numa
 8 nodes
 node 0 cpus: 6 7 8 9 38 39 40 41 70 71 72 73
 node 0 size: 65536 MB
 node 1 cpus: 10 11 12 13 14 15 16 17 18 19 42 43 44 45 46 47 48 49 50 51 74 
 75 76 77 78 79
 node 1 size: 65536 MB
 node 2 cpus: 20 21 22 23 24 25 26 27 28 29 52 53 54 55 56 57 58 59 60 61
 node 2 size: 65536 MB
 node 3 cpus: 30 62
 node 3 size: 65536 MB
 node 4 cpus:
 node 4 size: 65536 MB
 node 5 cpus:
 node 5 size: 65536 MB
 node 6 cpus: 31 63
 node 6 size: 65536 MB
 node 7 cpus: 0 1 2 3 4 5 32 33 34 35 36 37 64 65 66 67 68 69
 node 7 size: 65536 MB

 With the patch applied :
 ---

 QEMU 1.1.50 monitor - type 'help' for more information
 (qemu) info numa
 8 nodes
 node 0 cpus: 0 1 2 3 4 5 6 7 8 9
 node 0 size: 65536 MB
 node 1 cpus: 10 11 12 13 14 15 16 17 18 19
 node 1 size: 65536 MB
 node 2 cpus: 20 21 22 23 24 25 26 27 28 29
 node 2 size: 65536 MB
 node 3 cpus: 30 31 32 33 34 35 36 37 38 39
 node 3 size: 65536 MB
 node 4 cpus: 40 41 42 43 44 45 46 47 48 49
 node 4 size: 65536 MB
 node 5 cpus: 50 51 52 53 54 55 56 57 58 59
 node 5 size: 65536 MB
 node 6 cpus: 60 61 62 63 64 65 66 67 68 69
 node 6 size: 65536 MB
 node 7 cpus: 70 71 72 73 74 75 76 77 78 79

 Signed-off-by: Chegu Vinod chegu_vi...@hp.com, Jim Hull jim.h...@hp.com, 
 Craig Hada craig.h...@hp.com
 ---
  cpus.c   |3 ++-
  hw/pc.c  |3 ++-
  sysemu.h |3 ++-
  vl.c |   43 +--
  4 files changed, 27 insertions(+), 25 deletions(-)

 diff --git a/cpus.c b/cpus.c
 index b182b3d..acccd08 100644
 --- a/cpus.c
 +++ b/cpus.c
 @@ -36,6 +36,7 @@
  #include cpus.h
  #include qtest.h
  #include

Re: [Qemu-devel] [PATCH 1/5] scsi-disk: removable hard disks support START/STOP

2012-07-23 Thread Blue Swirl

On Mon, Jul 16, 2012 at 2:25 PM, Paolo Bonzini pbonz...@redhat.com wrote:
 Support for START/STOP UNIT right now is limited to CD-ROMs.  This is wrong,
 since removable hard disks (in the real world: SD card readers) also support
 it in pretty much the same way.

I remember vaguely tuning a set of large SCSI hard disks
(non-removable) so that they all didn't start immediately at the same
time (which could have burned out the PSU) but only with START UNIT
command. I think Linux or maybe even the BIOS started the drives
(nicely in sequence) before accessing the drive.


 Signed-off-by: Paolo Bonzini pbonz...@redhat.com
 ---
  hw/scsi-disk.c |2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)

 diff --git a/hw/scsi-disk.c b/hw/scsi-disk.c
 index bcec66b..42bae3b 100644
 --- a/hw/scsi-disk.c
 +++ b/hw/scsi-disk.c
 @@ -1251,7 +1251,7 @@ static int scsi_disk_emulate_start_stop(SCSIDiskReq *r)
  bool start = req-cmd.buf[4]  1;
  bool loej = req-cmd.buf[4]  2; /* load on start, eject on !start */

 -if (s-qdev.type == TYPE_ROM  loej) {
 +if ((s-features  (1  SCSI_DISK_F_REMOVABLE))  loej) {
  if (!start  !s-tray_open  s-tray_locked) {
  scsi_check_condition(r,
   bdrv_is_inserted(s-qdev.conf.bs)
 --
 1.7.10.4



--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [Qemu-devel] [RFC PATCH v2 00/21] ACPI memory hotplug

2012-07-14 Thread Blue Swirl

On Fri, Jul 13, 2012 at 5:49 PM, Vasilis Liaskovitis
vasilis.liaskovi...@profitbricks.com wrote:
 On Thu, Jul 12, 2012 at 08:04:56PM +, Blue Swirl wrote:
 On Wed, Jul 11, 2012 at 10:31 AM, Vasilis Liaskovitis
 vasilis.liaskovi...@profitbricks.com wrote:
  This is v2 of the ACPI memory hotplug prototype for x86_64 target.

 I think the concept of DIMMs (what about SIMMs? SODIMMs? I liked
 memslot) would be useful for most targets, but hotplugging may be
 limited to x86 only. It would be nice to keep these two separate or as
 loosely coupled as possible.

 agreed.
 what specific usecases besides hotplugging are you thinking about?

Most real boards have some kind of RAM module slots. Now this is
implemented with -m option, but a generic memory slot model would be
more accurate. Also the memory layout needs to be communicated to BIOS
somehow unless we want to spend cycles for BIOS memory probes. The
NUMA fw_cfg memory description should be usable for most cases even
for embedded UP machines.

 Also are there non-acpi hotplug platforms?

Some enterprise-class Sparc and PPC machines support memory hotplug.


 I am trying to keep generic dimm manipulation functions (e.g. population /
 depopulation and searching) in hw/dimm[.ch]. Currently the x86-acpi_piix4 
 backend
 registers a callback for hot-add / hot-remove. In theory other hotplug 
 backends
 can hook in.

 btw I don't mind using -memslot (I think someone during v1 mentioned 
 -dimm), we just
 need some consensus on the naming.


 
  Changes v1->v2
 
  - memory map is automatically calculated for hotplug dimms. Dimms are 
  added from
  top-of-memory skipping the pci hole at [PCI_HOLE_START, 4G).
  - Renamed from -memslot to -dimm. Commands changed to dimm_add, 
  dimm_del.
  - Seabios ejection array reduced to a byte. Use extraction macros for dimm 
  ssdt.
  - additional SRAT paravirt info does not break previous SRAT fw_cfg layout.
  - Documentation of new acpi_piix4 registers and paravirt data.
  - add ACPI _OST support for _OST enabled guests. This allows qemu to 
  receive
  notification for success / failure of memory hot-add and hot-remove 
  operations.
  Guest needs to support _OST (https://lkml.org/lkml/2012/6/25/321)
  - add monitor info command to report total guest memory (initial + 
  hot-added)
  - add command line options and monitor commands for batch dimm 
  creation/population
 
  Overview:
 
  Dimm devices are modeled with a new qemu command line
 
  -dimm id=name,size=sz,node=pxm,populated=on|off
 
  As already mentioned, the starting physical address for all dimms is 
  calculated
  automatically from top of memory, skipping the pci hole at 
  [PCI_HOLE_START, 4G).
  Node is defining numa proximity for this dimm. When not defined it defaults
  to zero.
  -dimm id=dimm0,size=512M,node=0,populated=off
  will define a 512M memory slot belonging to numa node 0.
 
  Dimms are added or removed with a new hmp command dimm_add/dimm_del:
  Hot-add syntax: dimm_add id
  Hot-remove syntax: dimm_del id
 
  Issues:
 
  - Live migration works as long as populated field is changed to on for
  hotplugged dimms at the destination qemu command line (patch 12/21 lifts
  this requirement). The DimmState structure does not yet define a
  VMStateDescription, but i assume this is the preferred way to pass state
  for migration.
 
  - Dimms are abstracted as qdevices attached to the main system bus. 
  However,
  memory hotplugging has its own side channel ignoring main_system_bus's 
  hotplug
  incapability. A cleaner integration is still needed, probably attaching 
  memory
  devices as children-links of an acpi-capable device (in the pc case 
  acpi_piix4)
  instead of the system bus (TBD). Then device_add/device_del instead of new
  commands can hopefully be used.
 
  Comments/review welcome.
 
  series is based on uq/master for qemu-kvm, and master for seabios. Can be 
  found
  also at:
  http://github.com/vliaskov/qemu-kvm/commits/memhp-v2
  http://github.com/vliaskov/seabios/commits/memhp-v2
 
  Vasilis Liaskovitis (14):
dimm: Implement memory device abstraction
acpi_piix4: Implement memory device hotplug registers
pc: calculate dimm physical addresses and adjust memory map
pc: Add dimm paravirt SRAT info
Implement -dimm command line option
Implement dimm_add and dimm_del commands for hmp and qmp
fix live-migration when populated=on is missing
Implement memory hotplug notification lists
acpi_piix4: _OST dimm support
acpi_piix4: Update dimm state on VM reboot
acpi_piix4: Update dimm bitmap state on hot-remove fail
Implement info memtotal and query-memtotal
Implement -dimms, -dimmspop command line options
Implement mem_increase, mem_decrease hmp/qmp commands
 
   arch_init.c |   23 ++-
   docs/specs/acpi_hotplug.txt |   46 +
   docs/specs/fwcfg.txt|   28 +++
   hmp-commands.hx |   67 +++
   hmp.c   |   24 +++
   hmp.h

Re: [Qemu-devel] [RFC PATCH v2 09/21] pc: Add dimm paravirt SRAT info

2012-07-12 Thread Blue Swirl

On Wed, Jul 11, 2012 at 10:31 AM, Vasilis Liaskovitis
vasilis.liaskovi...@profitbricks.com wrote:
 The numa_fw_cfg paravirt interface is extended to include SRAT information for
 all hotplug-able dimms. There are 3 words for each hotplug-able memory slot,
 denoting start address, size and node proximity. The new info is appended 
 after
 existing numa info, so that the fw_cfg layout does not break.  This 
 information
 is used by Seabios to build hotplug memory device objects at runtime.
 nb_numa_nodes is set to 1 by default (not 0), so that we always pass srat info
 to SeaBIOS.

 v1->v2:
 Dimm SRAT info (#dimms) is appended at end of existing numa fw_cfg in order 
 not
 to break existing layout
 Documentation of the new fwcfg layout is included in docs/specs/fwcfg.txt

 Signed-off-by: Vasilis Liaskovitis vasilis.liaskovi...@profitbricks.com
 ---
  docs/specs/fwcfg.txt |   28 ++
  hw/pc.c  |   53 -
  vl.c |2 +-
  3 files changed, 80 insertions(+), 3 deletions(-)
  create mode 100644 docs/specs/fwcfg.txt

 diff --git a/docs/specs/fwcfg.txt b/docs/specs/fwcfg.txt
 new file mode 100644
 index 000..e6fcd8f
 --- /dev/null
 +++ b/docs/specs/fwcfg.txt
 @@ -0,0 +1,28 @@
 +QEMU-BIOS Paravirt Documentation
 +--
 +
 +This document describes paravirt data structures passed from QEMU to BIOS.
 +
 +fw_cfg SRAT paravirt info
 +
 +The SRAT info passed from QEMU to BIOS has the following layout:
 +
 +---
 +#nodes | cpu0_pxm | cpu1_pxm | ... | cpulast_pxm | node0_mem | node1_mem | 
 ... | nodelast_mem
 +
 +---
 +#dimms | dimm0_start | dimm0_sz | dimm0_pxm | ... | dimmlast_start | 
 dimmlast_sz | dimmlast_pxm
 +
 +Entry 0 contains the number of numa nodes (nb_numa_nodes).
 +
 +Entries 1..max_cpus: The next max_cpus entries describe node proximity for 
 each
 +one of the vCPUs in the system.
 +
 +Entries max_cpus+1..max_cpus+nb_numa_nodes+1:  The next nb_numa_nodes entries
 +describe the memory size for each one of the NUMA nodes in the system.
 +
 +Entry max_cpus+nb_numa_nodes+1 contains the number of memory dimms 
 (nb_hp_dimms)
 +
 +The last 3 * nb_hp_dimms entries are organized in triplets: Each triplet 
 contains
 +the physical address offset, size (in bytes), and node proximity for the
 +respective dimm.

The size and endianness are not specified; you are using LE 64-bit
values for each item.

 diff --git a/hw/pc.c b/hw/pc.c
 index ef9901a..cf651d0 100644
 --- a/hw/pc.c
 +++ b/hw/pc.c
 @@ -598,12 +598,15 @@ int e820_add_entry(uint64_t address, uint64_t length, 
 uint32_t type)
  return index;
  }

 +static void setup_hp_dimms(uint64_t *fw_cfg_slots);
 +
  static void *bochs_bios_init(void)
  {
  void *fw_cfg;
  uint8_t *smbios_table;
  size_t smbios_len;
  uint64_t *numa_fw_cfg;
 +uint64_t *hp_dimms_fw_cfg;
  int i, j;

  register_ioport_write(0x400, 1, 2, bochs_bios_write, NULL);
 @@ -638,8 +641,10 @@ static void *bochs_bios_init(void)
  /* allocate memory for the NUMA channel: one (64bit) word for the number
   * of nodes, one word for each VCPU-node and one word for each node to
   * hold the amount of memory.
 + * Finally one word for the number of hotplug memory slots and three 
 words
 + * for each hotplug memory slot (start address, size and node proximity).
   */
 -numa_fw_cfg = g_malloc0((1 + max_cpus + nb_numa_nodes) * 8);
 +numa_fw_cfg = g_malloc0((2 + max_cpus + nb_numa_nodes + 3 * nb_hp_dimms) 
 * 8);
  numa_fw_cfg[0] = cpu_to_le64(nb_numa_nodes);
  for (i = 0; i  max_cpus; i++) {
  for (j = 0; j  nb_numa_nodes; j++) {
 @@ -652,8 +657,15 @@ static void *bochs_bios_init(void)
  for (i = 0; i  nb_numa_nodes; i++) {
  numa_fw_cfg[max_cpus + 1 + i] = cpu_to_le64(node_mem[i]);
  }
 +
 +numa_fw_cfg[1 + max_cpus + nb_numa_nodes] = cpu_to_le64(nb_hp_dimms);
 +
 +hp_dimms_fw_cfg = numa_fw_cfg + 2 + max_cpus + nb_numa_nodes;
 +if (nb_hp_dimms)
 +setup_hp_dimms(hp_dimms_fw_cfg);

Braces.

 +
  fw_cfg_add_bytes(fw_cfg, FW_CFG_NUMA, (uint8_t *)numa_fw_cfg,
 - (1 + max_cpus + nb_numa_nodes) * 8);
 + (2 + max_cpus + nb_numa_nodes + 3 * nb_hp_dimms) * 8);

  return fw_cfg;
  }
 @@ -1223,3 +1235,40 @@ target_phys_addr_t pc_set_hp_memory_offset(uint64_t 
 size)

  return ret;
  }
 +
 +static void setup_hp_dimms(uint64_t *fw_cfg_slots)
 +{
 +int i = 0;
 +Error *err = NULL;
 +DeviceState *dev;
 +DimmState *slot;
 +const char *type;
 +BusChild *kid;
 +BusState *bus = sysbus_get_default();
 +
 +QTAILQ_FOREACH(kid, bus-children, sibling) {
 +dev = kid-child;
 +type =

Re: [Qemu-devel] [RFC PATCH v2 06/21] dimm: Implement memory device abstraction

2012-07-12 Thread Blue Swirl

On Wed, Jul 11, 2012 at 10:31 AM, Vasilis Liaskovitis
vasilis.liaskovi...@profitbricks.com wrote:
 Each hotplug-able memory slot is a SysBusDevice. A hot-add operation for a
 particular dimm creates a new MemoryRegion of the given physical address
 offset, size and node proximity, and attaches it to main system memory as a
 sub_region. A hot-remove operation detaches and frees the MemoryRegion from
 system memory.

 This prototype still lacks proper qdev integration: a separate
 hotplug side-channel is used and main system bus hotplug capability is
 ignored.

 Signed-off-by: Vasilis Liaskovitis vasilis.liaskovi...@profitbricks.com
 ---
  hw/Makefile.objs |2 +-
  hw/dimm.c|  234 
 ++
  hw/dimm.h|   58 +
  3 files changed, 293 insertions(+), 1 deletions(-)
  create mode 100644 hw/dimm.c
  create mode 100644 hw/dimm.h

 diff --git a/hw/Makefile.objs b/hw/Makefile.objs
 index 3d77259..e2184bf 100644
 --- a/hw/Makefile.objs
 +++ b/hw/Makefile.objs
 @@ -26,7 +26,7 @@ hw-obj-$(CONFIG_I8254) += i8254_common.o i8254.o
  hw-obj-$(CONFIG_PCSPK) += pcspk.o
  hw-obj-$(CONFIG_PCKBD) += pckbd.o
  hw-obj-$(CONFIG_FDC) += fdc.o
 -hw-obj-$(CONFIG_ACPI) += acpi.o acpi_piix4.o
 +hw-obj-$(CONFIG_ACPI) += acpi.o acpi_piix4.o dimm.o
  hw-obj-$(CONFIG_APM) += pm_smbus.o apm.o
  hw-obj-$(CONFIG_DMA) += dma.o
  hw-obj-$(CONFIG_I82374) += i82374.o
 diff --git a/hw/dimm.c b/hw/dimm.c
 new file mode 100644
 index 000..00c4623
 --- /dev/null
 +++ b/hw/dimm.c
 @@ -0,0 +1,234 @@
 +/*
 + * Dimm device for Memory Hotplug
 + *
 + * Copyright ProfitBricks GmbH 2012
 + * This library is free software; you can redistribute it and/or
 + * modify it under the terms of the GNU Lesser General Public
 + * License as published by the Free Software Foundation; either
 + * version 2 of the License, or (at your option) any later version.
 + *
 + * This library is distributed in the hope that it will be useful,
 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 + * Lesser General Public License for more details.
 + *
 + * You should have received a copy of the GNU Lesser General Public
 + * License along with this library; if not, see 
 http://www.gnu.org/licenses/
 + */
 +
 +#include trace.h
 +#include qdev.h
 +#include dimm.h
 +#include time.h
 +#include ../exec-memory.h
 +#include qmp-commands.h
 +
 +static DeviceState *dimm_hotplug_qdev;
 +static dimm_hotplug_fn dimm_hotplug;
 +static QTAILQ_HEAD(Dimmlist, DimmState)  dimmlist;

Using global state does not look right. It should always be possible
to pass around structures to avoid it.

 +
 +static Property dimm_properties[] = {
 +DEFINE_PROP_END_OF_LIST()
 +};
 +
 +void dimm_populate(DimmState *s)

All functions are global and exported, but there do not seem to be any
users. Please make static every function that you can.

 +{
 +DeviceState *dev= (DeviceState*)s;
 +MemoryRegion *new = NULL;
 +
 +new = g_malloc(sizeof(MemoryRegion));
 +memory_region_init_ram(new, dev-id, s-size);
 +vmstate_register_ram_global(new);
 +memory_region_add_subregion(get_system_memory(), s-start, new);
 +s-mr = new;
 +s-populated = true;
 +}
 +
 +
 +void dimm_depopulate(DimmState *s)
 +{
 +assert(s);
 +if (s-populated) {
 +vmstate_unregister_ram(s-mr, NULL);
 +memory_region_del_subregion(get_system_memory(), s-mr);
 +memory_region_destroy(s-mr);
 +s-populated = false;
 +s-mr = NULL;
 +}
 +}
 +
 +DimmState *dimm_create(char *id, uint64_t size, uint64_t node, uint32_t
 +dimm_idx, bool populated)
 +{
 +DeviceState *dev;
 +DimmState *mdev;
 +
 +dev = sysbus_create_simple(dimm, -1, NULL);
 +dev-id = id;
 +
 +mdev = DIMM(dev);
 +mdev-idx = dimm_idx;
 +mdev-start = 0;
 +mdev-size = size;
 +mdev-node = node;
 +mdev-populated = populated;
 +QTAILQ_INSERT_TAIL(dimmlist, mdev, nextdimm);
 +return mdev;
 +}
 +
 +void dimm_register_hotplug(dimm_hotplug_fn hotplug, DeviceState *qdev)
 +{
 +dimm_hotplug_qdev = qdev;
 +dimm_hotplug = hotplug;
 +dimm_scan_populated();
 +}
 +
 +void dimm_activate(DimmState *slot)
 +{
 +dimm_populate(slot);
 +if (dimm_hotplug)
 +dimm_hotplug(dimm_hotplug_qdev, (SysBusDevice*)slot, 1);

Why the cast?

Also braces, please check your patches with checkpatch.pl.

 +}
 +
 +void dimm_deactivate(DimmState *slot)
 +{
 +if (dimm_hotplug)
 +dimm_hotplug(dimm_hotplug_qdev, (SysBusDevice*)slot, 0);
 +}
 +
 +DimmState *dimm_find_from_name(char *id)

const char *id?

 +{
 +Error *err = NULL;
 +DeviceState *qdev;
 +const char *type;
 +qdev = qdev_find_recursive(sysbus_get_default(), id);
 +if (qdev) {
 +type = object_property_get_str(OBJECT(qdev), type, err);
 +if (!type) {
 +return NULL;
 +}
 +if (!strcmp(type,

Re: [Qemu-devel] [RFC PATCH v2 00/21] ACPI memory hotplug

2012-07-12 Thread Blue Swirl

On Wed, Jul 11, 2012 at 10:31 AM, Vasilis Liaskovitis
vasilis.liaskovi...@profitbricks.com wrote:
 This is v2 of the ACPI memory hotplug prototype for x86_64 target.

I think the concept of DIMMs (what about SIMMs? SODIMMs? I liked
memslot) would be useful for most targets, but hotplugging may be
limited to x86 only. It would be nice to keep these two separate or as
loosely coupled as possible.


 Changes v1->v2

 - memory map is automatically calculated for hotplug dimms. Dimms are added 
 from
 top-of-memory skipping the pci hole at [PCI_HOLE_START, 4G).
 - Renamed from -memslot to -dimm. Commands changed to dimm_add, 
 dimm_del.
 - Seabios ejection array reduced to a byte. Use extraction macros for dimm 
 ssdt.
 - additional SRAT paravirt info does not break previous SRAT fw_cfg layout.
 - Documentation of new acpi_piix4 registers and paravirt data.
 - add ACPI _OST support for _OST enabled guests. This allows qemu to receive
 notification for success / failure of memory hot-add and hot-remove 
 operations.
 Guest needs to support _OST (https://lkml.org/lkml/2012/6/25/321)
 - add monitor info command to report total guest memory (initial + hot-added)
 - add command line options and monitor commands for batch dimm 
 creation/population

 Overview:

 Dimm devices are modeled with a new qemu command line

 -dimm id=name,size=sz,node=pxm,populated=on|off

 As already mentioned, the starting physical address for all dimms is 
 calculated
 automatically from top of memory, skipping the pci hole at [PCI_HOLE_START, 
 4G).
 Node is defining numa proximity for this dimm. When not defined it defaults
 to zero.
 -dimm id=dimm0,size=512M,node=0,populated=off
 will define a 512M memory slot belonging to numa node 0.

 Dimms are added or removed with a new hmp command dimm_add/dimm_del:
 Hot-add syntax: dimm_add id
 Hot-remove syntax: dimm_del id

 Issues:

 - Live migration works as long as populated field is changed to on for
 hotplugged dimms at the destination qemu command line (patch 12/21 lifts
 this requirement). The DimmState structure does not yet define a
 VMStateDescription, but I assume this is the preferred way to pass state
 for migration.

 - Dimms are abstracted as qdevices attached to the main system bus. However,
 memory hotplugging has its own side channel ignoring main_system_bus's hotplug
 incapability. A cleaner integration is still needed, probably attaching memory
 devices as children-links of an acpi-capable device (in the pc case 
 acpi_piix4)
 instead of the system bus (TBD). Then device_add/device_del instead of new
 commands can hopefully be used.

 Comments/review welcome.

 series is based on uq/master for qemu-kvm, and master for seabios. Can be 
 found
 also at:
 http://github.com/vliaskov/qemu-kvm/commits/memhp-v2
 http://github.com/vliaskov/seabios/commits/memhp-v2

 Vasilis Liaskovitis (14):
   dimm: Implement memory device abstraction
   acpi_piix4: Implement memory device hotplug registers
   pc: calculate dimm physical addresses and adjust memory map
   pc: Add dimm paravirt SRAT info
   Implement -dimm command line option
   Implement dimm_add and dimm_del commands for hmp and qmp
   fix live-migration when populated=on is missing
   Implement memory hotplug notification lists
   acpi_piix4: _OST dimm support
   acpi_piix4: Update dimm state on VM reboot
   acpi_piix4: Update dimm bitmap state on hot-remove fail
   Implement info memtotal and query-memtotal
   Implement -dimms, -dimmspop command line options
   Implement mem_increase, mem_decrease hmp/qmp commands

  arch_init.c |   23 ++-
  docs/specs/acpi_hotplug.txt |   46 +
  docs/specs/fwcfg.txt|   28 +++
  hmp-commands.hx |   67 +++
  hmp.c   |   24 +++
  hmp.h   |2 +
  hw/Makefile.objs|2 +-
  hw/acpi_piix4.c |  131 -
  hw/dimm.c   |  449 
 +++
  hw/dimm.h   |   72 +++
  hw/pc.c |   94 +-
  hw/pc.h |6 +
  hw/pc_piix.c|   18 ++-
  monitor.c   |   35 
  monitor.h   |5 +
  qapi-schema.json|   38 
  qemu-config.c   |   70 +++
  qemu-options.hx |   15 ++
  qmp-commands.hx |  137 +
  sysemu.h|1 +
  vl.c|  122 -
  21 files changed, 1368 insertions(+), 17 deletions(-)
  create mode 100644 docs/specs/acpi_hotplug.txt
  create mode 100644 docs/specs/fwcfg.txt
  create mode 100644 hw/dimm.c
  create mode 100644 hw/dimm.h

 Vasilis Liaskovitis (7):
   Add ACPI_EXTRACT_DEVICE* macros
   Add SSDT memory device support
   acpi-dsdt: Implement functions for memory hotplug.
   acpi: generate hotplug memory devices.
   pciinit: Fix pcimem_start value
   acpi_dsdt: Support _OST dimm method
   acpi_dsdt:

Re: [Qemu-devel] plan for device assignment upstream

2012-07-05 Thread Blue Swirl

On Wed, Jul 4, 2012 at 8:05 AM, Avi Kivity a...@redhat.com wrote:
 On 07/03/2012 10:06 PM, Blue Swirl wrote:
 On Mon, Jul 2, 2012 at 9:43 AM, Avi Kivity a...@redhat.com wrote:
 On 07/02/2012 12:30 PM, Jan Kiszka wrote:
 On 2012-07-02 11:18, Michael S. Tsirkin wrote:
 I've been thinking hard about Jan's patches for device
 assignment. Basically while I thought it makes sense
 to make all devices: assignment and not - behave the
 same and use same APIs for injecting irqs, Anthony thinks there is huge
 value in making irq propagation hierarchical and device assignment
 should be special cased.

 On the long term, we will need direct injection, ie. caching, to allow
 making it lock-less. Stepping through all intermediate layers will cause
 troubles, at least performance-wise, when having to take and drop a lock
 at each stop.

 So we precalculate everything beforehand.  Instead of each qemu_irq
 triggering a callback, calculating the next hop and firing the next
 qemu_irq, configure each qemu_irq array with a function that describes
 how to take the next hop.  Whenever the configuration changes,
 recalculate all routes.

 Yes, we had this discussion last year when I proposed the IRQ matrix:
 http://lists.nongnu.org/archive/html/qemu-devel/2011-09/msg00474.html

 One problem with the matrix is that it only works for enable/disable
 level, not for more complex situations like boolean logic or
 multiplexed outputs.

 I think we do need to support inverters etc.

 Perhaps the devices should describe the currently valid logic with
 packet filter type mechanism? I think that could scale arbitrarily and
 it could be more friendly even as a kernel interface?

 Interesting idea.  So qemu creates multiple eventfds, gives half to
 devices and half to kvm (as irqfds), and configures bpf programs that
 calculate the irqfd outputs from the vfio inputs.

I wasn't thinking of using fds (though I guess that could work too), just
that the interface could be similar to packet filters. So a device
which implements an enable switch and ORs 8 inputs to a global output
could be implemented with:
context = rule_init();
context = append_rule(context, R_OR, 8, irq_array[]);
context = append_rule(context, R_AND, 1, irq_enable);
send_to_kernel_or_master_irq_controller(context);


 At least for x86 this is overkill.  I would be okay with
 one-input-one-output cases handled with the current code and everything
 else routed through qemu.

If this is efficient, some of the internal logic inside devices (for
example PCI) could be implemented with the rules. Usually devices have
one or just a few IRQ outputs but several possible internal sources
for these.


 --
 error compiling committee.c: too many arguments to function


--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [Qemu-devel] plan for device assignment upstream

2012-07-03 Thread Blue Swirl

On Mon, Jul 2, 2012 at 9:43 AM, Avi Kivity a...@redhat.com wrote:
 On 07/02/2012 12:30 PM, Jan Kiszka wrote:
 On 2012-07-02 11:18, Michael S. Tsirkin wrote:
 I've been thinking hard about Jan's patches for device
 assignment. Basically while I thought it makes sense
 to make all devices: assignment and not - behave the
 same and use same APIs for injecting irqs, Anthony thinks there is huge
 value in making irq propagation hierarchical and device assignment
 should be special cased.

 On the long term, we will need direct injection, ie. caching, to allow
 making it lock-less. Stepping through all intermediate layers will cause
 troubles, at least performance-wise, when having to take and drop a lock
 at each stop.

 So we precalculate everything beforehand.  Instead of each qemu_irq
 triggering a callback, calculating the next hop and firing the next
 qemu_irq, configure each qemu_irq array with a function that describes
 how to take the next hop.  Whenever the configuration changes,
 recalculate all routes.

Yes, we had this discussion last year when I proposed the IRQ matrix:
http://lists.nongnu.org/archive/html/qemu-devel/2011-09/msg00474.html

One problem with the matrix is that it only works for enable/disable
level, not for more complex situations like boolean logic or
multiplexed outputs.

Perhaps the devices should describe the currently valid logic with a
packet-filter-type mechanism? I think that could scale arbitrarily, and
it could even be more friendly as a kernel interface.


 For device assignment or vhost, we can have a qemu_irq_irqfd() which
 converts a qemu_irq to an eventfd.  If the route calculations determine
 that it can be serviced via a real irqfd, they also configure it as an
 irqfd.  Otherwise qemu configures a poll on this eventfd and calls the
 callback when needed.


 --
 error compiling committee.c: too many arguments to function



--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH 1/6] file_ram_alloc(): coding style fixes

2012-07-03 Thread Blue Swirl

On Mon, Jul 2, 2012 at 6:06 PM, Eduardo Habkost ehabk...@redhat.com wrote:
 Cc: Blue Swirl blauwir...@gmail.com
 Signed-off-by: Eduardo Habkost ehabk...@redhat.com

Acked-by: Blue Swirl blauwir...@gmail.com

 ---
  exec.c |5 +++--
  1 file changed, 3 insertions(+), 2 deletions(-)

 diff --git a/exec.c b/exec.c
 index 8244d54..c8bfd27 100644
 --- a/exec.c
 +++ b/exec.c
 @@ -2392,7 +2392,7 @@ static void *file_ram_alloc(RAMBlock *block,
  unlink(filename);
  free(filename);

 -memory = (memory+hpagesize-1)  ~(hpagesize-1);
 +memory = (memory + hpagesize - 1)  ~(hpagesize - 1);

  /*
   * ftruncate is not supported by hugetlbfs in older
 @@ -2400,8 +2400,9 @@ static void *file_ram_alloc(RAMBlock *block,
   * If anything goes wrong with it under other filesystems,
   * mmap will fail.
   */
 -if (ftruncate(fd, memory))
 +if (ftruncate(fd, memory)) {
  perror(ftruncate);
 +}

  #ifdef MAP_POPULATE
  /* NB: MAP_POPULATE won't exhaustively alloc all phys pages in the case
 --
 1.7.10.4

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH 2/6] file_ram_alloc(): use g_strdup_printf() instead of asprintf()

2012-07-03 Thread Blue Swirl

On Mon, Jul 2, 2012 at 6:06 PM, Eduardo Habkost ehabk...@redhat.com wrote:
 Cc: Blue Swirl blauwir...@gmail.com
 Signed-off-by: Eduardo Habkost ehabk...@redhat.com

Acked-by: Blue Swirl blauwir...@gmail.com

 ---
  exec.c |   14 +++---
  1 file changed, 7 insertions(+), 7 deletions(-)

 diff --git a/exec.c b/exec.c
 index c8bfd27..d856325 100644
 --- a/exec.c
 +++ b/exec.c
 @@ -24,6 +24,9 @@
  #include sys/mman.h
  #endif

 +#include glib.h
 +#include glib/gprintf.h
 +
  #include qemu-common.h
  #include cpu.h
  #include tcg.h
 @@ -2357,7 +2360,7 @@ static void *file_ram_alloc(RAMBlock *block,
  ram_addr_t memory,
  const char *path)
  {
 -char *filename;
 +gchar *filename;
  void *area;
  int fd;
  #ifdef MAP_POPULATE
 @@ -2379,18 +2382,15 @@ static void *file_ram_alloc(RAMBlock *block,
  return NULL;
  }

 -if (asprintf(filename, %s/qemu_back_mem.XX, path) == -1) {
 -return NULL;
 -}
 -
 +filename = g_strdup_printf(%s/qemu_back_mem.XX, path);
  fd = mkstemp(filename);
  if (fd  0) {
  perror(unable to create backing store for hugepages);
 -free(filename);
 +g_free(filename);
  return NULL;
  }
  unlink(filename);
 -free(filename);
 +g_free(filename);

  memory = (memory + hpagesize - 1)  ~(hpagesize - 1);

 --
 1.7.10.4

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [Qemu-devel] [PATCH] kvm: align ram_size to page boundary

2012-06-17 Thread Blue Swirl

On Sun, Jun 17, 2012 at 11:51 AM, Avi Kivity a...@redhat.com wrote:
 On 06/17/2012 02:47 PM, Jan Kiszka wrote:

 I think this should rather go into generic code.

 To be honest, I put this in kvm-specific code because vl.c doesn't have
 TARGET_PAGE_ALIGN.  Maybe we should have machine->page_size or
 machine->ram_alignment.

 What sense does it make
 to have partial pages with TCG?

 Why impose an artificial restriction?

 Beca...


 (answer: to reduce differences among various accelerators)


 Oh, you found the answer. :)

 Reducing round-trips across the Internet.


 At least, it should be enforced for the x86 target, independent of the
 accelerator.

 Yeah.  So there's machine->page_size or machine->ram_alignment.  Not
 sure which is best.

The boards should make sure that the amount of RAM is feasible with
the board memory slots. It's not possible to put 256kb SIMMs to a slot
that expects 1GB DIMMs. We can allow some flexibility there though,
I'm not sure if the current chipsets would support very much memory if
we followed the docs to the letter.

Maybe strtosz() should just enforce 1MB granularity.

What about ballooning (memory hotplug?), can that reduce the memory by
a smaller amount than the page size?


 --
 error compiling committee.c: too many arguments to function



--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [Qemu-devel] [PATCH] kvm: align ram_size to page boundary

2012-06-17 Thread Blue Swirl

On Sun, Jun 17, 2012 at 12:54 PM, Avi Kivity a...@redhat.com wrote:
 On 06/17/2012 03:43 PM, Blue Swirl wrote:
 On Sun, Jun 17, 2012 at 11:51 AM, Avi Kivity a...@redhat.com wrote:
 On 06/17/2012 02:47 PM, Jan Kiszka wrote:

 I think this should rather go into generic code.

 To be honest, I put this in kvm-specific code because vl.c doesn't have
 TARGET_PAGE_ALIGN.  Maybe we should have machine->page_size or
 machine->ram_alignment.

 What sense does it make
 to have partial pages with TCG?

 Why impose an artificial restriction?

 Beca...


 (answer: to reduce differences among various accelerators)


 Oh, you found the answer. :)

 Reducing round-trips across the Internet.


 At least, it should be enforced for the x86 target, independent of the
 accelerator.

 Yeah.  So there's machine->page_size or machine->ram_alignment.  Not
 sure which is best.

 The boards should make sure that the amount of RAM is feasible with
 the board memory slots. It's not possible to put 256kb SIMMs to a slot
 that expects 1GB DIMMs. We can allow some flexibility there though,
 I'm not sure if the current chipsets would support very much memory if
 we followed the docs to the letter.

 Right. And generally memory modules are sized a power of two, creating
 the silly mega == 1048576 movement.


 Maybe strtosz() should just enforce 1MB granularity.

 strtosz() is much too general.  We could do it in vl.c without trouble.
  However, it takes away our ability to emulate a "640k should be enough
 for everyone" machine.

Then how about the current max of target page sizes: 8k? No machine should
want less than that.



 What about ballooning (memory hotplug?), can that reduce the memory by
 smaller amount than page size?

 Ballooning removes individual pages, that has no effect on the slot size.

 --
 error compiling committee.c: too many arguments to function


--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH qom-next 00/59] QOM CPUState, part 4: CPU_COMMON

2012-05-23 Thread Blue Swirl

On Wed, May 23, 2012 at 3:07 AM, Andreas Färber afaer...@suse.de wrote:
 Hello,

 This series, based on qom-next and the two pending ARM cleanup patches, starts
 moving fields from CPUArchState (CPU_COMMON) to QOM CPUState. It stops short
 of moving all easily possible fields (i.e., those not depending on 
 target_ulong
 or target_phys_addr_t) since the series got too long already and is expected 
 to
 spark some controversies due to collisions with several other series.

 The series is structured as preparatory refactorings interwoven with the 
 actual
 touch-all movement of one field (cpu: Move ... to CPUState), optionally
 followed by type signature cleanups, culminating in the movement of two fields
 that are tied together by VMState.
 Thus, unlike part 3, this series cannot randomly be cherry-picked to
 arch-next trees, only select parts thereof (e.g., use of cpu_s390x_init()).

 Please review and test.

 The use of cpu_index vs. cpuid_apic_id for x86 cpu[n] still needs some 
 thought.

 The question was brought up whether adding the CPUs as child<X86CPU> properties
 should be generalized outside the machine scope - I don't think so, since CPU
 hotplug seems highly architecture-specific and not applicable everywhere 
 (SoCs).

 Blue will likely have a superb idea how to avoid the cpu_tlb_flush() 
 indirection
 that I needed for VMState, but apart from having been a lot of dumb typing, it
 works fine as interim solution. "Blah." wasn't terribly helpful as a comment.

Unfortunately I don't have superb ideas today (as if I had them any
other day...), only second rate jokes (as if they could be called
jokes...). With 'Blah' I obviously meant that I didn't have a solution
for that particular target_ulong/target_phys_addr_t problem. I'll try
to improve on all these areas, if you know what I mean.


 I have checked this to compile on ...
 * openSUSE 12.1 x86_64 w/KVM,
 * openSUSE Factory ppc w/KVM,
 * SLES 11 SP2 s390x w/KVM,
 * mingw32/64 cross-builds,
 * OpenBSD 5.1 amd64 (not for final version though, master doesn't build).
 Untested: Xen.
 Only some targets including i386 were lightly runtime-tested.

 Available for testing and cherry-picking (not pulling!) from:
 git://github.com/afaerber/qemu-cpu.git qom-cpu-common.v1
 https://github.com/afaerber/qemu-cpu/commits/qom-cpu-common.v1

 Regards,
 Andreas

 Cc: Anthony Liguori anth...@codemonkey.ws
 Cc: Paolo Bonzini pbonz...@redhat.com
 Cc: Igor Mammedov imamm...@redhat.com

 Cc: Richard Henderson r...@twiddle.net
 Cc: Peter Maydell peter.mayd...@linaro.org
 Cc: Edgar E. Iglesias edgar.igles...@gmail.com
 Cc: Michael Walle mich...@walle.cc
 Cc: Aurélien Jarno aurel...@aurel32.net
 Cc: Alexander Graf ag...@suse.de
 Cc: David Gibson da...@gibson.dropbear.id.au
 Cc: qemu-ppc qemu-...@nongnu.org
 Cc: Blue Swirl blauwir...@gmail.com
 Cc: Guan Xuetao g...@mprc.pku.edu.cn
 Cc: Max Filippov jcmvb...@gmail.com

 Cc: Avi Kivity a...@redhat.com
 Cc: Marcelo Tosatti mtosa...@redhat.com
 Cc: Jan Kiszka jan.kis...@siemens.com
 Cc: kvm kvm@vger.kernel.org

 Cc: Stefano Stabellini stefano.stabell...@eu.citrix.com
 Cc: xen-devel xen-de...@lists.xensource.com

 Changes from preview in Igor's apic thread:
 * Use g_strdup_printf() for cpu[x] to be safe wrt length and nul 
 termination.
 * Clean up removal of x86 version 5 load/save support.
 * Convert use of env-halted in s390x KVM code.
 * Convert some uses of env-halted/interrupt_request in ppc KVM code.
 * Convert some uses of env-halted in Xen code, prepend cpu_x86_init() patch.
 * Avoid using POWERPC_CPU() / SPARC_CPU() macros inside *_set_irq() functions.

 Andreas Färber (59):
  qemu-thread: Let qemu_thread_is_self() return bool
  cpu: Move CPU_COMMON_THREAD into CPUState
  cpu: Move thread field into CPUState
  pc: Add CPU as /machine/cpu[n]
  apic: Replace cpu_env pointer by X86CPU link
  pc: Pass X86CPU to cpu_is_bsp()
  cpu: Move thread_kicked to CPUState
  Makefile.dis: Add include/ to include path
  cpus: Pass CPUState to qemu_cpu_is_self()
  cpus: Pass CPUState to qemu_cpu_kick_thread()
  cpu: Move created field to CPUState
  cpu: Move stop field to CPUState
  ppce500_spin: Store PowerPCCPU in SpinKick
  cpu: Move stopped field to CPUState
  cpus: Pass CPUState to cpu_is_stopped()
  cpus: Pass CPUState to cpu_can_run()
  cpu: Move halt_cond to CPUState
  cpus: Pass CPUState to qemu_tcg_cpu_thread_fn
  cpus: Pass CPUState to qemu_tcg_init_vcpu()
  ppc: Pass PowerPCCPU to ppc6xx_set_irq()
  ppc: Pass PowerPCCPU to ppc970_set_irq()
  ppc: Pass PowerPCCPU to power7_set_irq()
  ppc: Pass PowerPCCPU to ppc40x_set_irq()
  ppc: Pass PowerPCCPU to ppce500_set_irq()
  sun4m: Pass SPARCCPU to cpu_set_irq()
  sun4m: Pass SPARCCPU to cpu_kick_irq()
  sun4u: Pass SPARCCPU to {,s,hs}tick_irq() and cpu_timer_create()
  sun4u: Pass SPARCCPU to cpu_kick_irq()
  target-ppc: Rename kvm_kick_{env = cpu} and pass PowerPCCPU
  target-s390x: Let cpu_s390x_init() return S390CPU
  s390-virtio: Use cpu_s390x_init() to obtain S390CPU
  s390

Re: [Qemu-devel] KVM call agenda for tuesday 31

2012-03-05 Thread Blue Swirl

On Mon, Mar 5, 2012 at 15:17, Avi Kivity a...@redhat.com wrote:
 On 03/05/2012 05:15 PM, Anthony Liguori wrote:
 The other alternative is to s/target_phys_addr_t/uint64_t/ in the memory
 API.  I think 32-on-32 is quite rare these days, so it wouldn't be much
 of a performance issue.


 I think this makes sense independent of other discussions regarding
 fixing target_phys_addr_t size.

 Hardware addresses should be independent of the target.  If we wanted
 to use a hw_addr_t that would be okay too.


 Would this hw_addr (s/_t$//, or you'll be Blued) be fixed at uint64_t

Malced? Posixed?

 (and thus only documentary), or also subject to multiple compilation?

In real world CPU physical addresses, bus addresses and device
addresses need not have anything in common. The best would be if we
could have devices with 10-bit addresses mixing freely with 32 bit
buses and 36 bit CPU physical addresses. The next best thing probably
is to fix all of them to the shortest possible reasonable value, like now.
Fixing all of them to 64 bits would simplify things a lot if we no
longer care about the small performance loss on 32 bit hosts.

 --
 error compiling committee.c: too many arguments to function


--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [Qemu-devel] [PATCH v2 5/8] kvmvapic: Introduce TPR access optimization for Windows guests

2012-02-13 Thread Blue Swirl

On Mon, Feb 13, 2012 at 10:16, Jan Kiszka jan.kis...@siemens.com wrote:
 On 2012-02-11 16:25, Blue Swirl wrote:
 On Fri, Feb 10, 2012 at 18:31, Jan Kiszka jan.kis...@siemens.com wrote:
 This enables acceleration for MMIO-based TPR registers accesses of
 32-bit Windows guest systems. It is mostly useful with KVM enabled,
 either on older Intel CPUs (without flexpriority feature, can also be
 manually disabled for testing) or any current AMD processor.

 The approach introduced here is derived from the original version of
 qemu-kvm. It was refactored, documented, and extended by support for
 user space APIC emulation, both with and without KVM acceleration. The
 VMState format was kept compatible, so was the ABI to the option ROM
 that implements the guest-side para-virtualized driver service. This
 enables seamless migration from qemu-kvm to upstream or, one day,
 between KVM and TCG mode.

 The basic concept goes like this:
  - VAPIC PV interface consisting of I/O port 0x7e and (for KVM in-kernel
   irqchip) a vmcall hypercall is registered
  - VAPIC option ROM is loaded into guest
  - option ROM activates TPR MMIO access reporting via port 0x7e
  - TPR accesses are trapped and patched in the guest to call into option
   ROM instead, VAPIC support is enabled
  - option ROM TPR helpers track state in memory and invoke hypercall to
   poll for pending IRQs if required

 Signed-off-by: Jan Kiszka jan.kis...@siemens.com

 I must say that I find the approach horrible, patching guests and ROMs
 and looking up Windows internals. Taking the same approach to extreme,
 we could for example patch Xen guest to become a KVM guest. Not that I
 object merging.

 Yes, this is horrible. But there is no real better way in the absence of
 hardware assisted virtualization of the TPR. I think MS is recommending
 this patching approach as well.

Maybe instead of routing via ROM and the hypercall, the TPR accesses
could be handled directly with guest invisible breakpoints (like GDB
breakpoints, but for QEMU internal use), much like other
instrumentation could be handled.

 diff --git a/hw/apic.c b/hw/apic.c
 index 086c544..2ebf3ca 100644
 --- a/hw/apic.c
 +++ b/hw/apic.c
 @@ -35,6 +35,10 @@
  #define MSI_ADDR_DEST_ID_SHIFT         12
  #define        MSI_ADDR_DEST_ID_MASK           0x000

 +#define SYNC_FROM_VAPIC                 0x1
 +#define SYNC_TO_VAPIC                   0x2
 +#define SYNC_ISR_IRR_TO_VAPIC           0x4

 Enum, please.

 OK.


 +
  static APICCommonState *local_apics[MAX_APICS + 1];

  static void apic_set_irq(APICCommonState *s, int vector_num, int 
 trigger_mode);
 @@ -78,6 +82,70 @@ static inline int get_bit(uint32_t *tab, int index)
     return !!(tab[i]  mask);
  }

 +/* return -1 if no bit is set */
 +static int get_highest_priority_int(uint32_t *tab)
 +{
 +    int i;
 +    for (i = 7; i = 0; i--) {
 +        if (tab[i] != 0) {
 +            return i * 32 + fls_bit(tab[i]);
 +        }
 +    }
 +    return -1;
 +}
 +
 +static void apic_sync_vapic(APICCommonState *s, int sync_type)
 +{
 +    VAPICState vapic_state;
 +    size_t length;
 +    off_t start;
 +    int vector;
 +
 +    if (!s-vapic_paddr) {
 +        return;
 +    }
 +    if (sync_type  SYNC_FROM_VAPIC) {
 +        cpu_physical_memory_rw(s-vapic_paddr, (void *)vapic_state,
 +                               sizeof(vapic_state), 0);
 +        s-tpr = vapic_state.tpr;
 +    }
 +    if (sync_type  (SYNC_TO_VAPIC | SYNC_ISR_IRR_TO_VAPIC)) {
 +        start = offsetof(VAPICState, isr);
 +        length = offsetof(VAPICState, enabled) - offsetof(VAPICState, isr);
 +
 +        if (sync_type  SYNC_TO_VAPIC) {
 +            assert(qemu_cpu_is_self(s-cpu_env));
 +
 +            vapic_state.tpr = s-tpr;
 +            vapic_state.enabled = 1;
 +            start = 0;
 +            length = sizeof(VAPICState);
 +        }
 +
 +        vector = get_highest_priority_int(s-isr);
 +        if (vector  0) {
 +            vector = 0;
 +        }
 +        vapic_state.isr = vector  0xf0;
 +
 +        vapic_state.zero = 0;
 +
 +        vector = get_highest_priority_int(s-irr);
 +        if (vector  0) {
 +            vector = 0;
 +        }
 +        vapic_state.irr = vector  0xff;
 +
 +        cpu_physical_memory_write_rom(s-vapic_paddr + start,
 +                                      ((void *)vapic_state) + start, 
 length);

 This assumes that the vapic_state structure matches what the guest
 expects without conversion. Is this true for i386 on x86_64? I didn't
 check the structure in question.

 Yes, the structure in question is a packed one, stable on both guest and
 host side (the guest side is 32-bit only anyway).

 diff --git a/hw/apic_common.c b/hw/apic_common.c
 index 588531b..1977da7 100644
 --- a/hw/apic_common.c
 +++ b/hw/apic_common.c
 @@ -20,8 +20,10 @@
  #include apic.h
  #include apic_internal.h
  #include trace.h
 +#include kvm.h

  static int apic_irq_delivered;
 +bool apic_report_tpr_access;

 This should go to APICCommonState.

 Nope

Re: [Qemu-devel] [PATCH v2 1/8] kvm: Set cpu_single_env only once

2012-02-11 Thread Blue Swirl

On Fri, Feb 10, 2012 at 18:31, Jan Kiszka jan.kis...@siemens.com wrote:
 As we have thread-local cpu_single_env now and KVM uses exactly one
 thread per VCPU, we can drop the cpu_single_env updates from the loop
 and initialize this variable only once during setup.

I don't think this is correct. Maybe you missed the part that sets
cpu_single_env to NULL, which I think is to annoy broken code that
assumes that some CPU state is always globally available. This is not
true for monitor context.

 Signed-off-by: Jan Kiszka jan.kis...@siemens.com
 ---
  cpus.c    |    1 +
  kvm-all.c |    5 -
  2 files changed, 1 insertions(+), 5 deletions(-)

 diff --git a/cpus.c b/cpus.c
 index f45a438..d0c8340 100644
 --- a/cpus.c
 +++ b/cpus.c
 @@ -714,6 +714,7 @@ static void *qemu_kvm_cpu_thread_fn(void *arg)
     qemu_mutex_lock(qemu_global_mutex);
     qemu_thread_get_self(env-thread);
     env-thread_id = qemu_get_thread_id();
 +    cpu_single_env = env;

     r = kvm_init_vcpu(env);
     if (r  0) {
 diff --git a/kvm-all.c b/kvm-all.c
 index c4babda..e2cbc03 100644
 --- a/kvm-all.c
 +++ b/kvm-all.c
 @@ -1118,8 +1118,6 @@ int kvm_cpu_exec(CPUState *env)
         return EXCP_HLT;
     }

 -    cpu_single_env = env;
 -
     do {
         if (env-kvm_vcpu_dirty) {
             kvm_arch_put_registers(env, KVM_PUT_RUNTIME_STATE);
 @@ -1136,13 +1134,11 @@ int kvm_cpu_exec(CPUState *env)
              */
             qemu_cpu_kick_self();
         }
 -        cpu_single_env = NULL;
         qemu_mutex_unlock_iothread();

         run_ret = kvm_vcpu_ioctl(env, KVM_RUN, 0);

         qemu_mutex_lock_iothread();
 -        cpu_single_env = env;
         kvm_arch_post_run(env, run);

         kvm_flush_coalesced_mmio_buffer();
 @@ -1206,7 +1202,6 @@ int kvm_cpu_exec(CPUState *env)
     }

     env-exit_request = 0;
 -    cpu_single_env = NULL;
     return ret;
  }

 --
 1.7.3.4


--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH v2 1/8] kvm: Set cpu_single_env only once

2012-02-11 Thread Blue Swirl

On Sat, Feb 11, 2012 at 10:06, Jan Kiszka jan.kis...@web.de wrote:
 On 2012-02-11 11:02, Blue Swirl wrote:
 On Fri, Feb 10, 2012 at 18:31, Jan Kiszka jan.kis...@siemens.com wrote:
 As we have thread-local cpu_single_env now and KVM uses exactly one
 thread per VCPU, we can drop the cpu_single_env updates from the loop
 and initialize this variable only once during setup.

 I don't think this is correct. Maybe you missed the part that sets
 cpu_single_env to NULL, which I think is to annoy broken code that
 assumes that some CPU state is always globally available. This is not
 true for monitor context.

 I did check this before changing, and I see no such need. Particularly
 as this old debugging help prevents a valid use case.

It looks like monitor code is safe now. But in several places there
are checks like this in pc.c:
DeviceState *cpu_get_current_apic(void)
{
if (cpu_single_env) {
return cpu_single_env-apic_state;
} else {
return NULL;
}
}

In cpu-exec.c, there are these lines:
/* fail safe : never use cpu_single_env outside cpu_exec() */
cpu_single_env = NULL;

I think using cpu_single_env is an indication of a problem, like poor
code, layering violation or poor API (vmport). What is your use case?


 Jan


 Signed-off-by: Jan Kiszka jan.kis...@siemens.com
 ---
  cpus.c    |    1 +
  kvm-all.c |    5 -
  2 files changed, 1 insertions(+), 5 deletions(-)

 diff --git a/cpus.c b/cpus.c
 index f45a438..d0c8340 100644
 --- a/cpus.c
 +++ b/cpus.c
 @@ -714,6 +714,7 @@ static void *qemu_kvm_cpu_thread_fn(void *arg)
     qemu_mutex_lock(qemu_global_mutex);
     qemu_thread_get_self(env-thread);
     env-thread_id = qemu_get_thread_id();
 +    cpu_single_env = env;

     r = kvm_init_vcpu(env);
     if (r  0) {
 diff --git a/kvm-all.c b/kvm-all.c
 index c4babda..e2cbc03 100644
 --- a/kvm-all.c
 +++ b/kvm-all.c
 @@ -1118,8 +1118,6 @@ int kvm_cpu_exec(CPUState *env)
         return EXCP_HLT;
     }

 -    cpu_single_env = env;
 -
     do {
         if (env-kvm_vcpu_dirty) {
             kvm_arch_put_registers(env, KVM_PUT_RUNTIME_STATE);
 @@ -1136,13 +1134,11 @@ int kvm_cpu_exec(CPUState *env)
              */
             qemu_cpu_kick_self();
         }
 -        cpu_single_env = NULL;
         qemu_mutex_unlock_iothread();

         run_ret = kvm_vcpu_ioctl(env, KVM_RUN, 0);

         qemu_mutex_lock_iothread();
 -        cpu_single_env = env;
         kvm_arch_post_run(env, run);

         kvm_flush_coalesced_mmio_buffer();
 @@ -1206,7 +1202,6 @@ int kvm_cpu_exec(CPUState *env)
     }

     env-exit_request = 0;
 -    cpu_single_env = NULL;
     return ret;
  }

 --
 1.7.3.4






--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [Qemu-devel] [PATCH v2 1/8] kvm: Set cpu_single_env only once

2012-02-11 Thread Blue Swirl

On Sat, Feb 11, 2012 at 12:43, Jan Kiszka jan.kis...@web.de wrote:
 On 2012-02-11 12:49, Andreas Färber wrote:
 Am 11.02.2012 12:25, schrieb Blue Swirl:
 I think using cpu_single_env is an indication of a problem, like poor
 code, layering violation or poor API (vmport). What is your use case?

 I couldn't spot any in this series. Jan, note that any new use of env or
 cpu_single_env will need to be redone when we convert to QOM CPU.

 cpu_single_env should have nothing to do with QOM.

 The ABIs of vmport and the KVM VAPI require a reference to the calling
 VCPU, and that's why you find tons of them in patch 5.

Yes, this seems to be another case of a badly designed ABI. I guess
there is no way to change that anymore, just like vmport?

Some of the cpu_single_env accesses in patch 5 could be avoided when
APIC is moved closer to CPU. VAPIC should be also close to APIC so it
should be able to access the CPU directly. In some other cases the
current state could be passed around instead once it is known.


 Jan

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [Qemu-devel] [PATCH v2 1/8] kvm: Set cpu_single_env only once

2012-02-11 Thread Blue Swirl

On Sat, Feb 11, 2012 at 14:00, Jan Kiszka jan.kis...@web.de wrote:
 On 2012-02-11 14:54, Blue Swirl wrote:
 On Sat, Feb 11, 2012 at 12:43, Jan Kiszka jan.kis...@web.de wrote:
 On 2012-02-11 12:49, Andreas Färber wrote:
 Am 11.02.2012 12:25, schrieb Blue Swirl:
 I think using cpu_single_env is an indication of a problem, like poor
 code, layering violation or poor API (vmport). What is your use case?

 I couldn't spot any in this series. Jan, note that any new use of env or
 cpu_single_env will need to be redone when we convert to QOM CPU.

 cpu_single_env should have nothing to do with QOM.

 The ABIs of vmport and the KVM VAPI require a reference to the calling
 VCPU, and that's why you find tons of them in patch 5.

 Yes, this seems to be another case of a badly designed ABI. I guess
 there is no way to change that anymore, just like vmport?

 Believe me, I grumbled over it more than once while porting it from
 qemu-kvm. The point is that some (Windows) VMs out there are running
 already with this option ROM loaded and working this unfortunate ABI.

Maybe in time those could be deprecated and a ROM using a sane ABI
introduced instead. After some grace time the old ABI could be finally
removed.


 Some of the cpu_single_env accesses in patch 5 could be avoided when
 APIC is moved closer to CPU. VAPIC should be also close to APIC so it
 should be able to access the CPU directly. In some other cases the
 current state could be passed around instead once it is known.

 Some callbacks are I/O-port originated, ie. not associated with the
 per-CPU MMIO area or some MSR. So we would have to pass down the causing
 CPU to every I/O handler - not sure if that is desired...

I meant things like vapic_enable_tpr_reporting(), current CPUState
could be passed via vapic_prepare() easily.

 Jan

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [Qemu-devel] [PATCH v2 2/8] Allow to use pause_all_vcpus from VCPU context

2012-02-11 Thread Blue Swirl

On Fri, Feb 10, 2012 at 18:31, Jan Kiszka jan.kis...@siemens.com wrote:
 In order to perform critical manipulations on the VM state in the
 context of a VCPU, specifically code patching, stopping and resuming of
 all VCPUs may be necessary. resume_all_vcpus is already compatible, now
 enable pause_all_vcpus for this use case by stopping the calling context
 before starting to wait for the whole gang.

 CC: Paolo Bonzini pbonz...@redhat.com
 Signed-off-by: Jan Kiszka jan.kis...@siemens.com
 ---
  cpus.c |   12 
  1 files changed, 12 insertions(+), 0 deletions(-)

 diff --git a/cpus.c b/cpus.c
 index d0c8340..5adfc6b 100644
 --- a/cpus.c
 +++ b/cpus.c
 @@ -870,6 +870,18 @@ void pause_all_vcpus(void)
         penv = (CPUState *)penv-next_cpu;
     }

 +    if (!qemu_thread_is_self(io_thread)) {
 +        cpu_stop_current();
 +        if (!kvm_enabled()) {
 +            while (penv) {
 +                penv-stop = 0;
 +                penv-stopped = 1;
 +                penv = (CPUState *)penv-next_cpu;

The cast is useless, next_cpu is already CPUState *. I wonder why it
is used in other cases too.

 +            }
 +            return;
 +        }
 +    }
 +
     while (!all_vcpus_paused()) {
         qemu_cond_wait(qemu_pause_cond, qemu_global_mutex);
         penv = first_cpu;
 --
 1.7.3.4


--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [Qemu-devel] [PATCH v2 3/8] target-i386: Add infrastructure for reporting TPR MMIO accesses

2012-02-11 Thread Blue Swirl

On Fri, Feb 10, 2012 at 18:31, Jan Kiszka jan.kis...@siemens.com wrote:
 This will allow the APIC core to file a TPR access report. Depending on
 the accelerator and kernel irqchip mode, it will either be delivered
 right away or queued for later reporting.

 In TCG mode, we can restart the triggering instruction and can therefore
 forward the event directly. KVM does not allow us to restart, so we
 postpone the delivery of events recording in the user space APIC until
 the current instruction is completed.

 Note that KVM without in-kernel irqchip will report the address after
 the instruction that triggered a write access. In contrast, read
 accesses will return the precise information.

 Signed-off-by: Jan Kiszka jan.kis...@siemens.com
 ---
  cpu-all.h            |    3 ++-
  hw/apic.h            |    2 ++
  hw/apic_common.c     |    4 
  target-i386/cpu.h    |    9 +
  target-i386/helper.c |   19 +++
  target-i386/kvm.c    |   24 ++--
  6 files changed, 58 insertions(+), 3 deletions(-)

 diff --git a/cpu-all.h b/cpu-all.h
 index e2c3c49..80e6d42 100644
 --- a/cpu-all.h
 +++ b/cpu-all.h
 @@ -375,8 +375,9 @@ DECLARE_TLS(CPUState *,cpu_single_env);
  #define CPU_INTERRUPT_TGT_INT_0   0x0100
  #define CPU_INTERRUPT_TGT_INT_1   0x0400
  #define CPU_INTERRUPT_TGT_INT_2   0x0800
 +#define CPU_INTERRUPT_TGT_INT_3   0x2000

 -/* First unused bit: 0x2000.  */
 +/* First unused bit: 0x4000.  */

  /* The set of all bits that should be masked when single-stepping.  */
  #define CPU_INTERRUPT_SSTEP_MASK \
 diff --git a/hw/apic.h b/hw/apic.h
 index a62d83b..45598bd 100644
 --- a/hw/apic.h
 +++ b/hw/apic.h
 @@ -18,6 +18,8 @@ void cpu_set_apic_tpr(DeviceState *s, uint8_t val);
  uint8_t cpu_get_apic_tpr(DeviceState *s);
  void apic_init_reset(DeviceState *s);
  void apic_sipi(DeviceState *s);
 +void apic_handle_tpr_access_report(DeviceState *d, target_ulong ip,
 +                                   int access);

  /* pc.c */
  int cpu_is_bsp(CPUState *env);
 diff --git a/hw/apic_common.c b/hw/apic_common.c
 index 8373d79..588531b 100644
 --- a/hw/apic_common.c
 +++ b/hw/apic_common.c
 @@ -68,6 +68,10 @@ uint8_t cpu_get_apic_tpr(DeviceState *d)
     return s ? s-tpr  4 : 0;
  }

 +void apic_handle_tpr_access_report(DeviceState *d, target_ulong ip, int 
 access)
 +{
 +}
 +
  void apic_report_irq_delivered(int delivered)
  {
     apic_irq_delivered += delivered;
 diff --git a/target-i386/cpu.h b/target-i386/cpu.h
 index 37dde79..92e9c87 100644
 --- a/target-i386/cpu.h
 +++ b/target-i386/cpu.h
 @@ -482,6 +482,7 @@
  #define CPU_INTERRUPT_VIRQ      CPU_INTERRUPT_TGT_INT_0
  #define CPU_INTERRUPT_INIT      CPU_INTERRUPT_TGT_INT_1
  #define CPU_INTERRUPT_SIPI      CPU_INTERRUPT_TGT_INT_2
 +#define CPU_INTERRUPT_TPR       CPU_INTERRUPT_TGT_INT_3


  enum {
 @@ -772,6 +773,9 @@ typedef struct CPUX86State {
     XMMReg ymmh_regs[CPU_NB_REGS];

     uint64_t xcr0;
 +
 +    target_ulong tpr_access_ip;
 +    int tpr_access_type;
  } CPUX86State;

  CPUX86State *cpu_x86_init(const char *cpu_model);
 @@ -1064,4 +1068,9 @@ void svm_check_intercept(CPUState *env1, uint32_t type);

  uint32_t cpu_cc_compute_all(CPUState *env1, int op);

 +#define TPR_ACCESS_READ     0
 +#define TPR_ACCESS_WRITE    1

enum would be nicer.

 +
 +void cpu_report_tpr_access(CPUState *env, int access);
 +
  #endif /* CPU_I386_H */
 diff --git a/target-i386/helper.c b/target-i386/helper.c
 index 2586aff..eca20cd 100644
 --- a/target-i386/helper.c
 +++ b/target-i386/helper.c
 @@ -1189,6 +1189,25 @@ void cpu_x86_inject_mce(Monitor *mon, CPUState *cenv, 
 int bank,
         }
     }
  }
 +
 +void cpu_report_tpr_access(CPUState *env, int access)
 +{
 +    TranslationBlock *tb;
 +
 +    if (kvm_enabled()) {
 +        cpu_synchronize_state(env);
 +
 +        env-tpr_access_ip = env-eip;
 +        env-tpr_access_type = access;
 +
 +        cpu_interrupt(env, CPU_INTERRUPT_TPR);
 +    } else {
 +        tb = tb_find_pc(env-mem_io_pc);
 +        cpu_restore_state(tb, env, env-mem_io_pc);
 +
 +        apic_handle_tpr_access_report(env-apic_state, env-eip, access);
 +    }
 +}
  #endif /* !CONFIG_USER_ONLY */

  static void mce_init(CPUX86State *cenv)
 diff --git a/target-i386/kvm.c b/target-i386/kvm.c
 index 981192d..fa77f9d 100644
 --- a/target-i386/kvm.c
 +++ b/target-i386/kvm.c
 @@ -1635,8 +1635,10 @@ void kvm_arch_pre_run(CPUState *env, struct kvm_run 
 *run)
     }

     if (!kvm_irqchip_in_kernel()) {
 -        /* Force the VCPU out of its inner loop to process the INIT request 
 */
 -        if (env-interrupt_request  CPU_INTERRUPT_INIT) {
 +        /* Force the VCPU out of its inner loop to process any INIT requests
 +         * or pending TPR access reports. */
 +        if (env-interrupt_request 
 +            (CPU_INTERRUPT_INIT | CPU_INTERRUPT_TPR)) {
             env-exit_request = 1;
         }

 @@ -1730,6 +1732,11 @@ int kvm_arch_process_async_events(CPUState *env)

Re: [Qemu-devel] [PATCH v2 5/8] kvmvapic: Introduce TPR access optimization for Windows guests

2012-02-11 Thread Blue Swirl

On Fri, Feb 10, 2012 at 18:31, Jan Kiszka jan.kis...@siemens.com wrote:
 This enables acceleration for MMIO-based TPR registers accesses of
 32-bit Windows guest systems. It is mostly useful with KVM enabled,
 either on older Intel CPUs (without flexpriority feature, can also be
 manually disabled for testing) or any current AMD processor.

 The approach introduced here is derived from the original version of
 qemu-kvm. It was refactored, documented, and extended by support for
 user space APIC emulation, both with and without KVM acceleration. The
 VMState format was kept compatible, so was the ABI to the option ROM
 that implements the guest-side para-virtualized driver service. This
 enables seamless migration from qemu-kvm to upstream or, one day,
 between KVM and TCG mode.

 The basic concept goes like this:
  - VAPIC PV interface consisting of I/O port 0x7e and (for KVM in-kernel
   irqchip) a vmcall hypercall is registered
  - VAPIC option ROM is loaded into guest
  - option ROM activates TPR MMIO access reporting via port 0x7e
  - TPR accesses are trapped and patched in the guest to call into option
   ROM instead, VAPIC support is enabled
  - option ROM TPR helpers track state in memory and invoke hypercall to
   poll for pending IRQs if required

 Signed-off-by: Jan Kiszka jan.kis...@siemens.com

I must say that I find the approach horrible, patching guests and ROMs
and looking up Windows internals. Taking the same approach to extreme,
we could for example patch Xen guest to become a KVM guest. Not that I
object to merging.

 ---
  Makefile.target    |    3 +-
  hw/apic.c          |  126 -
  hw/apic_common.c   |   64 +-
  hw/apic_internal.h |   27 ++
  hw/kvm/apic.c      |   32 +++
  hw/kvmvapic.c      |  774 
 
  6 files changed, 1012 insertions(+), 14 deletions(-)
  create mode 100644 hw/kvmvapic.c

 diff --git a/Makefile.target b/Makefile.target
 index 68481a3..ec7eff8 100644
 --- a/Makefile.target
 +++ b/Makefile.target
 @@ -230,7 +230,8 @@ obj-y += device-hotplug.o

  # Hardware support
  obj-i386-y += mc146818rtc.o pc.o
 -obj-i386-y += sga.o apic_common.o apic.o ioapic_common.o ioapic.o piix_pci.o
 +obj-i386-y += apic_common.o apic.o kvmvapic.o
 +obj-i386-y += sga.o ioapic_common.o ioapic.o piix_pci.o
  obj-i386-y += vmport.o
  obj-i386-y += pci-hotplug.o smbios.o wdt_ib700.o
  obj-i386-y += debugcon.o multiboot.o
 diff --git a/hw/apic.c b/hw/apic.c
 index 086c544..2ebf3ca 100644
 --- a/hw/apic.c
 +++ b/hw/apic.c
 @@ -35,6 +35,10 @@
  #define MSI_ADDR_DEST_ID_SHIFT         12
  #define        MSI_ADDR_DEST_ID_MASK           0x000

 +#define SYNC_FROM_VAPIC                 0x1
 +#define SYNC_TO_VAPIC                   0x2
 +#define SYNC_ISR_IRR_TO_VAPIC           0x4

Enum, please.

 +
  static APICCommonState *local_apics[MAX_APICS + 1];

  static void apic_set_irq(APICCommonState *s, int vector_num, int 
 trigger_mode);
 @@ -78,6 +82,70 @@ static inline int get_bit(uint32_t *tab, int index)
     return !!(tab[i]  mask);
  }

 +/* return -1 if no bit is set */
 +static int get_highest_priority_int(uint32_t *tab)
 +{
 +    int i;
 +    for (i = 7; i = 0; i--) {
 +        if (tab[i] != 0) {
 +            return i * 32 + fls_bit(tab[i]);
 +        }
 +    }
 +    return -1;
 +}
 +
 +static void apic_sync_vapic(APICCommonState *s, int sync_type)
 +{
 +    VAPICState vapic_state;
 +    size_t length;
 +    off_t start;
 +    int vector;
 +
 +    if (!s-vapic_paddr) {
 +        return;
 +    }
 +    if (sync_type  SYNC_FROM_VAPIC) {
 +        cpu_physical_memory_rw(s-vapic_paddr, (void *)vapic_state,
 +                               sizeof(vapic_state), 0);
 +        s-tpr = vapic_state.tpr;
 +    }
 +    if (sync_type  (SYNC_TO_VAPIC | SYNC_ISR_IRR_TO_VAPIC)) {
 +        start = offsetof(VAPICState, isr);
 +        length = offsetof(VAPICState, enabled) - offsetof(VAPICState, isr);
 +
 +        if (sync_type  SYNC_TO_VAPIC) {
 +            assert(qemu_cpu_is_self(s-cpu_env));
 +
 +            vapic_state.tpr = s-tpr;
 +            vapic_state.enabled = 1;
 +            start = 0;
 +            length = sizeof(VAPICState);
 +        }
 +
 +        vector = get_highest_priority_int(s-isr);
 +        if (vector  0) {
 +            vector = 0;
 +        }
 +        vapic_state.isr = vector  0xf0;
 +
 +        vapic_state.zero = 0;
 +
 +        vector = get_highest_priority_int(s-irr);
 +        if (vector  0) {
 +            vector = 0;
 +        }
 +        vapic_state.irr = vector  0xff;
 +
 +        cpu_physical_memory_write_rom(s-vapic_paddr + start,
 +                                      ((void *)vapic_state) + start, 
 length);

This assumes that the vapic_state structure matches what the guest
expects without conversion. Is this true for i386 on x86_64? I didn't
check the structure in question.

 +    }
 +}
 +
 +static void apic_vapic_base_update(APICCommonState *s)
 +{
 +    apic_sync_vapic(s, SYNC_TO_VAPIC);
 +}
 +
  static

Re: [PATCH v4 12/15] kvm: x86: Add user space part for in-kernel APIC

2011-12-10 Thread Blue Swirl

On Fri, Dec 9, 2011 at 07:52, Jan Kiszka jan.kis...@siemens.com wrote:
 On 2011-12-09 08:45, Jan Kiszka wrote:
 On 2011-12-08 22:16, Blue Swirl wrote:
 On Thu, Dec 8, 2011 at 11:52, Jan Kiszka jan.kis...@siemens.com wrote:
 This introduces the alternative APIC backend which makes use of KVM's
 in-kernel device model. External NMI injection via LINT1 is emulated by
 checking the current state of the in-kernel APIC, only injecting a NMI
 into the VCPU if LINT1 is unmasked and configured to DM_NMI.

 MSI is not yet supported, so we disable this when the in-kernel model is
 in use.

 CC: Lai Jiangshan la...@cn.fujitsu.com
 Signed-off-by: Jan Kiszka jan.kis...@siemens.com
 ---
  Makefile.target   |    2 +-
  hw/kvm/apic.c     |  154 
 +
  hw/pc.c           |   15 --
  kvm.h             |    3 +
  target-i386/kvm.c |    8 +++
  5 files changed, 176 insertions(+), 6 deletions(-)
  create mode 100644 hw/kvm/apic.c

 diff --git a/Makefile.target b/Makefile.target
 index b549988..76de485 100644
 --- a/Makefile.target
 +++ b/Makefile.target
 @@ -236,7 +236,7 @@ obj-i386-y += vmport.o
  obj-i386-y += device-hotplug.o pci-hotplug.o smbios.o wdt_ib700.o
  obj-i386-y += debugcon.o multiboot.o
  obj-i386-y += pc_piix.o
 -obj-i386-$(CONFIG_KVM) += kvm/clock.o
 +obj-i386-$(CONFIG_KVM) += kvm/clock.o kvm/apic.o
  obj-i386-$(CONFIG_SPICE) += qxl.o qxl-logger.o qxl-render.o

  # shared objects
 diff --git a/hw/kvm/apic.c b/hw/kvm/apic.c
 new file mode 100644
 index 000..3924f9e
 --- /dev/null
 +++ b/hw/kvm/apic.c
 @@ -0,0 +1,154 @@
 +/*
 + * KVM in-kernel APIC support
 + *
 + * Copyright (c) 2011 Siemens AG
 + *
 + * Authors:
 + *  Jan Kiszka          jan.kis...@siemens.com
 + *
 + * This work is licensed under the terms of the GNU GPL version 2.
 + * See the COPYING file in the top-level directory.
 + */
 +#include hw/apic_internal.h
 +#include kvm.h
 +
 +static inline void kvm_apic_set_reg(struct kvm_lapic_state *kapic,
 +                                   int reg_id, uint32_t val)
 +{
 +    *((uint32_t *)(kapic-regs + (reg_id  4))) = val;
 +}
 +
 +static inline uint32_t kvm_apic_get_reg(struct kvm_lapic_state *kapic,
 +                                       int reg_id)
 +{
 +    return *((uint32_t *)(kapic-regs + (reg_id  4)));
 +}
 +
 +int kvm_put_apic(CPUState *env)
 +{
 +    APICState *s = DO_UPCAST(APICState, busdev.qdev, env-apic_state);

 Please pass APICState instead of CPUState.

 DeviceState, I suppose. Yes, makes more sense, update will follow.

 On second look: no, I'll keep it as is. All kvm_get/put_* helpers have
 this kind of signature, i.e. are working against env.

There's kvm_get_supported_msrs for example.

 kvm_get/put_apic
 just happens to be implemented outside of target-i386/kvm.c. And they
 require both APIC and CPUState anyway, so it makes no difference.

It does, passing CPUState violates layering. Please split the
functions so that the ioctl calls which need CPUState go to kvm.c. For
example, the functions in kvm/apic.c could just perform copying from
kvm_lapic_state fields to APICstate fields and vice versa.

The KVM interface by the way does not look so clever. Why isn't there
just an array of 32 bit fields so the casts can be avoided? Perhaps
APICState should be (later) changed to match KVM version so that the
structure can be passed directly without copying.
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH 2/2] i8254: Rework fix interaction with HPET in legacy mode

2011-12-10 Thread Blue Swirl

On Sat, Dec 10, 2011 at 12:28, Jan Kiszka jan.kis...@web.de wrote:
 From: Jan Kiszka jan.kis...@siemens.com

 When the HPET enters legacy mode, the IRQ output of the PIT is
 suppressed and replaced by the HPET timer 0. But the current code to
 emulate this was broken in many ways. It reset the PIT state after
 re-enabling, it worked against a stale static PIT structure, and it did
 not properly save/restore the IRQ output mask in the PIT vmstate.

 This patch solves the PIT IRQ control in a different way. On x86, it
 both redirects the PIT IRQ to the HPET, just like the RTC. But it also
 keeps the control line from the HPET to the PIT. This allows to disable
 the PIT QEMU timer when it is not needed. The PIT's view on the control
 line state is now saved in the same format that qemu-kvm is already
 using.

 Note that, in contrast to the suppressed RTC IRQ line, we do not need to
 save/restore the PIT line state in the HPET. As we trigger a PIT IRQ
 update via the control line, the line state is reconstructed on mode
 switch.

 Signed-off-by: Jan Kiszka jan.kis...@siemens.com
 ---
  hw/alpha_dp264.c   |    2 +-
  hw/hpet.c          |   38 +---
  hw/hpet_emul.h     |    3 ++
  hw/i8254.c         |   60 +--
  hw/mips_fulong2e.c |    2 +-
  hw/mips_jazz.c     |    2 +-
  hw/mips_malta.c    |    2 +-
  hw/mips_r4k.c      |    2 +-
  hw/pc.c            |   13 --
  hw/pc.h            |   13 +--
  hw/ppc_prep.c      |    2 +-
  11 files changed, 74 insertions(+), 65 deletions(-)

 diff --git a/hw/alpha_dp264.c b/hw/alpha_dp264.c
 index fcc20e9..412ccf0 100644
 --- a/hw/alpha_dp264.c
 +++ b/hw/alpha_dp264.c
 @@ -70,7 +70,7 @@ static void clipper_init(ram_addr_t ram_size,
     pci_bus = typhoon_init(ram_size, rtc_irq, cpus, clipper_pci_map_irq);

     rtc_init(1980, rtc_irq);
 -    pit_init(0x40, 0);
 +    pit_init(0x40, isa_get_irq(0));
     isa_create_simple(i8042);

     /* VGA setup.  Don't bother loading the bios.  */
 diff --git a/hw/hpet.c b/hw/hpet.c
 index 1b64e6a..ace0b1d 100644
 --- a/hw/hpet.c
 +++ b/hw/hpet.c
 @@ -64,6 +64,7 @@ typedef struct HPETState {
     qemu_irq irqs[HPET_NUM_IRQ_ROUTES];
     uint32_t flags;
     uint8_t rtc_irq_level;
 +    qemu_irq pit_enabled;
     uint8_t num_timers;
     HPETTimer timer[HPET_MAX_TIMERS];

 @@ -572,12 +573,15 @@ static void hpet_ram_write(void *opaque, 
 target_phys_addr_t addr,
                     hpet_del_timer(s-timer[i]);
                 }
             }
 -            /* i8254 and RTC are disabled when HPET is in legacy mode */
 +            /* i8254 and RTC output pins are disabled
 +             * when HPET is in legacy mode */
             if (activating_bit(old_val, new_val, HPET_CFG_LEGACY)) {
 -                hpet_pit_disable();
 +                qemu_set_irq(s-pit_enabled, 0);
 +                qemu_irq_lower(s-irqs[0]);
                 qemu_irq_lower(s-irqs[RTC_ISA_IRQ]);
             } else if (deactivating_bit(old_val, new_val, HPET_CFG_LEGACY)) {
 -                hpet_pit_enable();
 +                qemu_irq_lower(s-irqs[0]);
 +                qemu_set_irq(s-pit_enabled, 1);
                 qemu_set_irq(s-irqs[RTC_ISA_IRQ], s-rtc_irq_level);
             }
             break;
 @@ -631,7 +635,6 @@ static void hpet_reset(DeviceState *d)
  {
     HPETState *s = FROM_SYSBUS(HPETState, sysbus_from_qdev(d));
     int i;
 -    static int count = 0;

     for (i = 0; i  s-num_timers; i++) {
         HPETTimer *timer = s-timer[i];
 @@ -648,29 +651,27 @@ static void hpet_reset(DeviceState *d)
         timer-wrap_flag = 0;
     }

 +    qemu_set_irq(s-pit_enabled, 1);
     s-hpet_counter = 0ULL;
     s-hpet_offset = 0ULL;
     s-config = 0ULL;
 -    if (count  0) {
 -        /* we don't enable pit when hpet_reset is first called (by hpet_init)
 -         * because hpet is taking over for pit here. On subsequent 
 invocations,
 -         * hpet_reset is called due to system reset. At this point control 
 must
 -         * be returned to pit until SW reenables hpet.
 -         */
 -        hpet_pit_enable();
 -    }
     hpet_cfg.hpet[s-hpet_id].event_timer_block_id = (uint32_t)s-capability;
     hpet_cfg.hpet[s-hpet_id].address = sysbus_from_qdev(d)-mmio[0].addr;
 -    count = 1;
  }

 -static void hpet_handle_rtc_irq(void *opaque, int n, int level)
 +static void hpet_handle_legacy_irq(void *opaque, int n, int level)
  {
     HPETState *s = FROM_SYSBUS(HPETState, opaque);

 -    s->rtc_irq_level = level;
 -    if (!hpet_in_legacy_mode(s)) {
 -        qemu_set_irq(s->irqs[RTC_ISA_IRQ], level);
 +    if (n == HPET_LEGACY_PIT_INT) {
 +        if (!hpet_in_legacy_mode(s)) {
 +            qemu_set_irq(s->irqs[0], level);
 +        }
 +    } else {
 +        s->rtc_irq_level = level;
 +        if (!hpet_in_legacy_mode(s)) {
 +            qemu_set_irq(s->irqs[RTC_ISA_IRQ], level);
 +        }
     }
  }

 @@ -713,7 +714,8 @@ static int hpet_init(SysBusDevice *dev)

Re: [PATCH 0/2] pit/hpet: Fix legacy mode switching

2011-12-10 Thread Blue Swirl

On Sat, Dec 10, 2011 at 12:28, Jan Kiszka jan.kis...@web.de wrote:
 This is a small preparatory series to allow the introduction of the KVM
 in-kernel PIT. Of course, it is also a fix for the various bugs in the
 related PIT/HPET code. See patches for details.

 Jan Kiszka (2):
  hpet: Save/restore cached RTC IRQ level
  i8254: Rework & fix interaction with HPET in legacy mode

I had one comment on this patch.

Otherwise nice cleanups, I think this logic matches real PIT/HPET
routing better.

  hw/alpha_dp264.c   |    2 +-
  hw/hpet.c          |   64 +--
  hw/hpet_emul.h     |    3 ++
  hw/i8254.c         |   60 +++-
  hw/mips_fulong2e.c |    2 +-
  hw/mips_jazz.c     |    2 +-
  hw/mips_malta.c    |    2 +-
  hw/mips_r4k.c      |    2 +-
  hw/pc.c            |   13 --
  hw/pc.h            |   13 +-
  hw/ppc_prep.c      |    2 +-
  11 files changed, 100 insertions(+), 65 deletions(-)

 --
 1.7.3.4

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH 2/2] i8254: Rework & fix interaction with HPET in legacy mode

2011-12-10 Thread Blue Swirl

On Sat, Dec 10, 2011 at 15:51, Jan Kiszka jan.kis...@web.de wrote:
 On 2011-12-10 16:49, Blue Swirl wrote:

 +ISADevice *pit_init(int base, qemu_irq irq)

 Please retain this function in pc.h, or even better, introduce i8254.h.

 No concerns about i8254.h, but this function does not qualify for static
 inline.

The function is static inline in a header file not for performance
reasons, but to keep the instantiation separate from device internals.


 +{
 +    ISADevice *dev;
 +
 +    dev = isa_create("isa-pit");
 +    qdev_prop_set_uint32(&dev->qdev, "iobase", base);
 +    qdev_init_nofail(&dev->qdev);
 +    qdev_connect_gpio_out(&dev->qdev, 0, irq);
 +
 +    return dev;
 +}
 +

 Jan

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH v4 12/15] kvm: x86: Add user space part for in-kernel APIC

2011-12-10 Thread Blue Swirl

On Sat, Dec 10, 2011 at 15:58, Jan Kiszka jan.kis...@web.de wrote:
 On 2011-12-10 16:40, Blue Swirl wrote:
 On Fri, Dec 9, 2011 at 07:52, Jan Kiszka jan.kis...@siemens.com wrote:
 On 2011-12-09 08:45, Jan Kiszka wrote:
 On 2011-12-08 22:16, Blue Swirl wrote:
 On Thu, Dec 8, 2011 at 11:52, Jan Kiszka jan.kis...@siemens.com wrote:
 This introduces the alternative APIC backend which makes use of KVM's
 in-kernel device model. External NMI injection via LINT1 is emulated by
 checking the current state of the in-kernel APIC, only injecting a NMI
 into the VCPU if LINT1 is unmasked and configured to DM_NMI.

 MSI is not yet supported, so we disable this when the in-kernel model is
 in use.

 CC: Lai Jiangshan la...@cn.fujitsu.com
 Signed-off-by: Jan Kiszka jan.kis...@siemens.com
 ---
  Makefile.target   |    2 +-
  hw/kvm/apic.c     |  154 
 +
  hw/pc.c           |   15 --
  kvm.h             |    3 +
  target-i386/kvm.c |    8 +++
  5 files changed, 176 insertions(+), 6 deletions(-)
  create mode 100644 hw/kvm/apic.c

 diff --git a/Makefile.target b/Makefile.target
 index b549988..76de485 100644
 --- a/Makefile.target
 +++ b/Makefile.target
 @@ -236,7 +236,7 @@ obj-i386-y += vmport.o
  obj-i386-y += device-hotplug.o pci-hotplug.o smbios.o wdt_ib700.o
  obj-i386-y += debugcon.o multiboot.o
  obj-i386-y += pc_piix.o
 -obj-i386-$(CONFIG_KVM) += kvm/clock.o
 +obj-i386-$(CONFIG_KVM) += kvm/clock.o kvm/apic.o
  obj-i386-$(CONFIG_SPICE) += qxl.o qxl-logger.o qxl-render.o

  # shared objects
 diff --git a/hw/kvm/apic.c b/hw/kvm/apic.c
 new file mode 100644
 index 0000000..3924f9e
 --- /dev/null
 +++ b/hw/kvm/apic.c
 @@ -0,0 +1,154 @@
 +/*
 + * KVM in-kernel APIC support
 + *
 + * Copyright (c) 2011 Siemens AG
 + *
 + * Authors:
 + *  Jan Kiszka          jan.kis...@siemens.com
 + *
 + * This work is licensed under the terms of the GNU GPL version 2.
 + * See the COPYING file in the top-level directory.
 + */
 +#include "hw/apic_internal.h"
 +#include "kvm.h"
 +
 +static inline void kvm_apic_set_reg(struct kvm_lapic_state *kapic,
 +                                   int reg_id, uint32_t val)
 +{
 +    *((uint32_t *)(kapic->regs + (reg_id << 4))) = val;
 +}
 +
 +static inline uint32_t kvm_apic_get_reg(struct kvm_lapic_state *kapic,
 +                                       int reg_id)
 +{
 +    return *((uint32_t *)(kapic->regs + (reg_id << 4)));
 +}
 +
 +int kvm_put_apic(CPUState *env)
 +{
 +    APICState *s = DO_UPCAST(APICState, busdev.qdev, env->apic_state);

 Please pass APICState instead of CPUState.

 DeviceState, I suppose. Yes, makes more sense, update will follow.

 On second look: no, I'll keep it as is. All kvm_get/put_* helpers have
 this kind of signature, i.e. are working against env.

 There's kvm_get_supported_msrs for example.

 kvm_get/put_apic
 just happens to be implemented outside of target-i386/kvm.c. And they
 require both APIC and CPUState anyway, so it makes no difference.

 It does, passing CPUState violates layering. Please split the
 functions so that the ioctl calls which need CPUState go to kvm.c. For
 example, the functions in kvm/apic.c could just perform copying from
 kvm_lapic_state fields to APICstate fields and vice versa.

 That's a good idea.


 The KVM interface by the way does not look so clever. Why isn't there
 just an array of 32 bit fields so the casts can be avoided? Perhaps
 APICState should be (later) changed to match KVM version so that the
 structure can be passed directly without copying.

 Wouldn't that complicate the use in the user space model again? At least
 for registers that are used with both backends.

Well, we have (at least) two styles of modeling devices.

In the first one, the device state structure contains an array of
registers, so the functions which use them may need for example to
perform some bit field extraction to get what they need.

In the model used by APIC and others, the structure contains cooked
values, for example divide_count and count_shift in APICState. This
means that the CPU accesses get slightly slower since the fields need
to be packed and unpacked but the other functions may be faster.

Which one is better depends on the frequency and importance of register
accesses by the CPU vs. other accesses. But it shouldn't complicate things
much either way. Actually, design choices like this may have been made
without too much consideration.

Alternatively, KVM interface could be changed to take QEMU structure
directly, but I don't suppose that would be a good idea. It would be
easier for everyone if QEMU changed instead.
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH 2/2] i8254: Rework & fix interaction with HPET in legacy mode

2011-12-10 Thread Blue Swirl

On Sat, Dec 10, 2011 at 16:03, Jan Kiszka jan.kis...@web.de wrote:
 On 2011-12-10 16:54, Blue Swirl wrote:
 On Sat, Dec 10, 2011 at 15:51, Jan Kiszka jan.kis...@web.de wrote:
 On 2011-12-10 16:49, Blue Swirl wrote:

 +ISADevice *pit_init(int base, qemu_irq irq)

 Please retain this function in pc.h, or even better, introduce i8254.h.

 No concerns about i8254.h, but this function does not qualify for static
 inline.

 The function is static inline in a header file not for performance
 reasons, but to keep the instantiation separate from device internals.

 Not performance, footprint and header dependencies. You need to pull in
 all the stuff the inline function needs for everyone including the
 header that contains this function. That's messy.

There's only ISA and qdev stuff; that's not messy since both are
needed in any case.

 Even if the instantiation helper should not poke into the device model
 internals (and I don't want this to change as well), it belongs to the
 module that implements the device. We do the same with other fabric
 functions.

In this case, the callers have the same needs and there are several of
them. In general this need not be true at all: if, for example, some
part of instantiation had to be skipped, the functions might need
to be manually inlined at the board level anyway. The instantiation
definitely does not belong to the implementer but to the creator.
Ideally, the file implementing the device contains only static functions
and instantiation is either in a header file or at the board. This is
true, for example, for several Sparc32 devices.
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH 2/2] i8254: Rework & fix interaction with HPET in legacy mode

2011-12-10 Thread Blue Swirl

On Sat, Dec 10, 2011 at 16:29, Jan Kiszka jan.kis...@web.de wrote:
 On 2011-12-10 17:26, Blue Swirl wrote:
 On Sat, Dec 10, 2011 at 16:03, Jan Kiszka jan.kis...@web.de wrote:
 On 2011-12-10 16:54, Blue Swirl wrote:
 On Sat, Dec 10, 2011 at 15:51, Jan Kiszka jan.kis...@web.de wrote:
 On 2011-12-10 16:49, Blue Swirl wrote:

 +ISADevice *pit_init(int base, qemu_irq irq)

 Please retain this function in pc.h, or even better, introduce i8254.h.

 No concerns about i8254.h, but this function does not qualify for static
 inline.

 The function is static inline in a header file not for performance
 reasons, but to keep the instantiation separate from device internals.

 Not performance, footprint and header dependencies. You need to pull in
 all the stuff the inline function needs for everyone including the
 header that contains this function. That's messy.

 There's only ISA and qdev stuff, that's not messy since both are
 needed in any case.

 Even if the instantiation helper should not poke into the device model
 internals (and I don't want this to change as well), it belongs to the
 module that implements the device. We do the same with other fabric
 functions.

 In this case, the callers have the same needs and there are several of
 them. In general this need not be true at all, if for example some
 part of instantiation would have to be skipped, the functions may need
 to be manually inlined to the board level anyway. The instantiation
 definitely does not belong to the implementer but to the creator.
 Ideally file implementing the device contains only static functions
 and instantiation is either in a header file or at the board. This is
 true for example for several Sparc32 devices.

 The helper is wrapping the property base API into a proper function call
 - nothing that is board-specific.

Not in this case, but in general boards could need to pass different
sets of properties or avoid passing something at all.
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH v4 12/15] kvm: x86: Add user space part for in-kernel APIC

2011-12-08 Thread Blue Swirl

On Thu, Dec 8, 2011 at 11:52, Jan Kiszka jan.kis...@siemens.com wrote:
 This introduces the alternative APIC backend which makes use of KVM's
 in-kernel device model. External NMI injection via LINT1 is emulated by
 checking the current state of the in-kernel APIC, only injecting a NMI
 into the VCPU if LINT1 is unmasked and configured to DM_NMI.

 MSI is not yet supported, so we disable this when the in-kernel model is
 in use.

 CC: Lai Jiangshan la...@cn.fujitsu.com
 Signed-off-by: Jan Kiszka jan.kis...@siemens.com
 ---
  Makefile.target   |    2 +-
  hw/kvm/apic.c     |  154 
 +
  hw/pc.c           |   15 --
  kvm.h             |    3 +
  target-i386/kvm.c |    8 +++
  5 files changed, 176 insertions(+), 6 deletions(-)
  create mode 100644 hw/kvm/apic.c

 diff --git a/Makefile.target b/Makefile.target
 index b549988..76de485 100644
 --- a/Makefile.target
 +++ b/Makefile.target
 @@ -236,7 +236,7 @@ obj-i386-y += vmport.o
  obj-i386-y += device-hotplug.o pci-hotplug.o smbios.o wdt_ib700.o
  obj-i386-y += debugcon.o multiboot.o
  obj-i386-y += pc_piix.o
 -obj-i386-$(CONFIG_KVM) += kvm/clock.o
 +obj-i386-$(CONFIG_KVM) += kvm/clock.o kvm/apic.o
  obj-i386-$(CONFIG_SPICE) += qxl.o qxl-logger.o qxl-render.o

  # shared objects
 diff --git a/hw/kvm/apic.c b/hw/kvm/apic.c
 new file mode 100644
 index 0000000..3924f9e
 --- /dev/null
 +++ b/hw/kvm/apic.c
 @@ -0,0 +1,154 @@
 +/*
 + * KVM in-kernel APIC support
 + *
 + * Copyright (c) 2011 Siemens AG
 + *
 + * Authors:
 + *  Jan Kiszka          jan.kis...@siemens.com
 + *
 + * This work is licensed under the terms of the GNU GPL version 2.
 + * See the COPYING file in the top-level directory.
 + */
 +#include "hw/apic_internal.h"
 +#include "kvm.h"
 +
 +static inline void kvm_apic_set_reg(struct kvm_lapic_state *kapic,
 +                                   int reg_id, uint32_t val)
 +{
 +    *((uint32_t *)(kapic->regs + (reg_id << 4))) = val;
 +}
 +
 +static inline uint32_t kvm_apic_get_reg(struct kvm_lapic_state *kapic,
 +                                       int reg_id)
 +{
 +    return *((uint32_t *)(kapic->regs + (reg_id << 4)));
 +}
 +
 +int kvm_put_apic(CPUState *env)
 +{
 +    APICState *s = DO_UPCAST(APICState, busdev.qdev, env->apic_state);

Please pass APICState instead of CPUState.

 +    struct kvm_lapic_state kapic;
 +    int i;
 +
 +    if (s && kvm_enabled() && kvm_irqchip_in_kernel()) {
 +        memset(&kapic, 0, sizeof(kapic));
 +        kvm_apic_set_reg(&kapic, 0x2, s->id << 24);
 +        kvm_apic_set_reg(&kapic, 0x8, s->tpr);
 +        kvm_apic_set_reg(&kapic, 0xd, s->log_dest << 24);
 +        kvm_apic_set_reg(&kapic, 0xe, s->dest_mode << 28 | 0x0fffffff);
 +        kvm_apic_set_reg(&kapic, 0xf, s->spurious_vec);
 +        for (i = 0; i < 8; i++) {
 +            kvm_apic_set_reg(&kapic, 0x10 + i, s->isr[i]);
 +            kvm_apic_set_reg(&kapic, 0x18 + i, s->tmr[i]);
 +            kvm_apic_set_reg(&kapic, 0x20 + i, s->irr[i]);
 +        }
 +        kvm_apic_set_reg(&kapic, 0x28, s->esr);
 +        kvm_apic_set_reg(&kapic, 0x30, s->icr[0]);
 +        kvm_apic_set_reg(&kapic, 0x31, s->icr[1]);
 +        for (i = 0; i < APIC_LVT_NB; i++) {
 +            kvm_apic_set_reg(&kapic, 0x32 + i, s->lvt[i]);
 +        }
 +        kvm_apic_set_reg(&kapic, 0x38, s->initial_count);
 +        kvm_apic_set_reg(&kapic, 0x3e, s->divide_conf);
 +
 +        return kvm_vcpu_ioctl(env, KVM_SET_LAPIC, &kapic);
 +    }
 +
 +    return 0;
 +}
 +
 +int kvm_get_apic(CPUState *env)

Same here.

 +{
 +    APICState *s = DO_UPCAST(APICState, busdev.qdev, env->apic_state);
 +    struct kvm_lapic_state kapic;
 +    int ret, i, v;
 +
 +    if (s && kvm_enabled() && kvm_irqchip_in_kernel()) {
 +        ret = kvm_vcpu_ioctl(env, KVM_GET_LAPIC, &kapic);
 +        if (ret < 0) {
 +            return ret;
 +        }
 +
 +        s->id = kvm_apic_get_reg(&kapic, 0x2) >> 24;
 +        s->tpr = kvm_apic_get_reg(&kapic, 0x8);
 +        s->arb_id = kvm_apic_get_reg(&kapic, 0x9);
 +        s->log_dest = kvm_apic_get_reg(&kapic, 0xd) >> 24;
 +        s->dest_mode = kvm_apic_get_reg(&kapic, 0xe) >> 28;
 +        s->spurious_vec = kvm_apic_get_reg(&kapic, 0xf);
 +        for (i = 0; i < 8; i++) {
 +            s->isr[i] = kvm_apic_get_reg(&kapic, 0x10 + i);
 +            s->tmr[i] = kvm_apic_get_reg(&kapic, 0x18 + i);
 +            s->irr[i] = kvm_apic_get_reg(&kapic, 0x20 + i);
 +        }
 +        s->esr = kvm_apic_get_reg(&kapic, 0x28);
 +        s->icr[0] = kvm_apic_get_reg(&kapic, 0x30);
 +        s->icr[1] = kvm_apic_get_reg(&kapic, 0x31);
 +        for (i = 0; i < APIC_LVT_NB; i++) {
 +            s->lvt[i] = kvm_apic_get_reg(&kapic, 0x32 + i);
 +        }
 +        s->initial_count = kvm_apic_get_reg(&kapic, 0x38);
 +        s->divide_conf = kvm_apic_get_reg(&kapic, 0x3e);
 +
 +        v = (s->divide_conf & 3) | ((s->divide_conf >> 1) & 4);
 +        s->count_shift = (v + 1) & 7;
 +
 +        s->initial_count_load_time = qemu_get_clock_ns(vm_clock);
 +        apic_next_timer(s, s->initial_count_load_time);
 +    }
 +    return 0;

Re: [PATCH v4 00/15] uq/master: Introduce basic irqchip support

2011-12-08 Thread Blue Swirl

On Thu, Dec 8, 2011 at 11:52, Jan Kiszka jan.kis...@siemens.com wrote:
 Changes in v4:
 - rebased of current uq/master
 - fixed stupid bugs that broke bisectability and user space irqchip mode
 - integrated NMI-over-LINT1 injection logic

I had comments on one patch; the others look fine.

Overall, string-based subtype selection does not seem to be such a
hot idea, but it could be used as a starting point that gets
cleaned up later when we have proper device composition. APIC and x86
interrupt handling need more cleanup anyway.

 CC: Lai Jiangshan la...@cn.fujitsu.com

 Jan Kiszka (15):
  msi: Generalize msix_supported to msi_supported
  kvm: Move kvmclock into hw/kvm folder
  apic: Stop timer on reset
  apic: Inject external NMI events via LINT1
  apic: Introduce backend/frontend infrastructure for KVM reuse
  apic: Open-code timer save/restore
  i8259: Introduce backend/frontend infrastructure for KVM reuse
  ioapic: Introduce backend/frontend infrastructure for KVM reuse
  memory: Introduce memory_region_init_reservation
  kvm: Introduce core services for in-kernel irqchip support
  kvm: x86: Establish IRQ0 override control
  kvm: x86: Add user space part for in-kernel APIC
  kvm: x86: Add user space part for in-kernel i8259
  kvm: x86: Add user space part for in-kernel IOAPIC
  kvm: Arm in-kernel irqchip support

  Makefile.objs                  |    2 +-
  Makefile.target                |    6 +-
  configure                      |    1 +
  hw/apic.c                      |  309 ---
  hw/apic.h                      |    1 +
  hw/apic_common.c               |  312 
 
  hw/apic_internal.h             |  122 
  hw/i8259.c                     |  127 ++--
  hw/i8259_common.c              |  173 ++
  hw/i8259_internal.h            |   82 +++
  hw/ioapic.c                    |  130 ++---
  hw/ioapic_common.c             |  138 ++
  hw/ioapic_internal.h           |  106 ++
  hw/kvm/apic.c                  |  154 
  hw/{kvmclock.c => kvm/clock.c} |    4 +-
  hw/{kvmclock.h => kvm/clock.h} |    0
  hw/kvm/i8259.c                 |  126 
  hw/kvm/ioapic.c                |  101 +
  hw/msi.c                       |    8 +
  hw/msi.h                       |    2 +
  hw/msix.c                      |    9 +-
  hw/msix.h                      |    2 -
  hw/pc.c                        |   19 ++-
  hw/pc.h                        |    1 +
  hw/pc_piix.c                   |   66 -
  kvm-all.c                      |  154 
  kvm-stub.c                     |    5 +
  kvm.h                          |   13 ++
  memory.c                       |   36 +
  memory.h                       |   16 ++
  monitor.c                      |    6 +-
  qemu-config.c                  |    4 +
  qemu-options.hx                |    5 +-
  sysemu.h                       |    1 -
  target-i386/kvm.c              |   19 +++
  trace-events                   |    2 +-
  vl.c                           |    1 -
  37 files changed, 1724 insertions(+), 539 deletions(-)
  create mode 100644 hw/apic_common.c
  create mode 100644 hw/apic_internal.h
  create mode 100644 hw/i8259_common.c
  create mode 100644 hw/i8259_internal.h
  create mode 100644 hw/ioapic_common.c
  create mode 100644 hw/ioapic_internal.h
  create mode 100644 hw/kvm/apic.c
  rename hw/{kvmclock.c => kvm/clock.c} (98%)
  rename hw/{kvmclock.h => kvm/clock.h} (100%)
  create mode 100644 hw/kvm/i8259.c
  create mode 100644 hw/kvm/ioapic.c

 --
 1.7.3.4

Re: [RFC][PATCH 14/16] kvm: x86: Add user space part for in-kernel i8259

2011-12-04 Thread Blue Swirl

On Sun, Dec 4, 2011 at 16:35, Avi Kivity a...@redhat.com wrote:
 On 12/04/2011 05:19 PM, Jan Kiszka wrote:
 
  In the sense that kernel-apic is just an accelerated apic.  From the
  guest point of view, there's no difference, and that should be reflected
  in the device model.

 That was my goal as well: The guest should not notice the difference,
 but the admin on the host side should still be able to tell both
 internally fairly different models apart.

 This should be some attribute, not the name.

 Plus the code should be
 clearly split where there are differences and explicitly shared where
 there aren't.

 That's a good goal, yes.

I'd prefer a unified device built from a single source file if
possible. This conflicts with the build-once model, though.


 
  If I'm reading an apic register, either from the guest or via a monitor
  debug interface, I shouldn't care whether it's accelerated or not.  The
  guest part already holds, of course.

 Specifically for the debug scenario, I'd prefer the clear
 differentiation by name as there can always remain subtle differences in
 the implementation of kernel vs. user space. Someone debugging the guest
 and/or qemu/kvm should remain aware of this.

 Aware, yes, but the name change is too drastic.

It should also be possible to migrate from the non-KVM device to the KVM
version; different names would prevent that forever.
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [Qemu-devel] [PATCH] KVM: Add wrapper script around QEMU to test kernels

2011-08-25 Thread Blue Swirl

On Wed, Aug 24, 2011 at 9:38 PM, Alexander Graf ag...@suse.de wrote:
 On LinuxCon I had a nice chat with Linus on what he thinks kvm-tool
 would be doing and what he expects from it. Basically he wants a
 small and simple tool he and other developers can run to try out and
 see if the kernel they just built actually works.

 Fortunately, QEMU can do that today already! The only piece that was
 missing was the simple piece of the equation, so here is a script
 that wraps around QEMU and executes a kernel you just built.

 If you do have KVM around and are not cross-compiling, it will use
 KVM. But if you don't, you can still fall back to emulation mode and
 at least check if your kernel still does what you expect. I only
 implemented support for s390x and ppc there, but it's easily extensible
 to more platforms, as QEMU can emulate (and virtualize) pretty much
 any platform out there.

 If you don't have qemu installed, please do so before using this script. Your
 distro should provide a package for it (might even call it kvm). If not,
 just compile it from source - it's not hard!

 To quickly get going, just execute the following as user:

    $ ./Documentation/run-qemu.sh -r / -a init=/bin/bash

 This will drop you into a shell on your rootfs.

 Happy hacking!

 Signed-off-by: Alexander Graf ag...@suse.de

 ---

 v1 -> v2:

  - fix naming of QEMU
  - use grep -q for has_config
  - support multiple -a args
  - spawn gdb on execution
  - pass through qemu options
  - dont use qemu-system-x86_64 on i386
  - add funny sentence to startup text
  - more helpful error messages
 ---
  scripts/run-qemu.sh |  334 
 +++
  1 files changed, 334 insertions(+), 0 deletions(-)
  create mode 100755 scripts/run-qemu.sh

 diff --git a/scripts/run-qemu.sh b/scripts/run-qemu.sh
 new file mode 100755
 index 0000000..5d4e185
 --- /dev/null
 +++ b/scripts/run-qemu.sh
 @@ -0,0 +1,334 @@
 +#!/bin/bash
 +#
 +# QEMU Launcher
 +#
 +# This script enables simple use of the KVM and QEMU tool stack for
 +# easy kernel testing. It allows to pass either a host directory to
 +# the guest or a disk image. Example usage:
 +#
 +# Run the host root fs inside a VM:
 +#
 +# $ ./scripts/run-qemu.sh -r /
 +#
 +# Run the same with SDL:
 +#
 +# $ ./scripts/run-qemu.sh -r / --sdl
 +#
 +# Or with a PPC build:
 +#
 +# $ ARCH=ppc ./scripts/run-qemu.sh -r /
 +#
 +# PPC with a mac99 model by passing options to QEMU:
 +#
 +# $ ARCH=ppc ./scripts/run-qemu.sh -r / -- -M mac99
 +#
 +
 +USE_SDL=
 +USE_VNC=
 +USE_GDB=1
 +KERNEL_BIN=arch/x86/boot/bzImage
 +MON_STDIO=
 +KERNEL_APPEND2=
 +SERIAL=ttyS0
 +SERIAL_KCONFIG=SERIAL_8250
 +BASENAME=$(basename $0)
 +
 +function usage() {
 +       echo 
 +$BASENAME allows you to execute a virtual machine with the Linux kernel
 +that you just built. To only execute a simple VM, you can just run it
 +on your root fs with \-r / -a init=/bin/bash\
 +
 +       -a, --append parameters
 +               Append the given parameters to the kernel command line.
 +
 +       -d, --disk image
 +               Add the image file as disk into the VM.
 +
 +       -D, --no-gdb
 +               Don't run an xterm with gdb attached to the guest.
 +
 +       -r, --root directory
 +               Use the specified directory as root directory inside the 
 guest.
 +
 +       -s, --sdl
 +               Enable SDL graphical output.
 +
 +       -S, --smp cpus
 +               Set number of virtual CPUs.
 +
 +       -v, --vnc
 +               Enable VNC graphical output.
 +
 +Examples:
 +
 +       Run the host root fs inside a VM:
 +       $ ./scripts/run-qemu.sh -r /
 +
 +       Run the same with SDL:
 +       $ ./scripts/run-qemu.sh -r / --sdl
 +
 +       Or with a PPC build:
 +       $ ARCH=ppc ./scripts/run-qemu.sh -r /
 +
 +       PPC with a mac99 model by passing options to QEMU:
 +       $ ARCH=ppc ./scripts/run-qemu.sh -r / -- -M mac99
 +
 +}
 +
 +function require_config() {
 +       if [ $(grep CONFIG_$1=y .config) ]; then
 +               return
 +       fi
 +
 +       echo You need to enable CONFIG_$1 for run-qemu to work properly
 +       exit 1
 +}
 +
 +function has_config() {
 +       grep -q CONFIG_$1=y .config
 +}
 +
 +function drive_if() {
 +       if has_config VIRTIO_BLK; then
 +               echo virtio
 +       elif has_config ATA_PIIX; then
 +               echo ide
 +       else
 +               echo \
 +Your kernel must have either VIRTIO_BLK or ATA_PIIX
 +enabled for block device assignment 2
 +               exit 1
 +       fi
 +}
 +
 +GETOPT=`getopt -o a:d:Dhr:sS:v --long 
 append,disk:,no-gdb,help,root:,sdl,smp:,vnc \
 +       -n $(basename \$0\) -- $@`
 +
 +if [ $? != 0 ]; then
 +       echo Terminating... 2
 +       exit 1
 +fi
 +
 +eval set -- $GETOPT
 +
 +while true; do
 +       case $1 in
 +       -a|--append)
 +               KERNEL_APPEND2=$KERNEL_APPEND2 $KERNEL_APPEND2
 +               shift
 +               ;;
 +       -d|--disk)
 +

Re: [Qemu-devel] [PATCH] KVM: Add wrapper script around Qemu to test kernels

2011-08-24 Thread Blue Swirl

On Tue, Aug 23, 2011 at 10:16 PM, Alexander Graf ag...@suse.de wrote:
 On LinuxCon I had a nice chat with Linus on what he thinks kvm-tool
 would be doing and what he expects from it. Basically he wants a
 small and simple tool he and other developers can run to try out and
 see if the kernel they just built actually works.

 Fortunately, Qemu can do that today already! The only piece that was
 missing was the simple piece of the equation, so here is a script
 that wraps around Qemu and executes a kernel you just built.

 If you do have KVM around and are not cross-compiling, it will use
 KVM. But if you don't, you can still fall back to emulation mode and
 at least check if your kernel still does what you expect. I only
 implemented support for s390x and ppc there, but it's easily extensible
 to more platforms, as Qemu can emulate (and virtualize) pretty much
 any platform out there.

 If you don't have qemu installed, please do so before using this script. Your
 distro should provide a package for it (might even call it kvm). If not,
 just compile it from source - it's not hard!

 To quickly get going, just execute the following as user:

    $ ./Documentation/run-qemu.sh -r / -a init=/bin/bash

 This will drop you into a shell on your rootfs.

 Happy hacking!

 Signed-off-by: Alexander Graf ag...@suse.de
 ---
  Documentation/run-qemu.sh |  284 
 +
  1 files changed, 284 insertions(+), 0 deletions(-)
  create mode 100755 Documentation/run-qemu.sh

 diff --git a/Documentation/run-qemu.sh b/Documentation/run-qemu.sh
 new file mode 100755
 index 0000000..0bac924
 --- /dev/null
 +++ b/Documentation/run-qemu.sh
 @@ -0,0 +1,284 @@
 +#!/bin/bash
 +#
 +# QEMU Launcher
 +#
 +# This script enables simple use of the KVM and Qemu tool stack for

QEMU

 +# easy kernel testing. It allows to pass either a host directory to
 +# the guest or a disk image. Example usage:
 +#
 +# Run the host root fs inside a VM:
 +#
 +# $ ./Documentation/run-qemu.sh -r /
 +#
 +# Run the same with SDL:
 +#
 +# $ ./Documentation/run-qemu.sh -r / --sdl
 +#
 +# Or with a PPC build:
 +#
 +# $ ARCH=ppc ./Documentation/run-qemu.sh -r /
 +#
 +#
 +
 +USE_SDL=
 +USE_VNC=
 +KERNEL_BIN=arch/x86/boot/bzImage
 +MON_STDIO=
 +KERNEL_APPEND2=
 +SERIAL=ttyS0
 +SERIAL_KCONFIG=SERIAL_8250
 +
 +function usage() {
 +       echo 
 +Run-Qemu allows you to execute a virtual machine with the Linux kernel

run-qemu.sh or $0

 +that you just built. To only execute a simple VM, you can just run it
 +on your root fs with \-r / -a init=/bin/bash\
 +
 +       -a, --append parameters
 +               Append the given parameters to the kernel command line
 +
 +       -d, --disk image
 +               Add the image file as disk into the VM
 +
 +       -r, --root directory
 +               Use the specified directory as root directory inside the 
 guest.
 +
 +       -s, --sdl
 +               Enable SDL graphical output.
 +
 +       -S, --smp cpus
 +               Set number of virtual CPUs
 +
 +       -v, --vnc
 +               Enable VNC graphical output.
 +
 +Examples:
 +
 +       Run the host root fs inside a VM:
 +       $ ./Documentation/run-qemu.sh -r /
 +
 +       Run the same with SDL:
 +       $ ./Documentation/run-qemu.sh -r / --sdl
 +
 +       Or with a PPC build:
 +       $ ARCH=ppc ./Documentation/run-qemu.sh -r /
 +
 +}
 +
 +function require_config() {
 +       if [ $(grep CONFIG_$1=y .config) ]; then
 +               return
 +       fi
 +
 +       echo You need to enable CONFIG_$1 for run-qemu to work properly
 +       exit 1
 +}
 +
 +function has_config() {
 +       grep CONFIG_$1=y .config
 +}
 +
 +function drive_if() {
 +       if [ $(has_config VIRTIO_BLK) ]; then
 +               echo virtio
 +       elif [ $(has_config ATA_PIIX) ]; then
 +               echo ide
 +       else
 +               echo \
 +Your kernel must have either VIRTIO_BLK or ATA_PIIX
 +enabled for block device assignment 2
 +               exit 1
 +       fi
 +}
 +
 +GETOPT=`getopt -o a:d:hr:sS:v --long append,disk:,help,root:,sdl,smp:,vnc \
 +       -n $(basename \$0\) -- $@`
 +
 +if [ $? != 0 ]; then
 +       echo Terminating... 2
 +       exit 1
 +fi
 +
 +eval set -- $GETOPT
 +
 +while true; do
 +       case $1 in
 +       -a|--append)
 +               KERNEL_APPEND2=$2
 +               shift 2
 +               ;;
 +       -d|--disk)
 +               QEMU_OPTIONS=$QEMU_OPTIONS -drive \
 +                       file=$2,if=$(drive_if),cache=unsafe
 +               USE_DISK=1
 +               shift 2
 +               ;;
 +       -h|--help)
 +               usage
 +               exit 0
 +               ;;
 +       -r|--root)
 +               ROOTFS=$2
 +               shift 2
 +               ;;
 +       -s|--sdl)
 +               USE_SDL=1
 +               shift
 +               ;;
 +       -S|--smp)
 +               SMP=$2
 +               shift 2
 +               ;;
 +       -v|--vnc)
 +               USE_VNC=1
 +               shift
 +

Re: [Qemu-devel] [PATCH] Introduce QEMU_NEW()

2011-07-25 Thread Blue Swirl

On Mon, Jul 25, 2011 at 1:09 PM, Avi Kivity a...@redhat.com wrote:
 On 07/25/2011 01:04 PM, Alexander Graf wrote:

 On 25.07.2011, at 12:02, Avi Kivity wrote:

   On 07/25/2011 12:56 PM, Alexander Graf wrote:
   
      That argument can be used to block any change.  You'll get used to
  it in time.  The question is, is the new interface better or not.
 
   I agree that it keeps you from accidentally malloc'ing a struct of
  pointer size. But couldn't we also just add this to checkpatch.pl?
 
   Better APIs trump better patch review.

 Only if you enforce them. The only sensible thing for QEMU_NEW (despite
 the general rule of upper case macros, I'd actually prefer this one to be
 lower case though since it's so often used) would be to remove qemu_malloc,
 declare malloc() as unusable and convert all users of qemu_malloc() to
 qemu_new().

 Some qemu_mallocs() will remain (allocating a byte array or something
 variable sized).

 I agree qemu_new() will be nicer, but that will have to wait until Blue is
 several light-days away from Earth.

There is no escape. Don't make me destroy you. You cannot hide forever, Luke.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [Qemu-devel] [PATCH] Introduce QEMU_NEW()

2011-07-25 Thread Blue Swirl

On Mon, Jul 25, 2011 at 3:21 PM, Anthony Liguori anth...@codemonkey.ws wrote:
 On 07/25/2011 07:18 AM, Avi Kivity wrote:

 On 07/25/2011 03:11 PM, Anthony Liguori wrote:

 On 07/25/2011 03:51 AM, Avi Kivity wrote:

 qemu_malloc() is type-unsafe as it returns a void pointer. Introduce
 QEMU_NEW() (and QEMU_NEWZ()), which return the correct type.

 Just use g_new() and g_new0()


 These bypass qemu_malloc(). Are we okay with that?

 Yes.  We can just make qemu_malloc use g_malloc.

It would be also possible to make g_malloc() use qemu_malloc(). That
way we could keep the tracepoints which would lose their value with
g_malloc() otherwise.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [Qemu-devel] [PATCH] Introduce QEMU_NEW()

2011-07-25 Thread Blue Swirl

On Mon, Jul 25, 2011 at 5:51 PM, Paolo Bonzini pbonz...@redhat.com wrote:
 On 07/25/2011 04:23 PM, Blue Swirl wrote:

   Yes.  We can just make qemu_malloc use g_malloc.

 It would be also possible to make g_malloc() use qemu_malloc(). That
 way we could keep the tracepoints which would lose their value with
 g_malloc() otherwise.

 qemu_malloc uses g_malloc => you keep tracepoints, you just do not trace
 memory allocated by glib

Unless the plan is to replace all qemu_malloc() calls with calls to g_malloc().

 g_malloc uses qemu_malloc => you keep and expand tracepoints, you lose the
 very nicely tuned allocator

It is replaced by libc malloc() which shouldn't be so bad either.

 The former is much less code, however it requires qemu_malloc to be always
 balanced with qemu_free (patches ready and on my github tree, won't be sent
 before KVM Forum though...).

Freeing qemu_malloc() memory with plain free() is a bug.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

1 2 3 >

1 - 100 of 261 matches

Mail list logo