date:20111017

[Qemu-devel] [PATCH] cloop.c: use gfree,instead of free

2011-10-17 Thread Dong Xu Wang

Use gfree, to pair with g_malloc. Also fix coding style.

Signed-off-by: Dong Xu Wang wdon...@linux.vnet.ibm.com
---
 block/cloop.c |  114 +++--
 1 files changed, 62 insertions(+), 52 deletions(-)

diff --git a/block/cloop.c b/block/cloop.c
index 8cff9f2..708093e 100644
--- a/block/cloop.c
+++ b/block/cloop.c
@@ -39,21 +39,23 @@ typedef struct BDRVCloopState {
 
 static int cloop_probe(const uint8_t *buf, int buf_size, const char *filename)
 {
-const char* magic_version_2_0=#!/bin/sh\n
-   #V2.0 Format\n
-   modprobe cloop file=$0  mount -r -t iso9660 /dev/cloop $1\n;
-int length=strlen(magic_version_2_0);
-if(lengthbuf_size)
-   length=buf_size;
-if(!memcmp(magic_version_2_0,buf,length))
-   return 2;
+const char *magic_version_2_0 = #!/bin/sh\n
+#V2.0 Format\n
+modprobe cloop file=$0  mount -r -t iso9660 /dev/cloop $1\n;
+int length = strlen(magic_version_2_0);
+if (length  buf_size) {
+length = buf_size;
+}
+if (!memcmp(magic_version_2_0, buf, length)) {
+return 2;
+}
 return 0;
 }
 
 static int cloop_open(BlockDriverState *bs, int flags)
 {
 BDRVCloopState *s = bs-opaque;
-uint32_t offsets_size,max_compressed_block_size=1,i;
+uint32_t offsets_size, max_compressed_block_size = 1, i;
 
 bs-read_only = 1;
 
@@ -73,26 +75,28 @@ static int cloop_open(BlockDriverState *bs, int flags)
 s-offsets = g_malloc(offsets_size);
 if (bdrv_pread(bs-file, 128 + 4 + 4, s-offsets, offsets_size) 
 offsets_size) {
-   goto cloop_close;
+goto cloop_close;
 }
 for(i=0;is-n_blocks;i++) {
-   s-offsets[i]=be64_to_cpu(s-offsets[i]);
-   if(i0) {
-   uint32_t size=s-offsets[i]-s-offsets[i-1];
-   if(sizemax_compressed_block_size)
-   max_compressed_block_size=size;
-   }
+s-offsets[i] = be64_to_cpu(s-offsets[i]);
+if (i  0) {
+uint32_t size = s-offsets[i]-s-offsets[i - 1];
+if (size  max_compressed_block_size) {
+max_compressed_block_size = size;
+}
+}
 }
 
 /* initialize zlib engine */
-s-compressed_block = g_malloc(max_compressed_block_size+1);
+s-compressed_block = g_malloc(max_compressed_block_size + 1);
 s-uncompressed_block = g_malloc(s-block_size);
-if(inflateInit(s-zstream) != Z_OK)
-   goto cloop_close;
-s-current_block=s-n_blocks;
+if (inflateInit(s-zstream) != Z_OK) {
+goto cloop_close;
+}
+s-current_block = s-n_blocks;
 
 s-sectors_per_block = s-block_size/512;
-bs-total_sectors = s-n_blocks*s-sectors_per_block;
+bs-total_sectors = s-n_blocks * s-sectors_per_block;
 return 0;
 
 cloop_close:
@@ -104,26 +108,29 @@ static inline int cloop_read_block(BlockDriverState *bs, 
int block_num)
 BDRVCloopState *s = bs-opaque;
 
 if(s-current_block != block_num) {
-   int ret;
-uint32_t bytes = s-offsets[block_num+1]-s-offsets[block_num];
+int ret;
+uint32_t bytes = s-offsets[block_num+1] - s-offsets[block_num];
 
 ret = bdrv_pread(bs-file, s-offsets[block_num], s-compressed_block,
  bytes);
-if (ret != bytes)
+if (ret != bytes) {
 return -1;
+}
+
+s-zstream.next_in = s-compressed_block;
+s-zstream.avail_in = bytes;
+s-zstream.next_out = s-uncompressed_block;
+s-zstream.avail_out = s-block_size;
+ret = inflateReset(s-zstream);
+if (ret != Z_OK) {
+return -1;
+}
+ret = inflate(s-zstream, Z_FINISH);
+if (ret != Z_STREAM_END || s-zstream.total_out != s-block_size) {
+return -1;
+}
 
-   s-zstream.next_in = s-compressed_block;
-   s-zstream.avail_in = bytes;
-   s-zstream.next_out = s-uncompressed_block;
-   s-zstream.avail_out = s-block_size;
-   ret = inflateReset(s-zstream);
-   if(ret != Z_OK)
-   return -1;
-   ret = inflate(s-zstream, Z_FINISH);
-   if(ret != Z_STREAM_END || s-zstream.total_out != s-block_size)
-   return -1;
-
-   s-current_block = block_num;
+s-current_block = block_num;
 }
 return 0;
 }
@@ -134,12 +141,14 @@ static int cloop_read(BlockDriverState *bs, int64_t 
sector_num,
 BDRVCloopState *s = bs-opaque;
 int i;
 
-for(i=0;inb_sectors;i++) {
-   uint32_t sector_offset_in_block=((sector_num+i)%s-sectors_per_block),
-   block_num=(sector_num+i)/s-sectors_per_block;
-   if(cloop_read_block(bs, block_num) != 0)
-   return -1;
-   memcpy(buf+i*512,s-uncompressed_block+sector_offset_in_block*512,512);
+for (i = 0; i  nb_sectors; i++) {
+uint32_t sector_offset_in_block = ((sector_num + i) % 
s-sectors_per_block),
+block_num = (sector_num + i) / s-sectors_per_block;
+if (cloop_read_block(bs, block_num)

Re: [Qemu-devel] [RFC128 3/2] Adjust system and pci address spaces to full 64-bit

2011-10-17 Thread David Gibson

On Sun, Oct 16, 2011 at 05:29:07PM +0200, Avi Kivity wrote:
 Now that the memory API supports full 64-bit buses, adjust the relevant
 callers to take advantage of it.

Note that this doesn't, strictly speaking, give you full 64-bit
coverage, since the range covered is 2^64-1 bytes rather than 2^64
bytes.  Cases where that will matter would be very rare, of course.


 Signed-off-by: Avi Kivity a...@redhat.com
 ---
 
 Note needs slight adjustment to patch 2 to make 'info mtree' work.
 
  exec.c  |2 +-
  hw/pc_piix.c|2 +-
  hw/pci_bridge.c |2 +-
  3 files changed, 3 insertions(+), 3 deletions(-)
 
 diff --git a/exec.c b/exec.c
 index d0cbf15..16e37a7 100644
 --- a/exec.c
 +++ b/exec.c
 @@ -3825,7 +3825,7 @@ static void io_mem_init(void)
  static void memory_map_init(void)
  {
  system_memory = g_malloc(sizeof(*system_memory));
 -memory_region_init(system_memory, system, INT64_MAX);
 +memory_region_init(system_memory, system, UINT64_MAX);
  set_system_memory_map(system_memory);
  
  system_io = g_malloc(sizeof(*system_io));
 diff --git a/hw/pc_piix.c b/hw/pc_piix.c
 index ce1c87f..45540e5 100644
 --- a/hw/pc_piix.c
 +++ b/hw/pc_piix.c
 @@ -115,7 +115,7 @@ static void pc_init1(MemoryRegion *system_memory,
  
  if (pci_enabled) {
  pci_memory = g_new(MemoryRegion, 1);
 -memory_region_init(pci_memory, pci, INT64_MAX);
 +memory_region_init(pci_memory, pci, UINT64_MAX);
  rom_memory = pci_memory;
  } else {
  pci_memory = NULL;
 diff --git a/hw/pci_bridge.c b/hw/pci_bridge.c
 index b6287cd..3b786aa 100644
 --- a/hw/pci_bridge.c
 +++ b/hw/pci_bridge.c
 @@ -319,7 +319,7 @@ int pci_bridge_initfn(PCIDevice *dev)
  sec_bus-parent_dev = dev;
  sec_bus-map_irq = br-map_irq;
  sec_bus-address_space_mem = br-address_space_mem;
 -memory_region_init(br-address_space_mem, pci_pridge_pci, INT64_MAX);
 +memory_region_init(br-address_space_mem, pci_pridge_pci, UINT64_MAX);
  sec_bus-address_space_io = br-address_space_io;
  memory_region_init(br-address_space_io, pci_bridge_io, 65536);
  pci_bridge_region_init(br);

-- 
David Gibson| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au  | minimalist, thank you.  NOT _the_ _other_
| _way_ _around_!
http://www.ozlabs.org/~dgibson

Re: [Qemu-devel] [PATCH] cloop.c: use gfree,instead of free

2011-10-17 Thread Ray Wang


On 10/17/2011 02:11 PM, Dong Xu Wang wrote:

Use gfree, to pair with g_malloc. Also fix coding style.

  Shouldn't it be g_free instead of gfree?

Signed-off-by: Dong Xu Wangwdon...@linux.vnet.ibm.com
---
  block/cloop.c |  114 +++--
  1 files changed, 62 insertions(+), 52 deletions(-)

diff --git a/block/cloop.c b/block/cloop.c
index 8cff9f2..708093e 100644
--- a/block/cloop.c
+++ b/block/cloop.c
@@ -39,21 +39,23 @@ typedef struct BDRVCloopState {

  static int cloop_probe(const uint8_t *buf, int buf_size, const char *filename)
  {
-const char* magic_version_2_0=#!/bin/sh\n
-   #V2.0 Format\n
-   modprobe cloop file=$0  mount -r -t iso9660 /dev/cloop $1\n;
-int length=strlen(magic_version_2_0);
-if(lengthbuf_size)
-   length=buf_size;
-if(!memcmp(magic_version_2_0,buf,length))
-   return 2;
+const char *magic_version_2_0 = #!/bin/sh\n
+#V2.0 Format\n
+modprobe cloop file=$0  mount -r -t iso9660 /dev/cloop $1\n;
+int length = strlen(magic_version_2_0);
+if (length  buf_size) {
+length = buf_size;
+}
+if (!memcmp(magic_version_2_0, buf, length)) {
+return 2;
+}
  return 0;
  }

  static int cloop_open(BlockDriverState *bs, int flags)
  {
  BDRVCloopState *s = bs-opaque;
-uint32_t offsets_size,max_compressed_block_size=1,i;
+uint32_t offsets_size, max_compressed_block_size = 1, i;

  bs-read_only = 1;

@@ -73,26 +75,28 @@ static int cloop_open(BlockDriverState *bs, int flags)
  s-offsets = g_malloc(offsets_size);
  if (bdrv_pread(bs-file, 128 + 4 + 4, s-offsets, offsets_size)
  offsets_size) {
-   goto cloop_close;
+goto cloop_close;
  }
  for(i=0;is-n_blocks;i++) {
-   s-offsets[i]=be64_to_cpu(s-offsets[i]);
-   if(i0) {
-   uint32_t size=s-offsets[i]-s-offsets[i-1];
-   if(sizemax_compressed_block_size)
-   max_compressed_block_size=size;
-   }
+s-offsets[i] = be64_to_cpu(s-offsets[i]);
+if (i  0) {
+uint32_t size = s-offsets[i]-s-offsets[i - 1];
+if (size  max_compressed_block_size) {
+max_compressed_block_size = size;
+}
+}
  }

  /* initialize zlib engine */
-s-compressed_block = g_malloc(max_compressed_block_size+1);
+s-compressed_block = g_malloc(max_compressed_block_size + 1);
  s-uncompressed_block = g_malloc(s-block_size);
-if(inflateInit(s-zstream) != Z_OK)
-   goto cloop_close;
-s-current_block=s-n_blocks;
+if (inflateInit(s-zstream) != Z_OK) {
+goto cloop_close;
+}
+s-current_block = s-n_blocks;

  s-sectors_per_block = s-block_size/512;
-bs-total_sectors = s-n_blocks*s-sectors_per_block;
+bs-total_sectors = s-n_blocks * s-sectors_per_block;
  return 0;

  cloop_close:
@@ -104,26 +108,29 @@ static inline int cloop_read_block(BlockDriverState *bs, 
int block_num)
  BDRVCloopState *s = bs-opaque;

  if(s-current_block != block_num) {
-   int ret;
-uint32_t bytes = s-offsets[block_num+1]-s-offsets[block_num];
+int ret;
+uint32_t bytes = s-offsets[block_num+1] - s-offsets[block_num];

  ret = bdrv_pread(bs-file, s-offsets[block_num], s-compressed_block,
   bytes);
-if (ret != bytes)
+if (ret != bytes) {
  return -1;
+}
+
+s-zstream.next_in = s-compressed_block;
+s-zstream.avail_in = bytes;
+s-zstream.next_out = s-uncompressed_block;
+s-zstream.avail_out = s-block_size;
+ret = inflateReset(s-zstream);
+if (ret != Z_OK) {
+return -1;
+}
+ret = inflate(s-zstream, Z_FINISH);
+if (ret != Z_STREAM_END || s-zstream.total_out != s-block_size) {
+return -1;
+}

-   s-zstream.next_in = s-compressed_block;
-   s-zstream.avail_in = bytes;
-   s-zstream.next_out = s-uncompressed_block;
-   s-zstream.avail_out = s-block_size;
-   ret = inflateReset(s-zstream);
-   if(ret != Z_OK)
-   return -1;
-   ret = inflate(s-zstream, Z_FINISH);
-   if(ret != Z_STREAM_END || s-zstream.total_out != s-block_size)
-   return -1;
-
-   s-current_block = block_num;
+s-current_block = block_num;
  }
  return 0;
  }
@@ -134,12 +141,14 @@ static int cloop_read(BlockDriverState *bs, int64_t 
sector_num,
  BDRVCloopState *s = bs-opaque;
  int i;

-for(i=0;inb_sectors;i++) {
-   uint32_t sector_offset_in_block=((sector_num+i)%s-sectors_per_block),
-   block_num=(sector_num+i)/s-sectors_per_block;
-   if(cloop_read_block(bs, block_num) != 0)
-   return -1;
-   memcpy(buf+i*512,s-uncompressed_block+sector_offset_in_block*512,512);
+for (i = 0; i  nb_sectors; i++) {
+uint32_t sector_offset_in_block = ((sector_num + i) % 
s-sectors_per_block),
+

[Qemu-devel] buildbot failure in qemu on s390-next_i386_debian_6_0

2011-10-17 Thread qemu

The Buildbot has detected a new failure on builder s390-next_i386_debian_6_0 
while building qemu.
Full details are available at:
 http://buildbot.b1-systems.de/qemu/builders/s390-next_i386_debian_6_0/builds/64

Buildbot URL: http://buildbot.b1-systems.de/qemu/

Buildslave for this Build: yuzuki

Build Reason: The Nightly scheduler named 'nightly_s390-next' triggered this 
build
Build Source Stamp: [branch s390-next] HEAD
Blamelist: 

BUILD FAILED: failed git

sincerely,
 -The Buildbot

Re: [Qemu-devel] [PATCH] target_sparc: Fix use of free() instead of g_free()

2011-10-17 Thread Ray Wang


On 10/17/2011 02:10 AM, Stefan Weil wrote:

This error was reported by cppcheck.

Signed-off-by: Stefan Weils...@weilnetz.de
---
  target-sparc/helper.c |4 ++--
  1 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/target-sparc/helper.c b/target-sparc/helper.c
index c80531a..ca9bf6b 100644
--- a/target-sparc/helper.c
+++ b/target-sparc/helper.c
@@ -1200,8 +1200,8 @@ static int cpu_sparc_register(CPUSPARCState *env, const 
char *cpu_model)

  static void cpu_sparc_close(CPUSPARCState *env)
  {
-free(env-def);
-free(env);
+g_free(env-def);
+g_free(env);
  }

  CPUSPARCState *cpu_sparc_init(const char *cpu_model)

Reviewed-by: Ray Wangrayw...@linux.vnet.ibm.com

--
Regards,

Ray Wang

Re: [Qemu-devel] [PATCH v3 1/4] vga: make PCI devices optional

2011-10-17 Thread Jan Kiszka

On 2011-10-16 23:21, Blue Swirl wrote:
 Improve VGA selection logic, push check for device availability to vl.c.
 Make PCI VGA devices optional.
 
 Signed-off-by: Blue Swirl blauwir...@gmail.com
 ---
  hw/cirrus_vga.c |5 -
  hw/pc.c |6 +-
  hw/pc.h |   33 +++--
  hw/pci.c|   18 ++
  hw/pci.h|4 
  hw/qdev.c   |5 +
  hw/qdev.h   |1 +
  hw/vga-pci.c|6 --
  vl.c|   33 +++--
  9 files changed, 83 insertions(+), 28 deletions(-)
 
 diff --git a/hw/cirrus_vga.c b/hw/cirrus_vga.c
 index c7e365b..a11444c 100644
 --- a/hw/cirrus_vga.c
 +++ b/hw/cirrus_vga.c
 @@ -2955,11 +2955,6 @@ static int pci_cirrus_vga_initfn(PCIDevice *dev)
   return 0;
  }
 
 -void pci_cirrus_vga_init(PCIBus *bus)
 -{
 -pci_create_simple(bus, -1, cirrus-vga);
 -}
 -
  static PCIDeviceInfo cirrus_vga_info = {
  .qdev.name= cirrus-vga,
  .qdev.desc= Cirrus CLGD 54xx VGA,
 diff --git a/hw/pc.c b/hw/pc.c
 index f0802b7..057eb9c 100644
 --- a/hw/pc.c
 +++ b/hw/pc.c
 @@ -1080,11 +1080,7 @@ void pc_vga_init(PCIBus *pci_bus)
  }
  } else if (vmsvga_enabled) {
  if (pci_bus) {
 -if (!pci_vmsvga_init(pci_bus)) {
 -fprintf(stderr, Warning: vmware_vga not available,
 - using standard VGA instead\n);
 -pci_vga_init(pci_bus);
 -}
 +pci_vmsvga_init(pci_bus);
  } else {
  fprintf(stderr, %s: vmware_vga: no PCI bus\n, __FUNCTION__);
  }
 diff --git a/hw/pc.h b/hw/pc.h
 index b8ad9a3..6c951e8 100644
 --- a/hw/pc.h
 +++ b/hw/pc.h
 @@ -9,6 +9,7 @@
  #include net.h
  #include memory.h
  #include ioapic.h
 +#include pci.h
 
  /* PC-style peripherals (also used by other machines).  */
 
 @@ -203,26 +204,46 @@ enum vga_retrace_method {
 
  extern enum vga_retrace_method vga_retrace_method;
 
 -static inline int isa_vga_init(void)
 +static inline bool isa_vga_init(void)
  {
  ISADevice *dev;
 
  dev = isa_try_create(isa-vga);
  if (!dev) {
 -fprintf(stderr, Warning: isa-vga not available\n);
 -return 0;
 +return false;
  }
  qdev_init_nofail(dev-qdev);
 -return 1;
 +return true;
 +}
 +
 +/* vga-pci.c */
 +static inline bool pci_vga_init(PCIBus *bus)
 +{
 +PCIDevice *dev;
 +
 +dev = pci_try_create_simple(bus, -1, VGA);
 +if (!dev) {
 +return false;
 +}
 +return true;
  }
 
 -int pci_vga_init(PCIBus *bus);
  int isa_vga_mm_init(target_phys_addr_t vram_base,
  target_phys_addr_t ctrl_base, int it_shift,
  MemoryRegion *address_space);
 
  /* cirrus_vga.c */
 -void pci_cirrus_vga_init(PCIBus *bus);
 +static inline bool pci_cirrus_vga_init(PCIBus *bus)
 +{
 +PCIDevice *dev;
 +
 +dev = pci_try_create_simple(bus, -1, cirrus-vga);
 +if (!dev) {
 +return false;
 +}
 +return true;
 +}
 +
  void isa_cirrus_vga_init(MemoryRegion *address_space);
 
  /* ne2000.c */
 diff --git a/hw/pci.c b/hw/pci.c
 index 749e8d8..46c01ac 100644
 --- a/hw/pci.c
 +++ b/hw/pci.c
 @@ -1687,6 +1687,19 @@ PCIDevice
 *pci_create_simple_multifunction(PCIBus *bus, int devfn,
  return dev;
  }
 
 +PCIDevice *pci_try_create_simple_multifunction(PCIBus *bus, int devfn,
 +   bool multifunction,
 +   const char *name)
 +{
 +PCIDevice *dev = pci_try_create_multifunction(bus, devfn, multifunction,
 +  name);
 +if (!dev) {
 +return NULL;
 +}
 +qdev_init_nofail(dev-qdev);
 +return dev;
 +}
 +
  PCIDevice *pci_create(PCIBus *bus, int devfn, const char *name)
  {
  return pci_create_multifunction(bus, devfn, false, name);
 @@ -1702,6 +1715,11 @@ PCIDevice *pci_try_create(PCIBus *bus, int
 devfn, const char *name)
  return pci_try_create_multifunction(bus, devfn, false, name);
  }
 
 +PCIDevice *pci_try_create_simple(PCIBus *bus, int devfn, const char *name)
 +{
 +return pci_try_create_simple_multifunction(bus, devfn, false, name);
 +}
 +
  static int pci_find_space(PCIDevice *pdev, uint8_t size)
  {
  int config_size = pci_config_size(pdev);
 diff --git a/hw/pci.h b/hw/pci.h
 index 86a81c8..aa2e040 100644
 --- a/hw/pci.h
 +++ b/hw/pci.h
 @@ -473,9 +473,13 @@ PCIDevice *pci_create_simple_multifunction(PCIBus
 *bus, int devfn,
  PCIDevice *pci_try_create_multifunction(PCIBus *bus, int devfn,
  bool multifunction,
  const char *name);
 +PCIDevice *pci_try_create_simple_multifunction(PCIBus *bus, int devfn,
 +   bool multifunction,
 +   const char *name);
  PCIDevice *pci_create(PCIBus *bus, int

[Qemu-devel] buildbot failure in qemu on monitor_i386_debian_6_0

2011-10-17 Thread qemu

The Buildbot has detected a new failure on builder monitor_i386_debian_6_0 
while building qemu.
Full details are available at:
 http://buildbot.b1-systems.de/qemu/builders/monitor_i386_debian_6_0/builds/63

Buildbot URL: http://buildbot.b1-systems.de/qemu/

Buildslave for this Build: yuzuki

Build Reason: The Nightly scheduler named 'nightly_monitor' triggered this build
Build Source Stamp: [branch queue/monitor] HEAD
Blamelist: 

BUILD FAILED: failed git

sincerely,
 -The Buildbot

[Qemu-devel] [PATCH v2 1/2] spice: turn client_migrate_info to async

2011-10-17 Thread Yonit Halperin

RHBZ 737921
Spice client is required to connect to the migration target before/as migration
starts. Since the target qemu is blocked after migration starts and cannot
accept a new spice client, we trigger the connection to the target upon the
client_migrate_info command.
client_migrate_info completion cb will be called after spice client has been
connected to the target (or a timeout). See following patches and spice patches.

Signed-off-by: Yonit Halperin yhalp...@redhat.com
---
 hmp-commands.hx |3 ++-
 monitor.c   |6 --
 qmp-commands.hx |3 ++-
 ui/qemu-spice.h |   14 +++---
 ui/spice-core.c |   10 +++---
 5 files changed, 26 insertions(+), 10 deletions(-)

diff --git a/hmp-commands.hx b/hmp-commands.hx
index 9e1cca8..6f390a0 100644
--- a/hmp-commands.hx
+++ b/hmp-commands.hx
@@ -827,7 +827,8 @@ ETEXI
 .params = protocol hostname port tls-port cert-subject,
 .help   = send migration info to spice/vnc client,
 .user_print = monitor_user_noop,
-.mhandler.cmd_new = client_migrate_info,
+.mhandler.cmd_async = client_migrate_info,
+.flags  = MONITOR_CMD_ASYNC,
 },
 
 STEXI
diff --git a/monitor.c b/monitor.c
index df0f622..0374dcc 100644
--- a/monitor.c
+++ b/monitor.c
@@ -1221,7 +1221,8 @@ static int add_graphics_client(Monitor *mon, const QDict 
*qdict, QObject **ret_d
 return -1;
 }
 
-static int client_migrate_info(Monitor *mon, const QDict *qdict, QObject 
**ret_data)
+static int client_migrate_info(Monitor *mon, const QDict *qdict,
+   MonitorCompletion cb, void *opaque)
 {
 const char *protocol = qdict_get_str(qdict, protocol);
 const char *hostname = qdict_get_str(qdict, hostname);
@@ -1236,7 +1237,8 @@ static int client_migrate_info(Monitor *mon, const QDict 
*qdict, QObject **ret_d
 return -1;
 }
 
-ret = qemu_spice_migrate_info(hostname, port, tls_port, subject);
+ret = qemu_spice_migrate_info(hostname, port, tls_port, subject,
+  cb, opaque);
 if (ret != 0) {
 qerror_report(QERR_UNDEFINED_ERROR);
 return -1;
diff --git a/qmp-commands.hx b/qmp-commands.hx
index 27cc66e..321fb10 100644
--- a/qmp-commands.hx
+++ b/qmp-commands.hx
@@ -578,7 +578,8 @@ EQMP
 .params = protocol hostname port tls-port cert-subject,
 .help   = send migration info to spice/vnc client,
 .user_print = monitor_user_noop,
-.mhandler.cmd_new = client_migrate_info,
+.mhandler.cmd_async = client_migrate_info,
+.flags  = MONITOR_CMD_ASYNC,
 },
 
 SQMP
diff --git a/ui/qemu-spice.h b/ui/qemu-spice.h
index f34be69..c35b29c 100644
--- a/ui/qemu-spice.h
+++ b/ui/qemu-spice.h
@@ -25,6 +25,7 @@
 #include qemu-option.h
 #include qemu-config.h
 #include qemu-char.h
+#include monitor.h
 
 extern int using_spice;
 
@@ -37,7 +38,8 @@ int qemu_spice_set_passwd(const char *passwd,
   bool fail_if_connected, bool 
disconnect_if_connected);
 int qemu_spice_set_pw_expire(time_t expires);
 int qemu_spice_migrate_info(const char *hostname, int port, int tls_port,
-const char *subject);
+const char *subject,
+MonitorCompletion cb, void *opaque);
 
 void do_info_spice_print(Monitor *mon, const QObject *data);
 void do_info_spice(Monitor *mon, QObject **ret_data);
@@ -45,6 +47,7 @@ void do_info_spice(Monitor *mon, QObject **ret_data);
 int qemu_chr_open_spice(QemuOpts *opts, CharDriverState **_chr);
 
 #else  /* CONFIG_SPICE */
+#include monitor.h
 
 #define using_spice 0
 static inline int qemu_spice_set_passwd(const char *passwd,
@@ -57,8 +60,13 @@ static inline int qemu_spice_set_pw_expire(time_t expires)
 {
 return -1;
 }
-static inline int qemu_spice_migrate_info(const char *h, int p, int t, const 
char *s)
-{ return -1; }
+static inline int qemu_spice_migrate_info(const char *h, int p, int t,
+  const char *s,
+  MonitorCompletion cb, void *opaque)
+{
+cb(opaque, NULL);
+return -1;
+}
 
 #endif /* CONFIG_SPICE */
 
diff --git a/ui/spice-core.c b/ui/spice-core.c
index 3cbc721..50c0d7d 100644
--- a/ui/spice-core.c
+++ b/ui/spice-core.c
@@ -457,10 +457,14 @@ static void migration_state_notifier(Notifier *notifier, 
void *data)
 }
 
 int qemu_spice_migrate_info(const char *hostname, int port, int tls_port,
-const char *subject)
+const char *subject,
+MonitorCompletion *cb, void *opaque)
 {
-return spice_server_migrate_info(spice_server, hostname,
- port, tls_port, subject);
+int ret;
+ret = spice_server_migrate_info(spice_server, hostname,
+port, tls_port, subject);
+cb(opaque, NULL);
+

[Qemu-devel] [PATCH v2 2/2] spice: support the new migration interface (spice 0.8.3)

2011-10-17 Thread Yonit Halperin

- call spice_server_migrate_(start|end|connect).
- register spice_migrate_connect completion callback

Signed-off-by: Yonit Halperin yhalp...@redhat.com
---
 ui/spice-core.c |   56 ++-
 1 files changed, 55 insertions(+), 1 deletions(-)

diff --git a/ui/spice-core.c b/ui/spice-core.c
index 50c0d7d..457cf61 100644
--- a/ui/spice-core.c
+++ b/ui/spice-core.c
@@ -288,6 +288,38 @@ static SpiceCoreInterface core_interface = {
 #endif
 };
 
+#ifdef SPICE_INTERFACE_MIGRATION
+typedef struct SpiceMigration {
+SpiceMigrateInstance sin;
+struct {
+MonitorCompletion *cb;
+void *opaque;
+} connect_complete;
+} SpiceMigration;
+
+static void migrate_connect_complete_cb(SpiceMigrateInstance *sin);
+
+static const SpiceMigrateInterface migrate_interface = {
+.base.type = SPICE_INTERFACE_MIGRATION,
+.base.description = migration,
+.base.major_version = SPICE_INTERFACE_MIGRATION_MAJOR,
+.base.minor_version = SPICE_INTERFACE_MIGRATION_MINOR,
+.migrate_connect_complete = migrate_connect_complete_cb,
+.migrate_end_complete = NULL,
+};
+
+static SpiceMigration spice_migrate;
+
+static void migrate_connect_complete_cb(SpiceMigrateInstance *sin)
+{
+SpiceMigration *sm = container_of(sin, SpiceMigration, sin);
+if (sm-connect_complete.cb) {
+sm-connect_complete.cb(sm-connect_complete.opaque, NULL);
+}
+sm-connect_complete.cb = NULL;
+}
+#endif
+
 /* config string parsing */
 
 static int name2enum(const char *string, const char *table[], int entries)
@@ -449,9 +481,19 @@ static void migration_state_notifier(Notifier *notifier, 
void *data)
 {
 int state = get_migration_state();
 
-if (state == MIG_STATE_COMPLETED) {
+if (state == MIG_STATE_ACTIVE) {
+#ifdef SPICE_INTERFACE_MIGRATION
+spice_server_migrate_start(spice_server);
+#endif
+} else if (state == MIG_STATE_COMPLETED) {
 #if SPICE_SERVER_VERSION = 0x000701 /* 0.7.1 */
+#ifndef SPICE_INTERFACE_MIGRATION
 spice_server_migrate_switch(spice_server);
+#else
+spice_server_migrate_end(spice_server, true);
+} else if (state == MIG_STATE_CANCELLED || state == MIG_STATE_ERROR) {
+spice_server_migrate_end(spice_server, false);
+#endif
 #endif
 }
 }
@@ -461,9 +503,16 @@ int qemu_spice_migrate_info(const char *hostname, int 
port, int tls_port,
 MonitorCompletion *cb, void *opaque)
 {
 int ret;
+#ifdef SPICE_INTERFACE_MIGRATION
+spice_migrate.connect_complete.cb = cb;
+spice_migrate.connect_complete.opaque = opaque;
+ret = spice_server_migrate_connect(spice_server, hostname,
+   port, tls_port, subject);
+#else
 ret = spice_server_migrate_info(spice_server, hostname,
 port, tls_port, subject);
 cb(opaque, NULL);
+#endif
 return ret;
 }
 
@@ -654,6 +703,11 @@ void qemu_spice_init(void)
 
 migration_state.notify = migration_state_notifier;
 add_migration_state_change_notifier(migration_state);
+#ifdef SPICE_INTERFACE_MIGRATION
+spice_migrate.sin.base.sif = migrate_interface.base;
+spice_migrate.connect_complete.cb = NULL;
+qemu_spice_add_interface(spice_migrate.sin.base);
+#endif
 
 qemu_spice_input_init();
 qemu_spice_audio_init();
-- 
1.7.6.4

[Qemu-devel] [PATCH v2 0/2] spice migration interface v2 (RHBZ 737921)

2011-10-17 Thread Yonit Halperin

Same as the previous series with a small fix to
allow compilation with Spice disabled.

Yonit
 

Spice client is required to connect to the migration target before/as migration
starts. Previously, it connected upon migration completion; however, the ticket
was set at the beginning, so when the migration time exceeded
ticket_expiration_time, spice failed to connect to the target.

Since the migration target is blocked after migration starts, we execute 
spice-client connection to the target before migration, upon 
client_migrate_info. We wait till the client is connected to the target, or 
till a timeout occurs.
In order to not block the iothread, this patch turns client_migrate_info to
asynchronous.

In addition, we changed the spice api:
(1) client_migrate_info needs to call spice_server_migrate_connect
(2) spice_server_migrate_start/end need to be called upon migration start/end

** spice_server_migrate_start and the migrate_end_complete callback were added for
   future use, in case we implement a real seamless spice migration

Yonit Halperin (2):
  spice: turn client_migrate_info to async
  spice: support the new migration interface (spice 0.8.3)

 hmp-commands.hx |3 +-
 monitor.c   |6 +++-
 qmp-commands.hx |3 +-
 ui/qemu-spice.h |   14 +--
 ui/spice-core.c |   66 +++---
 5 files changed, 81 insertions(+), 11 deletions(-)

-- 
1.7.6.4

Re: [Qemu-devel] [PATCH 1/1 V5] kernel/kvm: introduce KVM_SET_LINT1 and fix improper nmi emulation

2011-10-17 Thread Lai Jiangshan

On 10/16/2011 05:39 PM, Avi Kivity wrote:
 On 10/14/2011 11:03 AM, Lai Jiangshan wrote:
 Currently, NMI interrupt is blindly sent to all the vCPUs when NMI
 button event happens. This doesn't properly emulate real hardware on
 which NMI button event triggers LINT1. Because of this, NMI is sent to
 the processor even when LINT1 is masked in LVT. For example, this
 causes the problem that kdump initiated by NMI sometimes doesn't work
 on KVM, because kdump assumes NMI is masked on CPUs other than CPU0.

 With this patch, we introduce KVM_SET_LINT1,
 and we can use KVM_SET_LINT1 to correctly emulate the NMI button
 without changing the old KVM_NMI behavior.
  
 @@ -759,6 +762,8 @@ struct kvm_clock_data {
  #define KVM_CREATE_SPAPR_TCE  _IOW(KVMIO,  0xa8, struct 
 kvm_create_spapr_tce)
  /* Available with KVM_CAP_RMA */
  #define KVM_ALLOCATE_RMA  _IOR(KVMIO,  0xa9, struct kvm_allocate_rma)
 +/* Available with KVM_CAP_SET_LINT1 for x86 */
 +#define KVM_SET_LINT1 _IO(KVMIO,   0xaa)
  

 
 LINT1 may have been programmed as a level-triggered interrupt instead
 of edge triggered (NMI or interrupt).  We can use the ioctl argument for
 the level (and pressing the NMI button needs to pulse the level to 1 and
 back to 0).
 

Hi, Avi,

How should level=0 be handled in the kernel?
Or should it just be ignored?

Thanks,
Lai

[Qemu-devel] [PATCH RFC v1 0/2] Initial support for Microsoft Hyper-V.

2011-10-17 Thread Vadim Rozenfeld

With the following series of patches we are starting to implement
some basic Microsoft Hyper-V Enlightenment functionality. This series
is mostly about adding support for relaxed timing, spinlock, 
and virtual apic.

For more Hyper-V related information please see:
Hypervisor Functional Specification v2.0: For Windows Server 2008 R2 at
http://www.microsoft.com/download/en/details.aspx?displaylang=en&id=18673

Changelog:
 v0 -> v1
  - move hyper-v parameters under cpu category,
  - move hyper-v stuff to target-i386 directory,
  - make CONFIG_HYPERV enabled by default for
i386-softmmu and x86_64-softmmu configurations,
  - rearrange the patches from v0,
  - set HV_X64_MSR_HYPERCALL, HV_X64_MSR_GUEST_OS_ID,
and HV_X64_MSR_APIC_ASSIST_PAGE to 0 on system reset.

Vadim Rozenfeld (2):
  hyper-v: introduce Hyper-V support infrastructure.
  hyper-v: initialize Hyper-V CPUID leafs.

 Makefile.target|2 +
 default-configs/i386-softmmu.mak   |1 +
 default-configs/x86_64-softmmu.mak |1 +
 target-i386/cpuid.c|   14 +++
 target-i386/hyperv.c   |   69 
 target-i386/hyperv.h   |   30 +++
 target-i386/kvm.c  |   64 -
 7 files changed, 179 insertions(+), 2 deletions(-)
 create mode 100644 target-i386/hyperv.c
 create mode 100644 target-i386/hyperv.h

-- 
1.7.4.4

[Qemu-devel] [PATCH RFC v1 2/2] hyper-v: initialize Hyper-V CPUID leafs.

2011-10-17 Thread Vadim Rozenfeld

---
 target-i386/kvm.c |   64 +++-
 1 files changed, 62 insertions(+), 2 deletions(-)

diff --git a/target-i386/kvm.c b/target-i386/kvm.c
index 3840255..30b3e85 100644
--- a/target-i386/kvm.c
+++ b/target-i386/kvm.c
@@ -29,6 +29,7 @@
 #include hw/pc.h
 #include hw/apic.h
 #include ioport.h
+#include hyperv.h
 
 //#define DEBUG_KVM
 
@@ -379,11 +380,16 @@ int kvm_arch_init_vcpu(CPUState *env)
 cpuid_i = 0;
 
 /* Paravirtualization CPUIDs */
-memcpy(signature, KVMKVMKVM\0\0\0, 12);
 c = cpuid_data.entries[cpuid_i++];
 memset(c, 0, sizeof(*c));
 c-function = KVM_CPUID_SIGNATURE;
-c-eax = 0;
+if (!hyperv_enabled()) {
+memcpy(signature, KVMKVMKVM\0\0\0, 12);
+c-eax = 0;
+} else {
+memcpy(signature, Microsoft Hv, 12);
+c-eax = HYPERV_CPUID_MIN;
+}
 c-ebx = signature[0];
 c-ecx = signature[1];
 c-edx = signature[2];
@@ -394,6 +400,45 @@ int kvm_arch_init_vcpu(CPUState *env)
 c-eax = env-cpuid_kvm_features 
 kvm_arch_get_supported_cpuid(s, KVM_CPUID_FEATURES, 0, R_EAX);
 
+if (hyperv_enabled()) {
+memcpy(signature, Hv#1\0\0\0\0\0\0\0\0, 12);
+c-eax = signature[0];
+
+c = cpuid_data.entries[cpuid_i++];
+memset(c, 0, sizeof(*c));
+c-function = HYPERV_CPUID_VERSION;
+c-eax = 0x1bbc;
+c-ebx = 0x00060001;
+
+c = cpuid_data.entries[cpuid_i++];
+memset(c, 0, sizeof(*c));
+c-function = HYPERV_CPUID_FEATURES;
+if (hyperv_get_relaxed_timing()) {
+c-eax |= HV_X64_MSR_HYPERCALL_AVAILABLE;
+}
+if (hyperv_get_vapic_recommended()) {
+c-eax |= HV_X64_MSR_HYPERCALL_AVAILABLE;
+c-eax |= HV_X64_MSR_APIC_ACCESS_AVAILABLE;
+}
+
+c = cpuid_data.entries[cpuid_i++];
+memset(c, 0, sizeof(*c));
+c-function = HYPERV_CPUID_ENLIGHTMENT_INFO;
+if (hyperv_get_relaxed_timing()) {
+c-eax |= HV_X64_RELAXED_TIMING_RECOMMENDED;
+}
+if (hyperv_get_vapic_recommended()) {
+c-eax |= HV_X64_APIC_ACCESS_RECOMMENDED;
+}
+c-ebx = hyperv_get_spinlock_retries();
+
+c = cpuid_data.entries[cpuid_i++];
+memset(c, 0, sizeof(*c));
+c-function = HYPERV_CPUID_IMPLEMENT_LIMITS;
+c-eax = 0x40;
+c-ebx = 0x40;
+}
+
 has_msr_async_pf_en = c-eax  (1  KVM_FEATURE_ASYNC_PF);
 
 cpu_x86_cpuid(env, 0, 0, limit, unused, unused, unused);
@@ -945,6 +990,13 @@ static int kvm_put_msrs(CPUState *env, int level)
 kvm_msr_entry_set(msrs[n++], MSR_KVM_ASYNC_PF_EN,
   env-async_pf_en_msr);
 }
+if (hyperv_hypercall_available()) {
+kvm_msr_entry_set(msrs[n++], HV_X64_MSR_GUEST_OS_ID, 0);
+kvm_msr_entry_set(msrs[n++], HV_X64_MSR_HYPERCALL, 0);
+}
+if (hyperv_get_vapic_recommended()) {
+kvm_msr_entry_set(msrs[n++], HV_X64_MSR_APIC_ASSIST_PAGE, 0);
+}
 }
 if (env-mcg_cap) {
 int i;
@@ -1179,6 +1231,14 @@ static int kvm_get_msrs(CPUState *env)
 msrs[n++].index = MSR_KVM_ASYNC_PF_EN;
 }
 
+if (hyperv_hypercall_available()) {
+msrs[n++].index = HV_X64_MSR_GUEST_OS_ID;
+msrs[n++].index = HV_X64_MSR_HYPERCALL;
+}
+if (hyperv_get_vapic_recommended()) {
+msrs[n++].index = HV_X64_MSR_APIC_ASSIST_PAGE;
+}
+
 if (env-mcg_cap) {
 msrs[n++].index = MSR_MCG_STATUS;
 msrs[n++].index = MSR_MCG_CTL;
-- 
1.7.4.4

[Qemu-devel] [PATCH RFC v1 1/2] hyper-v: introduce Hyper-V support infrastructure.

2011-10-17 Thread Vadim Rozenfeld

With the following series of patches we are starting to implement
some basic Microsoft Hyper-V Enlightenment functionality, like relaxed
timing, spinlock, and virtual apic support.

For more Hyper-V related information please see:
Hypervisor Functional Specification v2.0: For Windows Server 2008 R2 at
http://www.microsoft.com/download/en/details.aspx?displaylang=en&id=18673
---
 Makefile.target|2 +
 default-configs/i386-softmmu.mak   |1 +
 default-configs/x86_64-softmmu.mak |1 +
 target-i386/cpuid.c|   14 +++
 target-i386/hyperv.c   |   69 
 target-i386/hyperv.h   |   30 +++
 6 files changed, 117 insertions(+), 0 deletions(-)
 create mode 100644 target-i386/hyperv.c
 create mode 100644 target-i386/hyperv.h

diff --git a/Makefile.target b/Makefile.target
index 40cc592..2c8e1b8 100644
--- a/Makefile.target
+++ b/Makefile.target
@@ -202,6 +202,8 @@ obj-$(CONFIG_NO_KVM) += kvm-stub.o
 obj-y += memory.o
 LIBS+=-lz
 
+obj-$(CONFIG_HYPERV) += hyperv.o
+
 QEMU_CFLAGS += $(VNC_TLS_CFLAGS)
 QEMU_CFLAGS += $(VNC_SASL_CFLAGS)
 QEMU_CFLAGS += $(VNC_JPEG_CFLAGS)
diff --git a/default-configs/i386-softmmu.mak b/default-configs/i386-softmmu.mak
index 55589fa..ee69a0a 100644
--- a/default-configs/i386-softmmu.mak
+++ b/default-configs/i386-softmmu.mak
@@ -21,3 +21,4 @@ CONFIG_PIIX_PCI=y
 CONFIG_SOUND=y
 CONFIG_HPET=y
 CONFIG_APPLESMC=y
+CONFIG_HYPERV=y
diff --git a/default-configs/x86_64-softmmu.mak 
b/default-configs/x86_64-softmmu.mak
index 8895028..35b1c00 100644
--- a/default-configs/x86_64-softmmu.mak
+++ b/default-configs/x86_64-softmmu.mak
@@ -21,3 +21,4 @@ CONFIG_PIIX_PCI=y
 CONFIG_SOUND=y
 CONFIG_HPET=y
 CONFIG_APPLESMC=y
+CONFIG_HYPERV=y
diff --git a/target-i386/cpuid.c b/target-i386/cpuid.c
index 1e8bcff..50b2d0e 100644
--- a/target-i386/cpuid.c
+++ b/target-i386/cpuid.c
@@ -27,6 +27,8 @@
 #include qemu-option.h
 #include qemu-config.h
 
+#include hyperv.h
+
 /* feature flags taken from Intel Processor Identification and the CPUID
  * Instruction and AMD's CPUID Specification.  In cases of disagreement
  * between feature naming conventions, aliases may be added.
@@ -716,6 +718,14 @@ static int cpu_x86_find_by_name(x86_def_t *x86_cpu_def, 
const char *cpu_model)
 goto error;
 }
 x86_cpu_def-tsc_khz = tsc_freq / 1000;
+} else if (!strcmp(featurestr, hv_spinlocks)) {
+   char* err;
+   numvalue = strtoul(val, err, 0);
+   if (!*val || *err) {
+fprintf(stderr, bad numerical value %s\n, val);
+goto error;
+}
+hyperv_set_spinlock_retries(numvalue);
 } else {
 fprintf(stderr, unrecognized feature %s\n, featurestr);
 goto error;
@@ -724,6 +734,10 @@ static int cpu_x86_find_by_name(x86_def_t *x86_cpu_def, 
const char *cpu_model)
 check_cpuid = 1;
 } else if (!strcmp(featurestr, enforce)) {
 check_cpuid = enforce_cpuid = 1;
+} else if (!strcmp(featurestr, hv_relaxed)) {
+hyperv_set_relaxed_timing(1);
+} else if (!strcmp(featurestr, hv_vapic)) {
+hyperv_set_vapic_recommended(1);
 } else {
 fprintf(stderr, feature string `%s' not in format 
(+feature|-feature|feature=xyz)\n, featurestr);
 goto error;
diff --git a/target-i386/hyperv.c b/target-i386/hyperv.c
new file mode 100644
index 000..bed859e
--- /dev/null
+++ b/target-i386/hyperv.c
@@ -0,0 +1,69 @@
+/*
+ * QEMU Hyper-V support
+ *
+ * Copyright Red Hat, Inc. 2011
+ *
+ * Author: Vadim Rozenfeld vroze...@redhat.com
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+
+#include hyperv.h
+
+static int hyperv_vapic;
+static int hyperv_relaxed_timing;
+static int hyperv_spinlock_attempts = HYPERV_SPINLOCK_NEVER_RETRY;
+
+void hyperv_set_vapic_recommended(int val)
+{
+hyperv_vapic = val;
+}
+
+void hyperv_set_relaxed_timing(int val)
+{
+hyperv_relaxed_timing = val;
+}
+
+void hyperv_set_spinlock_retries(int val)
+{
+hyperv_spinlock_attempts = val;
+if (hyperv_spinlock_attempts  0xFFF) {
+hyperv_spinlock_attempts = 0xFFF;
+}
+}
+
+int hyperv_enabled(void)
+{
+return hyperv_hypercall_available() || hyperv_get_relaxed_timing();
+}
+
+int hyperv_hypercall_available(void)
+{
+if (hyperv_vapic ||
+(hyperv_spinlock_attempts != HYPERV_SPINLOCK_NEVER_RETRY)) {
+  return 1;
+}
+return 0;
+}
+
+int hyperv_get_vapic_recommended(void)
+{
+#ifdef KVM_CAP_IRQCHIP
+return hyperv_vapic;
+#else
+return 0;
+#endif
+}
+
+int hyperv_get_relaxed_timing(void)
+{
+return hyperv_relaxed_timing;
+}
+
+int hyperv_get_spinlock_retries(void)
+{
+return hyperv_spinlock_attempts;
+}
+
diff

Re: [Qemu-devel] [PATCHv3] ps2: migrate ledstate

2011-10-17 Thread Gerd Hoffmann



  static const VMStateDescription vmstate_ps2_common = {
  .name = PS2 Common State,
-.version_id = 3,
+.version_id = 4,
  .minimum_version_id = 2,
  .minimum_version_id_old = 2,
  .fields  = (VMStateField []) {



@@ -577,6 +585,7 @@ static const VMStateDescription vmstate_ps2_keyboard = {
  VMSTATE_INT32(scan_enabled, PS2KbdState),
  VMSTATE_INT32(translate, PS2KbdState),
  VMSTATE_INT32_V(scancode_set, PS2KbdState,3),
+VMSTATE_INT32_V(ledstate, PS2KbdState, 4),
  VMSTATE_END_OF_LIST()
  }


version_id in vmstate_ps2_keyboard must be updated too.

The version update in vmstate_ps2_common might not be needed; IIRC the
versions for stuff referenced via VMSTATE_STRUCT() aren't used anyway, Juan?


cheers,
  Gerd

Re: [Qemu-devel] [PATCH RFC v1 1/2] hyper-v: introduce Hyper-V support infrastructure.

2011-10-17 Thread Kevin Wolf

Am 17.10.2011 11:17, schrieb Vadim Rozenfeld:
 with the following series of patches we are starting to implement
 some basic Microsoft Hyper-V Enlightenment functionality, like relaxed
 timing, spinlock, and virtual apic support.
 
 For more Hyper-V related information please see:
 Hypervisor Functional Specification v2.0: For Windows Server 2008 R2 at
 http://www.microsoft.com/download/en/details.aspx?displaylang=en&id=18673
 ---
  Makefile.target|2 +
  default-configs/i386-softmmu.mak   |1 +
  default-configs/x86_64-softmmu.mak |1 +
  target-i386/cpuid.c|   14 +++
  target-i386/hyperv.c   |   69 
 
  target-i386/hyperv.h   |   30 +++
  6 files changed, 117 insertions(+), 0 deletions(-)
  create mode 100644 target-i386/hyperv.c
  create mode 100644 target-i386/hyperv.h
 
 diff --git a/Makefile.target b/Makefile.target
 index 40cc592..2c8e1b8 100644
 --- a/Makefile.target
 +++ b/Makefile.target
 @@ -202,6 +202,8 @@ obj-$(CONFIG_NO_KVM) += kvm-stub.o
  obj-y += memory.o
  LIBS+=-lz
  
 +obj-$(CONFIG_HYPERV) += hyperv.o
 +

The patch doesn't look to me as if it could build successfully without
CONFIG_HYPERV. An option with only one working value seems a bit pointless.

Kevin

[Qemu-devel] [RFC][PATCH 15/45] qemu-kvm: Drop unused kvm_del_irq_route

2011-10-17 Thread Jan Kiszka

kvm_add_irq_route only exists to create platform specific static routes.
So there is no need for a corresponding delete.

Signed-off-by: Jan Kiszka jan.kis...@siemens.com
---
 qemu-kvm.c |   16 
 qemu-kvm.h |8 
 2 files changed, 0 insertions(+), 24 deletions(-)

diff --git a/qemu-kvm.c b/qemu-kvm.c
index 70481de..e8dc537 100644
--- a/qemu-kvm.c
+++ b/qemu-kvm.c
@@ -410,22 +410,6 @@ int kvm_update_routing_entry(struct kvm_irq_routing_entry 
*entry,
 #endif
 }
 
-int kvm_del_irq_route(int gsi, int irqchip, int pin)
-{
-#ifdef KVM_CAP_IRQ_ROUTING
-struct kvm_irq_routing_entry e;
-
-e.gsi = gsi;
-e.type = KVM_IRQ_ROUTING_IRQCHIP;
-e.flags = 0;
-e.u.irqchip.irqchip = irqchip;
-e.u.irqchip.pin = pin;
-return kvm_del_routing_entry(e);
-#else
-return -ENOSYS;
-#endif
-}
-
 int kvm_commit_irq_routes(void)
 {
 #ifdef KVM_CAP_IRQ_ROUTING
diff --git a/qemu-kvm.h b/qemu-kvm.h
index 8032388..68a921e 100644
--- a/qemu-kvm.h
+++ b/qemu-kvm.h
@@ -181,14 +181,6 @@ int kvm_deassign_pci_device(KVMState *s,
  */
 int kvm_add_irq_route(int gsi, int irqchip, int pin);
 
-/*!
- * \brief Removes an irq route from the temporary irq routing table
- *
- * Adds an irq route to the temporary irq routing table.  Nothing is
- * committed to the running VM.
- */
-int kvm_del_irq_route(int gsi, int irqchip, int pin);
-
 struct kvm_irq_routing_entry;
 /*!
  * \brief Adds a routing entry to the temporary irq routing table
-- 
1.7.3.4

[Qemu-devel] [RFC][PATCH 10/45] msix: Factor out msix_message_from_vector

2011-10-17 Thread Jan Kiszka

This helper will also be used by the upcoming config notifier.

Signed-off-by: Jan Kiszka jan.kis...@siemens.com
---
 hw/msix.c |   19 +--
 1 files changed, 13 insertions(+), 6 deletions(-)

diff --git a/hw/msix.c b/hw/msix.c
index 04e08e5..50fa504 100644
--- a/hw/msix.c
+++ b/hw/msix.c
@@ -33,6 +33,15 @@
 #define MSIX_PAGE_PENDING (MSIX_PAGE_SIZE / 2)
 #define MSIX_MAX_ENTRIES 32
 
+static void msix_message_from_vector(PCIDevice *dev, unsigned vector,
+ MSIMessage *msg)
+{
+uint8_t *table_entry = dev-msix_table_page + vector * PCI_MSIX_ENTRY_SIZE;
+
+msg-address = pci_get_quad(table_entry + PCI_MSIX_ENTRY_LOWER_ADDR);
+msg-data = pci_get_long(table_entry + PCI_MSIX_ENTRY_DATA);
+}
+
 /* KVM specific MSIX helpers */
 static void kvm_msix_free(PCIDevice *dev)
 {
@@ -453,9 +462,7 @@ uint32_t msix_bar_size(PCIDevice *dev)
 /* Send an MSI-X message */
 void msix_notify(PCIDevice *dev, unsigned vector)
 {
-uint8_t *table_entry = dev-msix_table_page + vector * PCI_MSIX_ENTRY_SIZE;
-uint64_t address;
-uint32_t data;
+MSIMessage msg;
 
 if (vector = dev-msix_entries_nr || !dev-msix_entry_used[vector])
 return;
@@ -469,9 +476,9 @@ void msix_notify(PCIDevice *dev, unsigned vector)
 return;
 }
 
-address = pci_get_quad(table_entry + PCI_MSIX_ENTRY_LOWER_ADDR);
-data = pci_get_long(table_entry + PCI_MSIX_ENTRY_DATA);
-stl_le_phys(address, data);
+msix_message_from_vector(dev, vector, msg);
+
+stl_le_phys(msg.address, msg.data);
 }
 
 void msix_reset(PCIDevice *dev)
-- 
1.7.3.4

[Qemu-devel] [RFC][PATCH 07/45] msi: Generalize msix_supported to msi_supported

2011-10-17 Thread Jan Kiszka

Rename msix_supported to msi_supported and control MSI and MSI-X
activation this way. That was likely the original intention for this
flag, but MSI support came after MSI-X.

Signed-off-by: Jan Kiszka jan.kis...@siemens.com
---
 hw/msi.c  |8 
 hw/msi.h  |2 ++
 hw/msix.c |8 +++-
 hw/msix.h |2 --
 hw/pc.c   |4 ++--
 5 files changed, 15 insertions(+), 9 deletions(-)

diff --git a/hw/msi.c b/hw/msi.c
index c924e38..2b7b6e3 100644
--- a/hw/msi.c
+++ b/hw/msi.c
@@ -37,6 +37,9 @@
 
 #define PCI_MSI_VECTORS_MAX 32
 
+/* Flag for interrupt controller to declare MSI/MSI-X support */
+bool msi_supported;
+
 /* If we get rid of cap allocator, we won't need this. */
 static inline uint8_t msi_cap_sizeof(uint16_t flags)
 {
@@ -205,6 +208,11 @@ int msi_init(struct PCIDevice *dev, uint8_t offset,
 uint16_t flags;
 uint8_t cap_size;
 int config_offset;
+
+if (!msi_supported) {
+return -ENOTSUP;
+}
+
 MSI_DEV_PRINTF(dev,
init offset: 0x%PRIx8 vector: %PRId8
 64bit %d mask %d\n,
diff --git a/hw/msi.h b/hw/msi.h
index 6ff0607..e5e821f 100644
--- a/hw/msi.h
+++ b/hw/msi.h
@@ -24,6 +24,8 @@
 #include qemu-common.h
 #include pci.h
 
+extern bool msi_supported;
+
 bool msi_enabled(const PCIDevice *dev);
 int msi_init(struct PCIDevice *dev, uint8_t offset,
  unsigned int nr_vectors, bool msi64bit, bool msi_per_vector_mask);
diff --git a/hw/msix.c b/hw/msix.c
index 33cb716..04e08e5 100644
--- a/hw/msix.c
+++ b/hw/msix.c
@@ -12,6 +12,7 @@
  */
 
 #include hw.h
+#include msi.h
 #include msix.h
 #include pci.h
 #include range.h
@@ -32,10 +33,6 @@
 #define MSIX_PAGE_PENDING (MSIX_PAGE_SIZE / 2)
 #define MSIX_MAX_ENTRIES 32
 
-
-/* Flag for interrupt controller to declare MSI-X support */
-int msix_supported;
-
 /* KVM specific MSIX helpers */
 static void kvm_msix_free(PCIDevice *dev)
 {
@@ -327,8 +324,9 @@ int msix_init(struct PCIDevice *dev, unsigned short 
nentries,
   unsigned bar_nr, unsigned bar_size)
 {
 int ret;
+
 /* Nothing to do if MSI is not supported by interrupt controller */
-if (!msix_supported ||
+if (!msi_supported ||
 (kvm_enabled()  kvm_irqchip_in_kernel()  !kvm_has_gsi_routing())) {
 return -ENOTSUP;
 }
diff --git a/hw/msix.h b/hw/msix.h
index 189bb3f..a8661e1 100644
--- a/hw/msix.h
+++ b/hw/msix.h
@@ -29,8 +29,6 @@ void msix_notify(PCIDevice *dev, unsigned vector);
 
 void msix_reset(PCIDevice *dev);
 
-extern int msix_supported;
-
 int msix_set_mask_notifier(PCIDevice *dev, msix_mask_notifier_func);
 int msix_unset_mask_notifier(PCIDevice *dev);
 #endif
diff --git a/hw/pc.c b/hw/pc.c
index 70e0d08..768a20c 100644
--- a/hw/pc.c
+++ b/hw/pc.c
@@ -36,7 +36,7 @@
 #include elf.h
 #include multiboot.h
 #include mc146818rtc.h
-#include msix.h
+#include msi.h
 #include sysbus.h
 #include sysemu.h
 #include kvm.h
@@ -892,7 +892,7 @@ static DeviceState *apic_init(void *env, uint8_t apic_id)
 apic_mapped = 1;
 }
 
-msix_supported = 1;
+msi_supported = true;
 
 return dev;
 }
-- 
1.7.3.4

[Qemu-devel] [RFC][PATCH 01/45] msi: Guard msi/msix_write_config with msi_present

2011-10-17 Thread Jan Kiszka

Terminate msi/msix_write_config early if support is not enabled. This
allows removing checks at the call site if MSI is optional.

Signed-off-by: Jan Kiszka jan.kis...@siemens.com
---
 hw/msi.c  |3 ++-
 hw/msix.c |2 +-
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/hw/msi.c b/hw/msi.c
index 56a4698..bbc9cd7 100644
--- a/hw/msi.c
+++ b/hw/msi.c
@@ -378,7 +378,8 @@ void msi_write_config(PCIDevice *dev, uint32_t addr, 
uint32_t val, int len)
 unsigned int vector;
 uint32_t pending;
 
-if (!ranges_overlap(addr, len, dev-msi_cap, msi_cap_sizeof(flags))) {
+if (!msi_present(dev) ||
+!ranges_overlap(addr, len, dev-msi_cap, msi_cap_sizeof(flags))) {
 return;
 }
 
diff --git a/hw/msix.c b/hw/msix.c
index 60d6d1e..ebd5aee 100644
--- a/hw/msix.c
+++ b/hw/msix.c
@@ -240,7 +240,7 @@ void msix_write_config(PCIDevice *dev, uint32_t addr,
 unsigned enable_pos = dev-msix_cap + MSIX_CONTROL_OFFSET;
 int vector;
 
-if (!range_covers_byte(addr, len, enable_pos)) {
+if (!msix_present(dev) || !range_covers_byte(addr, len, enable_pos)) {
 return;
 }
 
-- 
1.7.3.4

[Qemu-devel] [RFC][PATCH 41/45] msix: Drop unused msix_bar_size

2011-10-17 Thread Jan Kiszka

No use for it, even more after the upcoming API changes.

Signed-off-by: Jan Kiszka jan.kis...@siemens.com
---
 hw/msix.c |8 
 hw/msix.h |2 --
 hw/pci.h  |2 --
 3 files changed, 0 insertions(+), 12 deletions(-)

diff --git a/hw/msix.c b/hw/msix.c
index 5f0fa6a..bccd8b1 100644
--- a/hw/msix.c
+++ b/hw/msix.c
@@ -83,7 +83,6 @@ static int msix_add_config(struct PCIDevice *pdev, unsigned 
short nentries,
 new_size = bar_size * 2;
 }
 
-pdev-msix_bar_size = new_size;
 config_offset = pci_add_capability(pdev, PCI_CAP_ID_MSIX, 0,
MSIX_CAP_LENGTH);
 if (config_offset  0) {
@@ -374,13 +373,6 @@ int msix_enabled(PCIDevice *dev)
  MSIX_ENABLE_MASK);
 }
 
-/* Size of bar where MSI-X table resides, or 0 if MSI-X not supported. */
-uint32_t msix_bar_size(PCIDevice *dev)
-{
-return (dev-cap_present  QEMU_PCI_CAP_MSIX) ?
-dev-msix_bar_size : 0;
-}
-
 /* Send an MSI-X message */
 void msix_notify(PCIDevice *dev, unsigned vector)
 {
diff --git a/hw/msix.h b/hw/msix.h
index 9cd54cf..dfc6087 100644
--- a/hw/msix.h
+++ b/hw/msix.h
@@ -19,8 +19,6 @@ void msix_load(PCIDevice *dev, QEMUFile *f);
 int msix_enabled(PCIDevice *dev);
 int msix_present(PCIDevice *dev);
 
-uint32_t msix_bar_size(PCIDevice *dev);
-
 void msix_clear_vector(PCIDevice *dev, unsigned vector);
 void msix_clear_all_vectors(PCIDevice *dev);
 
diff --git a/hw/pci.h b/hw/pci.h
index 266fe34..e2be271 100644
--- a/hw/pci.h
+++ b/hw/pci.h
@@ -178,8 +178,6 @@ struct PCIDevice {
 uint8_t *msix_table_page;
 /* MMIO index used to map MSIX table and pending bit entries. */
 MemoryRegion msix_mmio;
-/* Region including the MSI-X table */
-uint32_t msix_bar_size;
 /* Version id needed for VMState */
 int32_t version_id;
 
-- 
1.7.3.4

[Qemu-devel] [RFC][PATCH 37/45] qemu-kvm: Clean up irqrouting API

2011-10-17 Thread Jan Kiszka

Drop unused functions, privatize those which are only used internally now.

Signed-off-by: Jan Kiszka jan.kis...@siemens.com
---
 kvm-stub.c |   10 --
 kvm.h  |1 -
 qemu-kvm.c |   37 ++---
 qemu-kvm.h |   39 ---
 4 files changed, 6 insertions(+), 81 deletions(-)

diff --git a/kvm-stub.c b/kvm-stub.c
index acd1446..a4225e0 100644
--- a/kvm-stub.c
+++ b/kvm-stub.c
@@ -135,20 +135,10 @@ int kvm_has_gsi_routing(void)
 return 0;
 }
 
-int kvm_get_irq_route_gsi(void)
-{
-return -ENOSYS;
-}
-
 void kvm_msi_cache_invalidate(MSIRoutingCache *cache)
 {
 }
 
-int kvm_commit_irq_routes(void)
-{
-return -ENOSYS;
-}
-
 int kvm_set_irq(int irq, int level, int *status)
 {
 assert(0);
diff --git a/kvm.h b/kvm.h
index 61bcfec..9780e53 100644
--- a/kvm.h
+++ b/kvm.h
@@ -202,7 +202,6 @@ int kvm_set_ioeventfd_pio_word(int fd, uint16_t adr, 
uint16_t val, bool assign);
 
 int kvm_has_gsi_routing(void);
 int kvm_allows_irq0_override(void);
-int kvm_get_irq_route_gsi(void);
 
 void kvm_msi_cache_invalidate(MSIRoutingCache *cache);
 
diff --git a/qemu-kvm.c b/qemu-kvm.c
index c9b348c..34aebe5 100644
--- a/qemu-kvm.c
+++ b/qemu-kvm.c
@@ -188,12 +188,6 @@ int kvm_assign_pci_device(KVMState *s,
 return kvm_vm_ioctl(s, KVM_ASSIGN_PCI_DEVICE, assigned_dev);
 }
 
-static int kvm_old_assign_irq(KVMState *s,
-  struct kvm_assigned_irq *assigned_irq)
-{
-return kvm_vm_ioctl(s, KVM_ASSIGN_IRQ, assigned_irq);
-}
-
 int kvm_device_intx_assign(KVMState *s, uint32_t dev_id,
uint32_t host_irq_type, uint32_t guest_irq)
 {
@@ -210,25 +204,6 @@ int kvm_device_intx_assign(KVMState *s, uint32_t dev_id,
 return kvm_vm_ioctl(s, KVM_ASSIGN_IRQ, assigned_irq);
 }
 }
-
-#ifdef KVM_CAP_ASSIGN_DEV_IRQ
-int kvm_assign_irq(KVMState *s, struct kvm_assigned_irq *assigned_irq)
-{
-int ret;
-
-ret = kvm_ioctl(s, KVM_CHECK_EXTENSION, KVM_CAP_ASSIGN_DEV_IRQ);
-if (ret  0) {
-return kvm_vm_ioctl(s, KVM_ASSIGN_DEV_IRQ, assigned_irq);
-}
-
-return kvm_old_assign_irq(s, assigned_irq);
-}
-#else
-int kvm_assign_irq(KVMState *s, struct kvm_assigned_irq *assigned_irq)
-{
-return kvm_old_assign_irq(s, assigned_irq);
-}
-#endif
 #endif
 
 int kvm_device_irq_deassign(KVMState *s, uint32_t dev_id, uint32_t type)
@@ -275,8 +250,8 @@ int kvm_has_gsi_routing(void)
 return r;
 }
 
-int kvm_add_routing_entry(struct kvm_irq_routing_entry *entry,
-  MSIRoutingCache *msi_cache)
+static int kvm_add_routing_entry(struct kvm_irq_routing_entry *entry,
+ MSIRoutingCache *msi_cache)
 {
 #ifdef KVM_CAP_IRQ_ROUTING
 KVMState *s = kvm_state;
@@ -328,7 +303,7 @@ int kvm_add_irq_route(int gsi, int irqchip, int pin)
 #endif
 }
 
-int kvm_del_routing_entry(struct kvm_irq_routing_entry *entry)
+static int kvm_del_routing_entry(struct kvm_irq_routing_entry *entry)
 {
 #ifdef KVM_CAP_IRQ_ROUTING
 KVMState *s = kvm_state;
@@ -398,8 +373,8 @@ int kvm_del_routing_entry(struct kvm_irq_routing_entry 
*entry)
 #endif
 }
 
-int kvm_update_routing_entry(struct kvm_irq_routing_entry *entry,
- struct kvm_irq_routing_entry *newentry)
+static int kvm_update_routing_entry(struct kvm_irq_routing_entry *entry,
+struct kvm_irq_routing_entry *newentry)
 {
 #ifdef KVM_CAP_IRQ_ROUTING
 KVMState *s = kvm_state;
@@ -456,7 +431,7 @@ int kvm_commit_irq_routes(void)
 
 static void kvm_msi_cache_flush(KVMState *s);
 
-int kvm_get_irq_route_gsi(void)
+static int kvm_get_irq_route_gsi(void)
 {
 KVMState *s = kvm_state;
 int i, bit;
diff --git a/qemu-kvm.h b/qemu-kvm.h
index 552b668..6b73ce1 100644
--- a/qemu-kvm.h
+++ b/qemu-kvm.h
@@ -139,17 +139,6 @@ int kvm_enable_vapic(CPUState *env, uint64_t vapic);
 int kvm_assign_pci_device(KVMState *s,
   struct kvm_assigned_pci_dev *assigned_dev);
 
-/*!
- * \brief Assign IRQ for an assigned device
- *
- * Used for PCI device assignment, this function assigns IRQ numbers for
- * an physical device and guest IRQ handling.
- *
- * \param kvm Pointer to the current kvm_context
- * \param assigned_irq Parameters, like dev id, host irq, guest irq, etc
- */
-int kvm_assign_irq(KVMState *s, struct kvm_assigned_irq *assigned_irq);
-
 int kvm_device_intx_assign(KVMState *s, uint32_t dev_id,
uint32_t host_irq_type, uint32_t guest_irq);
 int kvm_device_msi_assign(KVMState *s, uint32_t dev_id, MSIMessage *msg,
@@ -182,34 +171,6 @@ int kvm_deassign_pci_device(KVMState *s,
  */
 int kvm_add_irq_route(int gsi, int irqchip, int pin);
 
-struct kvm_irq_routing_entry;
-/*!
- * \brief Adds a routing entry to the temporary irq routing table
- *
- * Adds a filled routing entry to the temporary irq routing table. Nothing is
- * committed to the running VM.
- */
-int kvm_add_routing_entry(struct

[Qemu-devel] [RFC][PATCH 44/45] pci-assign: Use generic MSI-X support

2011-10-17 Thread Jan Kiszka

Switch MSI-X support of the device assignment core to the generic layer
QEMU offers. As for legacy MSI, we use config notifiers to update IRQ
assignment and routes on guest changes. Quite a bit of code becomes
obsolete in the device assignment core, e.g. the maintenance of the MSI-X
vector masking MMIO page. Note that we have to reorder BAR mapping and
capability initialization in order to pass the BAR container on
msix_init.

Also in this case we still do not support per-vector masking even after
these changes.

Signed-off-by: Jan Kiszka jan.kis...@siemens.com
---
 hw/device-assignment.c |  335 +---
 hw/device-assignment.h |   14 +--
 2 files changed, 88 insertions(+), 261 deletions(-)

diff --git a/hw/device-assignment.c b/hw/device-assignment.c
index 10b30a3..df554b3 100644
--- a/hw/device-assignment.c
+++ b/hw/device-assignment.c
@@ -24,6 +24,7 @@
  *  Copyright (C) 2008, Qumranet, Amit Shah (amit.s...@qumranet.com)
  *  Copyright (C) 2008, Red Hat, Amit Shah (amit.s...@redhat.com)
  *  Copyright (C) 2008, IBM, Muli Ben-Yehuda (m...@il.ibm.com)
+ *  Copyright (C) 2011, Siemens AG, Jan Kiszka (jan.kis...@siemens.com)
  */
 #include stdio.h
 #include unistd.h
@@ -41,6 +42,7 @@
 #include range.h
 #include sysemu.h
 #include msi.h
+#include msix.h
 
 #define MSIX_PAGE_SIZE 0x1000
 
@@ -64,8 +66,6 @@
 
 static void assigned_dev_load_option_rom(AssignedDevice *dev);
 
-static void assigned_dev_unregister_msix_mmio(AssignedDevice *dev);
-
 static uint32_t assigned_dev_ioport_rw(AssignedDevRegion *dev_region,
uint32_t addr, int len, uint32_t *val)
 {
@@ -238,24 +238,11 @@ static void assigned_dev_iomem_setup(PCIDevice *pci_dev, 
int region_num,
 {
 AssignedDevice *r_dev = DO_UPCAST(AssignedDevice, dev, pci_dev);
 AssignedDevRegion *region = r_dev-v_addrs[region_num];
-PCIRegion *real_region = r_dev-real_device.regions[region_num];
 
 if (e_size  0) {
 memory_region_init(region-container, assigned-dev-container,
e_size);
 memory_region_add_subregion(region-container, 0, 
region-real_iomem);
-
-/* deal with MSI-X MMIO page */
-if (real_region-base_addr = r_dev-msix_table_addr 
-real_region-base_addr + real_region-size 
-r_dev-msix_table_addr) {
-int offset = r_dev-msix_table_addr - real_region-base_addr;
-
-memory_region_add_subregion_overlap(region-container,
-offset,
-r_dev-mmio,
-1);
-}
 }
 }
 
@@ -648,21 +635,20 @@ again:
 
 static QLIST_HEAD(, AssignedDevice) devs = QLIST_HEAD_INITIALIZER(devs);
 
-static void invalidate_msix_vectors(AssignedDevice *dev)
-{
-int i;
-
-for (i = 0; i  dev-irq_entries_nr; i++) {
-kvm_msi_cache_invalidate(dev-dev.msix_cache[i]);
-}
-}
-
 static void free_assigned_device(AssignedDevice *dev)
 {
+uint32_t table_bar_nr, pba_bar_nr;
+uint8_t *msix_cap;
 int i;
 
-if (dev-cap.available  ASSIGNED_DEVICE_CAP_MSIX) {
-assigned_dev_unregister_msix_mmio(dev);
+if (msix_present(dev-dev)) {
+msix_cap = dev-dev.config + dev-dev.msix_cap;
+table_bar_nr = pci_get_long(msix_cap + PCI_MSIX_TABLE) 
+PCI_MSIX_FLAGS_BIRMASK;
+pba_bar_nr = pci_get_long(msix_cap + PCI_MSIX_PBA) 
+PCI_MSIX_FLAGS_BIRMASK;
+msix_uninit(dev-dev, dev-v_addrs[table_bar_nr].container,
+dev-v_addrs[pba_bar_nr].container);
 }
 for (i = 0; i  dev-real_device.region_number; i++) {
 PCIRegion *pci_region = dev-real_device.regions[i];
@@ -698,9 +684,6 @@ static void free_assigned_device(AssignedDevice *dev)
 if (dev-real_device.config_fd = 0) {
 close(dev-real_device.config_fd);
 }
-
-invalidate_msix_vectors(dev);
-g_free(dev-dev.msix_cache);
 }
 
 static uint32_t calc_assigned_dev_id(AssignedDevice *dev)
@@ -916,11 +899,13 @@ void assigned_dev_update_irqs(void)
 }
 }
 
+/* used for both MSI and MSI-X */
 static void assigned_dev_update_msi(PCIDevice *pci_dev, bool enabled)
 {
 AssignedDevice *dev = DO_UPCAST(AssignedDevice, dev, pci_dev);
 
 if (!enabled) {
+dev-msix_vectors_in_use = 0;
 assign_intx(dev);
 }
 }
@@ -945,113 +930,66 @@ static int assigned_dev_update_msi_vector(PCIDevice 
*pci_dev,
 return 0;
 }
 
-static int assigned_dev_set_msix_vectors(PCIDevice *pci_dev)
+static int assigned_dev_update_msix_vector(PCIDevice *pci_dev,
+   unsigned int vector,
+   MSIMessage *msg, bool masked)
 {
-AssignedDevice *adev = DO_UPCAST(AssignedDevice, dev, pci_dev);
-uint16_t entries_nr = 0, entries_max_nr;
-void *msix_page = adev-msix_table_page;
+AssignedDevice *dev =

[Qemu-devel] [RFC][PATCH 32/45] pci-assign: Factor out deassign_irq

2011-10-17 Thread Jan Kiszka

Will have more users soon.

Signed-off-by: Jan Kiszka jan.kis...@siemens.com
---
 hw/device-assignment.c |   30 ++
 1 files changed, 18 insertions(+), 12 deletions(-)

diff --git a/hw/device-assignment.c b/hw/device-assignment.c
index e0b9cfe..e5ac54c 100644
--- a/hw/device-assignment.c
+++ b/hw/device-assignment.c
@@ -807,10 +807,25 @@ static int assign_device(AssignedDevice *dev)
 return r;
 }
 
+static void deassign_irq(AssignedDevice *dev)
+{
+int ret;
+
+if (dev-irq_requested_type) {
+ret = kvm_device_irq_deassign(kvm_state,
+  calc_assigned_dev_id(dev),
+  dev-irq_requested_type);
+if (ret) {
+perror(assigned_dev: deassign irq);
+}
+dev-girq = -1;
+dev-irq_requested_type = 0;
+}
+}
+
 static int assign_intx(AssignedDevice *dev)
 {
 struct kvm_assigned_irq assigned_irq_data;
-uint32_t dev_id;
 int irq, r;
 
 /* Interrupt PIN 0 means don't use INTx */
@@ -824,19 +839,10 @@ static int assign_intx(AssignedDevice *dev)
 return 0;
 }
 
-dev_id = calc_assigned_dev_id(dev);
-
-if (dev-irq_requested_type) {
-r = kvm_device_irq_deassign(kvm_state, dev_id,
-dev-irq_requested_type);
-if (r) {
-perror(assign_intx: deassign);
-}
-dev-irq_requested_type = 0;
-}
+deassign_irq(dev);
 
 memset(assigned_irq_data, 0, sizeof(assigned_irq_data));
-assigned_irq_data.assigned_dev_id = dev_id;
+assigned_irq_data.assigned_dev_id = calc_assigned_dev_id(dev);
 assigned_irq_data.guest_irq = irq;
 assigned_irq_data.flags = KVM_DEV_IRQ_GUEST_INTX;
 if (dev-features  ASSIGNED_DEVICE_PREFER_MSI_MASK 
-- 
1.7.3.4

[Qemu-devel] [RFC][PATCH 28/45] qemu-kvm: msix: Drop tracking of used vectors

2011-10-17 Thread Jan Kiszka

This optimization was only required to keep KVM route usage low. Now
that we solve that problem via lazy updates, we can drop the field. We
still need interfaces to clear pending vectors, though (and we have to
make use of them more broadly - but that's unrelated to this patch).

Signed-off-by: Jan Kiszka jan.kis...@siemens.com
---
 hw/ivshmem.c|   16 ++---
 hw/msix.c   |   62 +++---
 hw/msix.h   |5 +--
 hw/pci.h|2 -
 hw/virtio-pci.c |   20 +++--
 5 files changed, 26 insertions(+), 79 deletions(-)

diff --git a/hw/ivshmem.c b/hw/ivshmem.c
index 242fbea..a402c98 100644
--- a/hw/ivshmem.c
+++ b/hw/ivshmem.c
@@ -535,10 +535,8 @@ static uint64_t ivshmem_get_size(IVShmemState * s) {
 return value;
 }
 
-static void ivshmem_setup_msi(IVShmemState * s) {
-
-int i;
-
+static void ivshmem_setup_msi(IVShmemState *s)
+{
 /* allocate the MSI-X vectors */
 
 memory_region_init(s-msix_bar, ivshmem-msix, 4096);
@@ -551,11 +549,6 @@ static void ivshmem_setup_msi(IVShmemState * s) {
 exit(1);
 }
 
-/* 'activate' the vectors */
-for (i = 0; i  s-vectors; i++) {
-msix_vector_use(s-dev, i);
-}
-
 /* allocate Qemu char devices for receiving interrupts */
 s-eventfd_table = g_malloc0(s-vectors * sizeof(EventfdEntry));
 }
@@ -581,7 +574,7 @@ static int ivshmem_load(QEMUFile* f, void *opaque, int 
version_id)
 IVSHMEM_DPRINTF(ivshmem_load\n);
 
 IVShmemState *proxy = opaque;
-int ret, i;
+int ret;
 
 if (version_id  0) {
 return -EINVAL;
@@ -599,9 +592,6 @@ static int ivshmem_load(QEMUFile* f, void *opaque, int 
version_id)
 
 if (ivshmem_has_feature(proxy, IVSHMEM_MSI)) {
 msix_load(proxy-dev, f);
-for (i = 0; i  proxy-vectors; i++) {
-msix_vector_use(proxy-dev, i);
-}
 } else {
 proxy-intrstatus = qemu_get_be32(f);
 proxy-intrmask = qemu_get_be32(f);
diff --git a/hw/msix.c b/hw/msix.c
index ce3375a..f1b97b5 100644
--- a/hw/msix.c
+++ b/hw/msix.c
@@ -292,9 +292,6 @@ int msix_init(struct PCIDevice *dev, unsigned short 
nentries,
 if (nentries  MSIX_MAX_ENTRIES)
 return -EINVAL;
 
-dev-msix_entry_used = g_malloc0(MSIX_MAX_ENTRIES *
-sizeof *dev-msix_entry_used);
-
 dev-msix_table_page = g_malloc0(MSIX_PAGE_SIZE);
 msix_mask_all(dev, nentries);
 
@@ -317,21 +314,9 @@ err_config:
 memory_region_destroy(dev-msix_mmio);
 g_free(dev-msix_table_page);
 dev-msix_table_page = NULL;
-g_free(dev-msix_entry_used);
-dev-msix_entry_used = NULL;
 return ret;
 }
 
-static void msix_free_irq_entries(PCIDevice *dev)
-{
-int vector;
-
-for (vector = 0; vector  dev-msix_entries_nr; ++vector) {
-dev-msix_entry_used[vector] = 0;
-msix_clr_pending(dev, vector);
-}
-}
-
 /* Clean up resources for the device. */
 int msix_uninit(PCIDevice *dev, MemoryRegion *bar)
 {
@@ -340,14 +325,11 @@ int msix_uninit(PCIDevice *dev, MemoryRegion *bar)
 }
 pci_del_capability(dev, PCI_CAP_ID_MSIX, MSIX_CAP_LENGTH);
 dev-msix_cap = 0;
-msix_free_irq_entries(dev);
 dev-msix_entries_nr = 0;
 memory_region_del_subregion(bar, dev-msix_mmio);
 memory_region_destroy(dev-msix_mmio);
 g_free(dev-msix_table_page);
 dev-msix_table_page = NULL;
-g_free(dev-msix_entry_used);
-dev-msix_entry_used = NULL;
 
 kvm_msix_free(dev);
 g_free(dev-msix_cache);
@@ -376,7 +358,6 @@ void msix_load(PCIDevice *dev, QEMUFile *f)
 return;
 }
 
-msix_free_irq_entries(dev);
 qemu_get_buffer(f, dev-msix_table_page, n * PCI_MSIX_ENTRY_SIZE);
 qemu_get_buffer(f, dev-msix_table_page + MSIX_PAGE_PENDING, (n + 7) / 8);
 }
@@ -407,7 +388,7 @@ void msix_notify(PCIDevice *dev, unsigned vector)
 {
 MSIMessage msg;
 
-if (vector = dev-msix_entries_nr || !dev-msix_entry_used[vector])
+if (vector = dev-msix_entries_nr)
 return;
 if (msix_is_masked(dev, vector)) {
 msix_set_pending(dev, vector);
@@ -424,48 +405,31 @@ void msix_reset(PCIDevice *dev)
 if (!msix_present(dev)) {
 return;
 }
-msix_free_irq_entries(dev);
+msix_clear_all_vectors(dev);
 dev-config[dev-msix_cap + MSIX_CONTROL_OFFSET] =
~dev-wmask[dev-msix_cap + MSIX_CONTROL_OFFSET];
 memset(dev-msix_table_page, 0, MSIX_PAGE_SIZE);
 msix_mask_all(dev, dev-msix_entries_nr);
 }
 
-/* PCI spec suggests that devices make it possible for software to configure
- * less vectors than supported by the device, but does not specify a standard
- * mechanism for devices to do so.
- *
- * We support this by asking devices to declare vectors software is going to
- * actually use, and checking this on the notification path. Devices that
- * don't want to follow the spec suggestion can declare all vectors as used. */
-
-/* Mark vector as used. */
-int

[Qemu-devel] [RFC][PATCH 43/45] msix: Allow to customize capability on init

2011-10-17 Thread Jan Kiszka

This enables fully configurable MSI-X initialization by taking config
space offset, independent table and PBA BARs and the offset inside them
on msix_init. Table and PBA are now realized as two memory subregions,
either of the passed BAR regions or the single page container
msix_init_simple creates and registers.

Will be required for device assignment.

Signed-off-by: Jan Kiszka jan.kis...@siemens.com
---
 hw/msix.c |  245 +---
 hw/msix.h |7 ++-
 hw/pci.h  |   12 ++-
 3 files changed, 150 insertions(+), 114 deletions(-)

diff --git a/hw/msix.c b/hw/msix.c
index 258b9c1..548e712 100644
--- a/hw/msix.c
+++ b/hw/msix.c
@@ -25,18 +25,12 @@
 #define MSIX_ENABLE_MASK (PCI_MSIX_FLAGS_ENABLE  8)
 #define MSIX_MASKALL_MASK (PCI_MSIX_FLAGS_MASKALL  8)
 
-/* How much space does an MSIX table need. */
-/* The spec requires giving the table structure
- * a 4K aligned region all by itself. */
 #define MSIX_PAGE_SIZE 0x1000
-/* Reserve second half of the page for pending bits */
-#define MSIX_PAGE_PENDING (MSIX_PAGE_SIZE / 2)
-#define MSIX_MAX_ENTRIES 32
 
 static void msix_message_from_vector(PCIDevice *dev, unsigned vector,
  MSIMessage *msg)
 {
-uint8_t *table_entry = dev-msix_table_page + vector * PCI_MSIX_ENTRY_SIZE;
+uint8_t *table_entry = dev-msix_table + vector * PCI_MSIX_ENTRY_SIZE;
 
 msg-address = pci_get_quad(table_entry + PCI_MSIX_ENTRY_LOWER_ADDR);
 msg-data = pci_get_long(table_entry + PCI_MSIX_ENTRY_DATA);
@@ -54,67 +48,6 @@ static void kvm_msix_free(PCIDevice *dev)
 }
 }
 
-/* Add MSI-X capability to the config space for the device. */
-/* Given a bar and its size, add MSI-X table on top of it
- * and fill MSI-X capability in the config space.
- * Original bar size must be a power of 2 or 0.
- * New bar size is returned. */
-static int msix_add_config(struct PCIDevice *pdev, unsigned short nentries,
-   unsigned bar_nr, unsigned bar_size)
-{
-int config_offset;
-uint32_t new_size;
-uint8_t *config;
-
-if (nentries  1 || nentries  PCI_MSIX_FLAGS_QSIZE + 1) {
-return -EINVAL;
-}
-if (bar_size  0x8000) {
-return -ENOSPC;
-}
-
-/* Add space for MSI-X structures */
-if (!bar_size) {
-new_size = MSIX_PAGE_SIZE;
-} else if (bar_size  MSIX_PAGE_SIZE) {
-bar_size = MSIX_PAGE_SIZE;
-new_size = MSIX_PAGE_SIZE * 2;
-} else {
-new_size = bar_size * 2;
-}
-
-config_offset = pci_add_capability(pdev, PCI_CAP_ID_MSIX, 0,
-   MSIX_CAP_LENGTH);
-if (config_offset  0) {
-return config_offset;
-}
-pdev-msix_cap = config_offset;
-
-config = pdev-config + config_offset;
-pci_set_word(config + PCI_MSIX_FLAGS, nentries - 1);
-/* Table on top of BAR */
-pci_set_long(config + PCI_MSIX_TABLE, bar_size | bar_nr);
-/* Pending bits on top of that */
-pci_set_long(config + PCI_MSIX_PBA,
- (bar_size + MSIX_PAGE_PENDING) | bar_nr);
-
-/* Make flags bit writable. */
-pdev-wmask[config_offset + MSIX_CONTROL_OFFSET] |=
-MSIX_ENABLE_MASK | MSIX_MASKALL_MASK;
-
-return 0;
-}
-
-static uint64_t msix_mmio_read(void *opaque, target_phys_addr_t addr,
-   unsigned size)
-{
-PCIDevice *dev = opaque;
-unsigned int offset = addr  (MSIX_PAGE_SIZE - 1)  ~0x3;
-void *page = dev-msix_table_page;
-
-return pci_get_long(page + offset);
-}
-
 static uint8_t msix_pending_mask(int vector)
 {
 return 1  (vector % 8);
@@ -122,7 +55,7 @@ static uint8_t msix_pending_mask(int vector)
 
 static uint8_t *msix_pending_byte(PCIDevice *dev, int vector)
 {
-return dev-msix_table_page + MSIX_PAGE_PENDING + vector / 8;
+return dev-msix_pba + vector / 8;
 }
 
 static int msix_is_pending(PCIDevice *dev, int vector)
@@ -150,7 +83,7 @@ static bool msix_is_masked(PCIDevice *dev, int vector)
 unsigned offset =
 vector * PCI_MSIX_ENTRY_SIZE + PCI_MSIX_ENTRY_VECTOR_CTRL;
 return msix_function_masked(dev) ||
-  dev-msix_table_page[offset]  PCI_MSIX_ENTRY_CTRL_MASKBIT;
+dev-msix_table[offset]  PCI_MSIX_ENTRY_CTRL_MASKBIT;
 }
 
 static void msix_fire_vector_config_notifier(PCIDevice *dev,
@@ -213,18 +146,25 @@ void msix_write_config(PCIDevice *dev, uint32_t addr,
 }
 }
 
-static void msix_mmio_write(void *opaque, target_phys_addr_t addr,
-uint64_t val, unsigned size)
+static uint64_t msix_table_read(void *opaque, target_phys_addr_t addr,
+unsigned size)
+{
+PCIDevice *dev = opaque;
+
+return pci_get_long(dev-msix_table + addr);
+}
+
+static void msix_table_write(void *opaque, target_phys_addr_t addr,
+ uint64_t val, unsigned size)
 {
 PCIDevice *dev = opaque;
-unsigned int offset = addr  (MSIX_PAGE_SIZE - 1)  ~0x3;
-unsigned int

[Qemu-devel] [RFC][PATCH 38/45] msi: Implement config notifiers for legacy MSI

2011-10-17 Thread Jan Kiszka

Realize support for MSI config notifiers analogously to MSI-X. The logic
is slightly more complex for legacy MSI as per-vector masking is optional
here. Device assignment will be the first user.

Note that this change does not introduce per-vector masking support.
This can be added at some later point, using the notifications the
MSI layer provides now.

Signed-off-by: Jan Kiszka jan.kis...@siemens.com
---
 hw/msi.c |  171 --
 hw/msi.h |7 ++-
 hw/pci.c |2 +-
 hw/pci.h |3 +
 4 files changed, 166 insertions(+), 17 deletions(-)

diff --git a/hw/msi.c b/hw/msi.c
index 23d79dd..2380ee3 100644
--- a/hw/msi.c
+++ b/hw/msi.c
@@ -241,15 +241,15 @@ void msi_uninit(struct PCIDevice *dev)
 
 void msi_reset(PCIDevice *dev)
 {
-uint16_t flags;
+uint16_t flags, old_flags;
 bool msi64bit;
 
 if (!msi_present(dev)) {
 return;
 }
 
-flags = pci_get_word(dev-config + msi_flags_off(dev));
-flags = ~(PCI_MSI_FLAGS_QSIZE | PCI_MSI_FLAGS_ENABLE);
+old_flags = pci_get_word(dev-config + msi_flags_off(dev));
+flags = old_flags  ~(PCI_MSI_FLAGS_QSIZE | PCI_MSI_FLAGS_ENABLE);
 msi64bit = flags  PCI_MSI_FLAGS_64BIT;
 
 pci_set_word(dev-config + msi_flags_off(dev), flags);
@@ -262,6 +262,8 @@ void msi_reset(PCIDevice *dev)
 pci_set_long(dev-config + msi_mask_off(dev, msi64bit), 0);
 pci_set_long(dev-config + msi_pending_off(dev, msi64bit), 0);
 }
+/* trigger notifier on potential changes */
+msi_write_config(dev, msi_flags_off(dev), old_flags, 2);
 MSI_DEV_PRINTF(dev, reset\n);
 }
 
@@ -306,16 +308,20 @@ void msi_notify(PCIDevice *dev, unsigned int vector)
 }
 
 /* Normally called by pci_default_write_config(). */
-void msi_write_config(PCIDevice *dev, uint32_t addr, uint32_t val, int len)
+void msi_write_config(PCIDevice *dev, uint32_t addr, uint32_t old_val, int len)
 {
 uint16_t flags = pci_get_word(dev-config + msi_flags_off(dev));
 bool msi64bit = flags  PCI_MSI_FLAGS_64BIT;
 bool msi_per_vector_mask = flags  PCI_MSI_FLAGS_MASKBIT;
+bool fire_vector_notifier = false;
 unsigned int nr_vectors;
 uint8_t log_num_vecs;
 uint8_t log_max_vecs;
 unsigned int vector;
 uint32_t pending;
+MSIMessage msg;
+bool enabled;
+int ret;
 
 if (!msi_present(dev) ||
 !ranges_overlap(addr, len, dev-msi_cap, msi_cap_sizeof(flags))) {
@@ -342,7 +348,35 @@ void msi_write_config(PCIDevice *dev, uint32_t addr, 
uint32_t val, int len)
 fprintf(stderr, \n);
 #endif
 
-if (!(flags  PCI_MSI_FLAGS_ENABLE)) {
+enabled = flags  PCI_MSI_FLAGS_ENABLE;
+nr_vectors = msi_nr_vectors(flags);
+
+if (dev-msi_enable_notifier 
+range_covers_byte(addr, len, msi_flags_off(dev))) {
+old_val = (msi_flags_off(dev) - addr) * 8;
+if ((old_val  PCI_MSI_FLAGS_ENABLE) != enabled) {
+dev-msi_enable_notifier(dev, enabled);
+if (enabled  dev-msi_vector_config_notifier) {
+fire_vector_notifier = true;
+}
+}
+}
+if (dev-msi_vector_config_notifier) {
+if (ranges_overlap(addr, len, msi_address_lo_off(dev),
+   msi64bit ? 10 : 6)) {
+fire_vector_notifier = true;
+}
+}
+if (fire_vector_notifier) {
+for (vector = 0; vector  nr_vectors; ++vector) {
+msi_message_from_vector(dev, flags, vector, msg);
+ret = dev-msi_vector_config_notifier(dev, vector, msg,
+  msi_is_masked(dev, vector));
+assert(ret = 0);
+}
+}
+
+if (!enabled) {
 kvm_msi_free(dev);
 return;
 }
@@ -375,13 +409,12 @@ void msi_write_config(PCIDevice *dev, uint32_t addr, 
uint32_t val, int len)
 pci_set_word(dev-config + msi_flags_off(dev), flags);
 }
 
-if (!msi_per_vector_mask) {
-/* if per vector masking isn't supported,
-   there is no pending interrupt. */
+if (!msi_per_vector_mask ||
+!ranges_overlap(addr, len, msi_mask_off(dev, msi64bit), 4)) {
 return;
 }
 
-nr_vectors = msi_nr_vectors(flags);
+old_val = (msi_mask_off(dev, msi64bit) - addr) * 8;
 
 /* This will discard pending interrupts, if any. */
 pending = pci_get_long(dev-config + msi_pending_off(dev, msi64bit));
@@ -390,13 +423,22 @@ void msi_write_config(PCIDevice *dev, uint32_t addr, 
uint32_t val, int len)
 
 /* deliver pending interrupts which are unmasked */
 for (vector = 0; vector  nr_vectors; ++vector) {
-if (msi_is_masked(dev, vector) || !(pending  (1U  vector))) {
-continue;
+bool is_masked = msi_is_masked(dev, vector);
+unsigned int vector_mask = 1U  vector;
+
+if (!fire_vector_notifier  dev-msi_vector_config_notifier 
+(bool)(old_val  vector_mask) != is_masked) {
+msi_message_from_vector(dev, flags, vector,

Re: [Qemu-devel] [PATCH RFC v1 1/2] hyper-v: introduce Hyper-V support infrastructure.

2011-10-17 Thread Jan Kiszka

On 2011-10-17 11:17, Vadim Rozenfeld wrote:
 with the following series of patches we are starting to implement
 some basic Microsoft Hyper-V Enlightenment functionality, like relaxed
 timing, spinlock, and virtual apic support.
 
 For more Hyper-V related information please see:
 Hypervisor Functional Specification v2.0: For Windows Server 2008 R2 at
 http://www.microsoft.com/download/en/details.aspx?displaylang=enid=18673
 ---
  Makefile.target|2 +
  default-configs/i386-softmmu.mak   |1 +
  default-configs/x86_64-softmmu.mak |1 +
  target-i386/cpuid.c|   14 +++
  target-i386/hyperv.c   |   69 
 
  target-i386/hyperv.h   |   30 +++
  6 files changed, 117 insertions(+), 0 deletions(-)
  create mode 100644 target-i386/hyperv.c
  create mode 100644 target-i386/hyperv.h
 
 diff --git a/Makefile.target b/Makefile.target
 index 40cc592..2c8e1b8 100644
 --- a/Makefile.target
 +++ b/Makefile.target
 @@ -202,6 +202,8 @@ obj-$(CONFIG_NO_KVM) += kvm-stub.o
  obj-y += memory.o
  LIBS+=-lz
  
 +obj-$(CONFIG_HYPERV) += hyperv.o

obj-i386-y

 +
  QEMU_CFLAGS += $(VNC_TLS_CFLAGS)
  QEMU_CFLAGS += $(VNC_SASL_CFLAGS)
  QEMU_CFLAGS += $(VNC_JPEG_CFLAGS)
 diff --git a/default-configs/i386-softmmu.mak 
 b/default-configs/i386-softmmu.mak
 index 55589fa..ee69a0a 100644
 --- a/default-configs/i386-softmmu.mak
 +++ b/default-configs/i386-softmmu.mak
 @@ -21,3 +21,4 @@ CONFIG_PIIX_PCI=y
  CONFIG_SOUND=y
  CONFIG_HPET=y
  CONFIG_APPLESMC=y
 +CONFIG_HYPERV=y
 diff --git a/default-configs/x86_64-softmmu.mak 
 b/default-configs/x86_64-softmmu.mak
 index 8895028..35b1c00 100644
 --- a/default-configs/x86_64-softmmu.mak
 +++ b/default-configs/x86_64-softmmu.mak
 @@ -21,3 +21,4 @@ CONFIG_PIIX_PCI=y
  CONFIG_SOUND=y
  CONFIG_HPET=y
  CONFIG_APPLESMC=y
 +CONFIG_HYPERV=y

Useless config options (that do not work anyway as Kevin noted).

 diff --git a/target-i386/cpuid.c b/target-i386/cpuid.c
 index 1e8bcff..50b2d0e 100644
 --- a/target-i386/cpuid.c
 +++ b/target-i386/cpuid.c
 @@ -27,6 +27,8 @@
  #include qemu-option.h
  #include qemu-config.h
  
 +#include hyperv.h
 +
  /* feature flags taken from Intel Processor Identification and the CPUID
   * Instruction and AMD's CPUID Specification.  In cases of disagreement
   * between feature naming conventions, aliases may be added.
 @@ -716,6 +718,14 @@ static int cpu_x86_find_by_name(x86_def_t *x86_cpu_def, 
 const char *cpu_model)
  goto error;
  }
  x86_cpu_def-tsc_khz = tsc_freq / 1000;
 +} else if (!strcmp(featurestr, hv_spinlocks)) {
 +   char* err;
 +   numvalue = strtoul(val, err, 0);
 +   if (!*val || *err) {
 +fprintf(stderr, bad numerical value %s\n, val);
 +goto error;
 +}
 +hyperv_set_spinlock_retries(numvalue);
  } else {
  fprintf(stderr, unrecognized feature %s\n, featurestr);
  goto error;
 @@ -724,6 +734,10 @@ static int cpu_x86_find_by_name(x86_def_t *x86_cpu_def, 
 const char *cpu_model)
  check_cpuid = 1;
  } else if (!strcmp(featurestr, enforce)) {
  check_cpuid = enforce_cpuid = 1;
 +} else if (!strcmp(featurestr, hv_relaxed)) {
 +hyperv_set_relaxed_timing(1);
 +} else if (!strcmp(featurestr, hv_vapic)) {
 +hyperv_set_vapic_recommended(1);
  } else {
  fprintf(stderr, feature string `%s' not in format 
 (+feature|-feature|feature=xyz)\n, featurestr);
  goto error;
 diff --git a/target-i386/hyperv.c b/target-i386/hyperv.c
 new file mode 100644
 index 000..bed859e
 --- /dev/null
 +++ b/target-i386/hyperv.c
 @@ -0,0 +1,69 @@
 +/*
 + * QEMU Hyper-V support
 + *
 + * Copyright Red Hat, Inc. 2011
 + *
 + * Author: Vadim Rozenfeld vroze...@redhat.com
 + *
 + * This work is licensed under the terms of the GNU GPL, version 2 or later.
 + * See the COPYING file in the top-level directory.
 + *
 + */
 +
 +#include hyperv.h
 +
 +static int hyperv_vapic;
 +static int hyperv_relaxed_timing;
 +static int hyperv_spinlock_attempts = HYPERV_SPINLOCK_NEVER_RETRY;
 +
 +void hyperv_set_vapic_recommended(int val)
 +{
 +hyperv_vapic = val;
 +}
 +
 +void hyperv_set_relaxed_timing(int val)
 +{
 +hyperv_relaxed_timing = val;
 +}
 +
 +void hyperv_set_spinlock_retries(int val)
 +{
 +hyperv_spinlock_attempts = val;
 +if (hyperv_spinlock_attempts  0xFFF) {
 +hyperv_spinlock_attempts = 0xFFF;
 +}
 +}

hyperv_enabled_x(bool enable) would be nicer.

 +
 +int hyperv_enabled(void)
 +{
 +return hyperv_hypercall_available() || hyperv_get_relaxed_timing();
 +}
 +
 +int hyperv_hypercall_available(void)
 +{
 +if (hyperv_vapic ||
 +(hyperv_spinlock_attempts != HYPERV_SPINLOCK_NEVER_RETRY)) {
 +  return 1;
 +

Re: [Qemu-devel] [PATCH 1/1 V5] kernel/kvm: introduce KVM_SET_LINT1 and fix improper nmi emulation

2011-10-17 Thread Lai Jiangshan

On 10/16/2011 05:39 PM, Avi Kivity wrote:
 On 10/14/2011 11:03 AM, Lai Jiangshan wrote:
 Currently, NMI interrupt is blindly sent to all the vCPUs when NMI
 button event happens. This doesn't properly emulate real hardware on
 which NMI button event triggers LINT1. Because of this, NMI is sent to
 the processor even when LINT1 is masked in LVT. For example, this
 causes the problem that kdump initiated by NMI sometimes doesn't work
 on KVM, because kdump assumes NMI is masked on CPUs other than CPU0.

 With this patch, we introduce KVM_SET_LINT1,
 and we can use KVM_SET_LINT1 to correctly emulate the NMI button
 without changing the old KVM_NMI behavior.
  
 @@ -759,6 +762,8 @@ struct kvm_clock_data {
  #define KVM_CREATE_SPAPR_TCE  _IOW(KVMIO,  0xa8, struct 
 kvm_create_spapr_tce)
  /* Available with KVM_CAP_RMA */
  #define KVM_ALLOCATE_RMA  _IOR(KVMIO,  0xa9, struct kvm_allocate_rma)
 +/* Available with KVM_CAP_SET_LINT1 for x86 */
 +#define KVM_SET_LINT1 _IO(KVMIO,   0xaa)
  

 
 LINT1 may have been programmed as a level-triggered interrupt instead
 of edge-triggered (NMI or interrupt).  We can use the ioctl argument for
 the level (and pressing the NMI button needs to pulse the level to 1 and
 back to 0).
 

Hi, Avi, Jan,

Which approach do you prefer?
I need to know the answer before spending too much time respinning
the approach.

1) Fix KVM_NMI emulation approach (which is the v3 patchset)
- It directly fixes the problem and matches
  real hardware more closely, but it changes KVM_NMI behavior.
- Requires both kernel-side and userspace-side fixes.

2) Get the LAPIC state from the kernel irqchip, and inject NMI if it is allowed
   (which is the v4 patchset)
- Simple, doesn't change any kernel behavior.
- Only needs the userspace-side fix.

3) Add KVM_SET_LINT1 approach (which is the v5 patchset)
- Doesn't change the kernel's KVM_NMI behavior.
- Much more complex.
- Requires both kernel-side and userspace-side fixes.
- The userspace side should also handle the !KVM_SET_LINT1
  condition, which uses all of approach 2)'s code. This means
  this approach equals approach 2) + the KVM_SET_LINT1 ioctl.

This is an urgent bug for us; we need to settle it soon.

Thanks,
Lai

Re: [Qemu-devel] [PATCH RFC v1 2/2] hyper-v: initialize Hyper-V CPUID leafs.

2011-10-17 Thread Paolo Bonzini


On 10/17/2011 11:17 AM, Vadim Rozenfeld wrote:

@@ -379,11 +380,16 @@ int kvm_arch_init_vcpu(CPUState *env)
  cpuid_i = 0;

  /* Paravirtualization CPUIDs */
-memcpy(signature, KVMKVMKVM\0\0\0, 12);
  c =cpuid_data.entries[cpuid_i++];
  memset(c, 0, sizeof(*c));
  c-function = KVM_CPUID_SIGNATURE;
-c-eax = 0;
+if (!hyperv_enabled()) {
+memcpy(signature, KVMKVMKVM\0\0\0, 12);
+c-eax = 0;
+} else {
+memcpy(signature, Microsoft Hv, 12);
+c-eax = HYPERV_CPUID_MIN;
+}


Even not counting that hyper-v support should IMHO not be in 
KVM-specific code, I still think this shouldn't remove KVM leaves 
completely but rather move them to 0x40000100.  The KVM 
paravirtualization code then can similarly probe with 0x100 stride up to 
0x40001000.  This is what was done for Xen, and it allows to enable 
enlightenments independent of whether the guest is Linux or Windows.


However, let's get a third opinion---Avi, what do you think?

Paolo

[Qemu-devel] [RFC][PATCH 05/45] msi: Invoke msi/msix_write_config from PCI core

2011-10-17 Thread Jan Kiszka

Also, these functions are better invoked by the core than by each and every
device. This allows dropping the config_write callbacks from ich and
intel-hda.

CC: Alexander Graf ag...@suse.de
CC: Gerd Hoffmann kra...@redhat.com
CC: Isaku Yamahata yamah...@valinux.co.jp
Signed-off-by: Jan Kiszka jan.kis...@siemens.com
---
 hw/ide/ich.c|8 
 hw/intel-hda.c  |   12 
 hw/ioh3420.c|1 -
 hw/msi.c|2 +-
 hw/pci.c|3 +++
 hw/virtio-pci.c |2 --
 hw/xio3130_downstream.c |1 -
 hw/xio3130_upstream.c   |1 -
 8 files changed, 4 insertions(+), 26 deletions(-)

diff --git a/hw/ide/ich.c b/hw/ide/ich.c
index 3f7510f..a470c01 100644
--- a/hw/ide/ich.c
+++ b/hw/ide/ich.c
@@ -139,13 +139,6 @@ static int pci_ich9_uninit(PCIDevice *dev)
 return 0;
 }
 
-static void pci_ich9_write_config(PCIDevice *pci, uint32_t addr,
-  uint32_t val, int len)
-{
-pci_default_write_config(pci, addr, val, len);
-msi_write_config(pci, addr, val, len);
-}
-
 static PCIDeviceInfo ich_ahci_info[] = {
 {
 .qdev.name= ich9-ahci,
@@ -154,7 +147,6 @@ static PCIDeviceInfo ich_ahci_info[] = {
 .qdev.vmsd= vmstate_ahci,
 .init = pci_ich9_ahci_init,
 .exit = pci_ich9_uninit,
-.config_write = pci_ich9_write_config,
 .vendor_id= PCI_VENDOR_ID_INTEL,
 .device_id= PCI_DEVICE_ID_INTEL_82801IR,
 .revision = 0x02,
diff --git a/hw/intel-hda.c b/hw/intel-hda.c
index 4272204..0453039 100644
--- a/hw/intel-hda.c
+++ b/hw/intel-hda.c
@@ -1156,17 +1156,6 @@ static int intel_hda_exit(PCIDevice *pci)
 return 0;
 }
 
-static void intel_hda_write_config(PCIDevice *pci, uint32_t addr,
-   uint32_t val, int len)
-{
-IntelHDAState *d = DO_UPCAST(IntelHDAState, pci, pci);
-
-pci_default_write_config(pci, addr, val, len);
-if (d-msi) {
-msi_write_config(pci, addr, val, len);
-}
-}
-
 static int intel_hda_post_load(void *opaque, int version)
 {
 IntelHDAState* d = opaque;
@@ -1250,7 +1239,6 @@ static PCIDeviceInfo intel_hda_info = {
 .qdev.reset   = intel_hda_reset,
 .init = intel_hda_init,
 .exit = intel_hda_exit,
-.config_write = intel_hda_write_config,
 .vendor_id= PCI_VENDOR_ID_INTEL,
 .device_id= 0x2668,
 .revision = 1,
diff --git a/hw/ioh3420.c b/hw/ioh3420.c
index fc2fb3b..886ede8 100644
--- a/hw/ioh3420.c
+++ b/hw/ioh3420.c
@@ -71,7 +71,6 @@ static void ioh3420_write_config(PCIDevice *d,
 pci_get_long(d-config + d-exp.aer_cap + PCI_ERR_ROOT_COMMAND);
 
 pci_bridge_write_config(d, address, val, len);
-msi_write_config(d, address, val, len);
 ioh3420_aer_vector_update(d);
 pcie_cap_slot_write_config(d, address, val, len);
 pcie_aer_write_config(d, address, val, len);
diff --git a/hw/msi.c b/hw/msi.c
index b117f69..c924e38 100644
--- a/hw/msi.c
+++ b/hw/msi.c
@@ -369,7 +369,7 @@ void msi_notify(PCIDevice *dev, unsigned int vector)
 stl_le_phys(address, data);
 }
 
-/* call this function after updating configs by pci_default_write_config(). */
+/* Normally called by pci_default_write_config(). */
 void msi_write_config(PCIDevice *dev, uint32_t addr, uint32_t val, int len)
 {
 uint16_t flags = pci_get_word(dev-config + msi_flags_off(dev));
diff --git a/hw/pci.c b/hw/pci.c
index 933d49e..6673989 100644
--- a/hw/pci.c
+++ b/hw/pci.c
@@ -1154,6 +1154,9 @@ void pci_default_write_config(PCIDevice *d, uint32_t 
addr, uint32_t val, int l)
 
 if (range_covers_byte(addr, l, PCI_COMMAND))
 pci_update_irq_disabled(d, was_irq_disabled);
+
+msi_write_config(d, addr, val, l);
+msix_write_config(d, addr, val, l);
 }
 
 /***/
diff --git a/hw/virtio-pci.c b/hw/virtio-pci.c
index 3fb250f..615295e 100644
--- a/hw/virtio-pci.c
+++ b/hw/virtio-pci.c
@@ -502,8 +502,6 @@ static void virtio_write_config(PCIDevice *pci_dev, 
uint32_t address,
 virtio_set_status(proxy-vdev,
   proxy-vdev-status  ~VIRTIO_CONFIG_S_DRIVER_OK);
 }
-
-msix_write_config(pci_dev, address, val, len);
 }
 
 static unsigned virtio_pci_get_features(void *opaque)
diff --git a/hw/xio3130_downstream.c b/hw/xio3130_downstream.c
index 464eefa..8e9117d 100644
--- a/hw/xio3130_downstream.c
+++ b/hw/xio3130_downstream.c
@@ -41,7 +41,6 @@ static void xio3130_downstream_write_config(PCIDevice *d, 
uint32_t address,
 pci_bridge_write_config(d, address, val, len);
 pcie_cap_flr_write_config(d, address, val, len);
 pcie_cap_slot_write_config(d, address, val, len);
-msi_write_config(d, address, val, len);
 pcie_aer_write_config(d, address, val, len);
 }
 
diff --git a/hw/xio3130_upstream.c b/hw/xio3130_upstream.c
index 0d8d254..707401e 100644
--- a/hw/xio3130_upstream.c
+++ b/hw/xio3130_upstream.c
@@

Re: [Qemu-devel] [PATCH 1/1 V5] kernel/kvm: introduce KVM_SET_LINT1 and fix improper nmi emulation

2011-10-17 Thread Avi Kivity

On 10/17/2011 11:40 AM, Lai Jiangshan wrote:
 
  
  LINT1 may have been programmed as a level-triggered interrupt instead
  of edge-triggered (NMI or interrupt).  We can use the ioctl argument for
  the level (and pressing the NMI button needs to pulse the level to 1 and
  back to 0).
  

 Hi, Avi, Jan,

 Which approach do you prefer?
 I need to know the answer before spending too much time respinning
 the approach.

Yes, sorry about the slow and sometimes conflicting feedback.

 1) Fix KVM_NMI emulation approach (which is the v3 patchset)
   - It directly fixes the problem and matches
 real hardware more closely, but it changes KVM_NMI behavior.
   - Requires both kernel-side and userspace-side fixes.

 2) Get the LAPIC state from the kernel irqchip, and inject NMI if it is allowed
 (which is the v4 patchset)
   - Simple, doesn't change any kernel behavior.
   - Only needs the userspace-side fix.

 3) Add KVM_SET_LINT1 approach (which is the v5 patchset)
   - Doesn't change the kernel's KVM_NMI behavior.
   - Much more complex.
   - Requires both kernel-side and userspace-side fixes.
   - The userspace side should also handle the !KVM_SET_LINT1
 condition, which uses all of approach 2)'s code. This means
 this approach equals approach 2) + the KVM_SET_LINT1 ioctl.

 This is an urgent bug for us; we need to settle it soon.

While (1) is simple, it overloads a single ioctl with two meanings,
that's not so good.

Whether we do (1) or (3), we need (2) as well, for older kernels.

So I recommend first focusing on (2) and merging it, then doing (3).

(note an additional issue with 3 is whether to make it a vm or vcpu
ioctl - we've been assuming vcpu ioctl but it's not necessarily the best
choice).

-- 
error compiling committee.c: too many arguments to function

Re: [Qemu-devel] [PATCH 1/1 V5] kernel/kvm: introduce KVM_SET_LINT1 and fix improper nmi emulation

2011-10-17 Thread Avi Kivity

On 10/17/2011 11:17 AM, Lai Jiangshan wrote:
 On 10/16/2011 05:39 PM, Avi Kivity wrote:
  On 10/14/2011 11:03 AM, Lai Jiangshan wrote:
  Currently, NMI interrupt is blindly sent to all the vCPUs when NMI
  button event happens. This doesn't properly emulate real hardware on
  which NMI button event triggers LINT1. Because of this, NMI is sent to
  the processor even when LINT1 is masked in LVT. For example, this
  causes the problem that kdump initiated by NMI sometimes doesn't work
  on KVM, because kdump assumes NMI is masked on CPUs other than CPU0.
 
  With this patch, we introduce KVM_SET_LINT1,
  and we can use KVM_SET_LINT1 to correctly emulate the NMI button
  without changing the old KVM_NMI behavior.
   
  @@ -759,6 +762,8 @@ struct kvm_clock_data {
   #define KVM_CREATE_SPAPR_TCE_IOW(KVMIO,  0xa8, struct 
  kvm_create_spapr_tce)
   /* Available with KVM_CAP_RMA */
   #define KVM_ALLOCATE_RMA_IOR(KVMIO,  0xa9, struct kvm_allocate_rma)
  +/* Available with KVM_CAP_SET_LINT1 for x86 */
  +#define KVM_SET_LINT1   _IO(KVMIO,   0xaa)
   
 
  
  LINT1 may have been programmed as a level-triggered interrupt instead
  of edge-triggered (NMI or interrupt).  We can use the ioctl argument for
  the level (and pressing the NMI button needs to pulse the level to 1 and
  back to 0).
  

 Hi, Avi,

 How to handle level=0 in the kernel?
 Or just ignore it?

It needs to be handled according to the delivery mode, polarity, and
trigger mode bits in the LVT.

For example, a Fixed delivery mode with polarity 1 and level trigger
mode will post the interrupt as long as it is in level 0 and not masked
by the ISR.  __apic_accept_irq() should handle this.

-- 
error compiling committee.c: too many arguments to function

Re: [Qemu-devel] [PATCH] fix memory leak in aio_write_f

2011-10-17 Thread Kevin Wolf

Am 28.09.2011 08:57, schrieb a...@redhat.com:
 From: Alex Jia a...@redhat.com
 
 Haven't released memory of 'ctx' before return.
 
 Signed-off-by: Alex Jia a...@redhat.com

Thanks, applied to the block branch.

Kevin

Re: [Qemu-devel] [PATCH v2 0/2] spice migration interface v2 (RHBZ 737921)

2011-10-17 Thread Gerd Hoffmann


On 10/17/11 10:03, Yonit Halperin wrote:

Same as the previous series with a small fix to
allow compilation without Spice disabled.
allow compliation without Spice disabled.


Replaced patches.

Thanks,
  Gerd

[Qemu-devel] [PATCH] change free() to g_free() to pair with g_malloc() series.

2011-10-17 Thread Ray Wang

Signed-off-by: Ray Wang rayw...@linux.vnet.ibm.com
---
 block/dmg.c |   14 +++---
 target-arm/helper.c |2 +-
 2 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/block/dmg.c b/block/dmg.c
index 64c3cce..661f31b 100644
--- a/block/dmg.c
+++ b/block/dmg.c
@@ -284,14 +284,14 @@ static void dmg_close(BlockDriverState *bs)
 {
 BDRVDMGState *s = bs-opaque;
 if(s-n_chunks0) {
-   free(s-types);
-   free(s-offsets);
-   free(s-lengths);
-   free(s-sectors);
-   free(s-sectorcounts);
+   g_free(s-types);
+   g_free(s-offsets);
+   g_free(s-lengths);
+   g_free(s-sectors);
+   g_free(s-sectorcounts);
 }
-free(s-compressed_chunk);
-free(s-uncompressed_chunk);
+g_free(s-compressed_chunk);
+g_free(s-uncompressed_chunk);
 inflateEnd(s-zstream);
 }
 
diff --git a/target-arm/helper.c b/target-arm/helper.c
index e2428eb..2b17dc9 100644
--- a/target-arm/helper.c
+++ b/target-arm/helper.c
@@ -471,7 +471,7 @@ static uint32_t cpu_arm_find_by_name(const char *name)
 
 void cpu_arm_close(CPUARMState *env)
 {
-free(env);
+g_free(env);
 }
 
 uint32_t cpsr_read(CPUARMState *env)
-- 
1.7.4.1

[Qemu-devel] [RFC][PATCH 30/45] pci-assign: Rename assign_irq to assign_intx

2011-10-17 Thread Jan Kiszka

The previous name may incorrectly suggest that this function assigns all
types of IRQs though it's only dealing with legacy interrupts.

Signed-off-by: Jan Kiszka jan.kis...@siemens.com
---
 hw/device-assignment.c |   14 +++---
 1 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/hw/device-assignment.c b/hw/device-assignment.c
index 799b816..4e4349b 100644
--- a/hw/device-assignment.c
+++ b/hw/device-assignment.c
@@ -807,7 +807,7 @@ static int assign_device(AssignedDevice *dev)
 return r;
 }
 
-static int assign_irq(AssignedDevice *dev)
+static int assign_intx(AssignedDevice *dev)
 {
 struct kvm_assigned_irq assigned_irq_data;
 int irq, r = 0;
@@ -829,7 +829,7 @@ static int assign_irq(AssignedDevice *dev)
 assigned_irq_data.flags = dev-irq_requested_type;
 r = kvm_deassign_irq(kvm_state, assigned_irq_data);
 if (r) {
-perror(assign_irq: deassign);
+perror(assign_intx: deassign);
 }
 dev-irq_requested_type = 0;
 }
@@ -898,7 +898,7 @@ void assigned_dev_update_irqs(void)
 while (dev) {
 next = QLIST_NEXT(dev, next);
 if (dev-irq_requested_type  KVM_DEV_IRQ_HOST_INTX) {
-r = assign_irq(dev);
+r = assign_intx(dev);
 if (r  0) {
 qdev_unplug(dev-dev.qdev);
 }
@@ -967,7 +967,7 @@ static void assigned_dev_update_msi(PCIDevice *pci_dev)
 assigned_dev-girq = -1;
 assigned_dev-irq_requested_type = assigned_irq_data.flags;
 } else {
-assign_irq(assigned_dev);
+assign_intx(assigned_dev);
 }
 }
 
@@ -1102,7 +1102,7 @@ static void assigned_dev_update_msix(PCIDevice *pci_dev)
 assigned_dev-girq = -1;
 assigned_dev-irq_requested_type = assigned_irq_data.flags;
 } else {
-assign_irq(assigned_dev);
+assign_intx(assigned_dev);
 }
 }
 
@@ -1645,8 +1645,8 @@ static int assigned_initfn(struct PCIDevice *pci_dev)
 if (r  0)
 goto out;
 
-/* assign irq for the device */
-r = assign_irq(dev);
+/* assign legacy INTx to the device */
+r = assign_intx(dev);
 if (r  0)
 goto assigned_out;
 
-- 
1.7.3.4

Re: [Qemu-devel] [PATCHv3] ps2: migrate ledstate

2011-10-17 Thread Christophe Fergeau

On Mon, Oct 17, 2011 at 11:25:42AM +0200, Gerd Hoffmann wrote:
 
   static const VMStateDescription vmstate_ps2_common = {
   .name = PS2 Common State,
 -.version_id = 3,
 +.version_id = 4,
   .minimum_version_id = 2,
   .minimum_version_id_old = 2,
   .fields  = (VMStateField []) {
 
 version_id in vmstate_ps2_keyboard must be updated too.

Yeah, I somehow updated the field in the wrong struct, /me blushes and
hides. I don't think this struct version needs to be updated.

 The version update in vmstate_ps2_common might not be needed, IIRC
 the versions for stuff referenced via VMSTATE_STRUCT() isn't used
 anyway, Juan?

Ah, ok, I hoped it would help to handle migration between versions with and
without this field, I guess I was too optimistic :)

Thanks,

Christophe


pgpMro5SS9BOn.pgp
Description: PGP signature

Re: [Qemu-devel] [PATCH v8 1/4] block: add the block queue support

2011-10-17 Thread Kevin Wolf

Am 26.09.2011 10:01, schrieb Zhi Yong Wu:
 On Fri, Sep 23, 2011 at 11:32 PM, Kevin Wolf kw...@redhat.com wrote:
 Am 08.09.2011 12:11, schrieb Zhi Yong Wu:
 Signed-off-by: Zhi Yong Wu wu...@linux.vnet.ibm.com
 ---
  Makefile.objs |2 +-
  block/blk-queue.c |  201 
 +
  block/blk-queue.h |   59 
  block_int.h   |   27 +++
  4 files changed, 288 insertions(+), 1 deletions(-)
  create mode 100644 block/blk-queue.c
  create mode 100644 block/blk-queue.h

 diff --git a/Makefile.objs b/Makefile.objs
 index 26b885b..5dcf456 100644
 --- a/Makefile.objs
 +++ b/Makefile.objs
 @@ -33,7 +33,7 @@ block-nested-y += raw.o cow.o qcow.o vdi.o vmdk.o cloop.o 
 dmg.o bochs.o vpc.o vv
  block-nested-y += qcow2.o qcow2-refcount.o qcow2-cluster.o 
 qcow2-snapshot.o qcow2-cache.o
  block-nested-y += qed.o qed-gencb.o qed-l2-cache.o qed-table.o 
 qed-cluster.o
  block-nested-y += qed-check.o
 -block-nested-y += parallels.o nbd.o blkdebug.o sheepdog.o blkverify.o
 +block-nested-y += parallels.o nbd.o blkdebug.o sheepdog.o blkverify.o 
 blk-queue.o
  block-nested-$(CONFIG_WIN32) += raw-win32.o
  block-nested-$(CONFIG_POSIX) += raw-posix.o
  block-nested-$(CONFIG_CURL) += curl.o
 diff --git a/block/blk-queue.c b/block/blk-queue.c
 new file mode 100644
 index 000..adef497
 --- /dev/null
 +++ b/block/blk-queue.c
 @@ -0,0 +1,201 @@
 +/*
 + * QEMU System Emulator queue definition for block layer
 + *
 + * Copyright (c) IBM, Corp. 2011
 + *
 + * Authors:
 + *  Zhi Yong Wu  wu...@linux.vnet.ibm.com
 + *  Stefan Hajnoczi stefa...@linux.vnet.ibm.com
 + *
 + * Permission is hereby granted, free of charge, to any person obtaining a 
 copy
 + * of this software and associated documentation files (the Software), 
 to deal
 + * in the Software without restriction, including without limitation the 
 rights
 + * to use, copy, modify, merge, publish, distribute, sublicense, and/or 
 sell
 + * copies of the Software, and to permit persons to whom the Software is
 + * furnished to do so, subject to the following conditions:
 + *
 + * The above copyright notice and this permission notice shall be included 
 in
 + * all copies or substantial portions of the Software.
 + *
 + * THE SOFTWARE IS PROVIDED AS IS, WITHOUT WARRANTY OF ANY KIND, EXPRESS 
 OR
 + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR 
 OTHER
 + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 
 FROM,
 + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 
 IN
 + * THE SOFTWARE.
 + */
 +
 +#include block_int.h
 +#include block/blk-queue.h
 +#include qemu-common.h
 +
 +/* The APIs for block request queue on qemu block layer.
 + */
 +
 +struct BlockQueueAIOCB {
 +BlockDriverAIOCB common;
 +QTAILQ_ENTRY(BlockQueueAIOCB) entry;
 +BlockRequestHandler *handler;
 +BlockDriverAIOCB *real_acb;
 +
 +int64_t sector_num;
 +QEMUIOVector *qiov;
 +int nb_sectors;
 +};

 The idea is that each request is first queued on the QTAILQ, and at some
 point it's removed from the queue and gets a real_acb. But it never has
 both at the same time. Correct?
 No. If block I/O throttling is enabled and the I/O rate at runtime exceeds
 the limits, this request will be enqueued.
 It represents the whole lifecycle of one enqueued request.

What are the conditions under which the request will still be enqueued,
but has a real_acb at the same time?

 +
 +typedef struct BlockQueueAIOCB BlockQueueAIOCB;
 +
 +struct BlockQueue {
 +QTAILQ_HEAD(requests, BlockQueueAIOCB) requests;
 +bool req_failed;
 +bool flushing;
 +};

 I find req_failed pretty confusing. Needs documentation at least, but
 most probably also a better name.
 OK. request_has_failed?

No, that doesn't describe what it's really doing.

You set req_failed = true by default and then on some obscure condition
clear it or not. It's tracking something, but I'm not sure what meaning
it has during the whole process.

 +
 +static void qemu_block_queue_dequeue(BlockQueue *queue,
 + BlockQueueAIOCB *request)
 +{
 +BlockQueueAIOCB *req;
 +
 +assert(queue);
 +while (!QTAILQ_EMPTY(queue-requests)) {
 +req = QTAILQ_FIRST(queue-requests);
 +if (req == request) {
 +QTAILQ_REMOVE(queue-requests, req, entry);
 +break;
 +}
 +}
 +}

 Is it just me or is this an endless loop if the request isn't the first
 element in the list?
 queue->requests is only used to store requests which exceed the limits.
 Why is the request not the first element?

Why do you have a loop if it's always the first element?

 +void qemu_del_block_queue(BlockQueue *queue)
 +{
 +BlockQueueAIOCB *request, *next;
 +
 +QTAILQ_FOREACH_SAFE(request, queue-requests,

[Qemu-devel] [RFC][PATCH 06/45] msix: Prevent bogus mask updates on MMIO accesses

2011-10-17 Thread Jan Kiszka

Only accesses to the MSI-X table must trigger a call to
msix_handle_mask_update or a notifier invocation.

Signed-off-by: Jan Kiszka jan.kis...@siemens.com
---
 hw/msix.c |   16 ++--
 1 files changed, 10 insertions(+), 6 deletions(-)

diff --git a/hw/msix.c b/hw/msix.c
index 2c4de21..33cb716 100644
--- a/hw/msix.c
+++ b/hw/msix.c
@@ -264,18 +264,22 @@ static void msix_mmio_write(void *opaque, 
target_phys_addr_t addr,
 {
 PCIDevice *dev = opaque;
 unsigned int offset = addr  (MSIX_PAGE_SIZE - 1)  ~0x3;
-int vector = offset / PCI_MSIX_ENTRY_SIZE;
+unsigned int vector = offset / PCI_MSIX_ENTRY_SIZE;
 int was_masked = msix_is_masked(dev, vector);
 pci_set_long(dev-msix_table_page + offset, val);
 if (kvm_enabled()  kvm_irqchip_in_kernel()) {
 kvm_msix_update(dev, vector, was_masked, msix_is_masked(dev, vector));
 }
-if (was_masked != msix_is_masked(dev, vector)  dev-msix_mask_notifier) {
-int r = dev-msix_mask_notifier(dev, vector,
-   msix_is_masked(dev, vector));
-assert(r = 0);
+
+if (vector  dev-msix_entries_nr) {
+if (was_masked != msix_is_masked(dev, vector) 
+dev-msix_mask_notifier) {
+int r = dev-msix_mask_notifier(dev, vector,
+msix_is_masked(dev, vector));
+assert(r = 0);
+}
+msix_handle_mask_update(dev, vector);
 }
-msix_handle_mask_update(dev, vector);
 }
 
 static const MemoryRegionOps msix_mmio_ops = {
-- 
1.7.3.4

Re: [Qemu-devel] [PATCH v8 2/4] block: add the command line support

2011-10-17 Thread Kevin Wolf

Am 26.09.2011 08:15, schrieb Zhi Yong Wu:
 On Fri, Sep 23, 2011 at 11:54 PM, Kevin Wolf kw...@redhat.com wrote:
 +}
 +
 +static void bdrv_block_timer(void *opaque)
 +{
 +BlockDriverState *bs = opaque;
 +BlockQueue *queue= bs-block_queue;
 +
 +qemu_block_queue_flush(queue);

 Hm, didn't really notice it while reading patch 1, but
 qemu_block_queue_flush() is misleading. It's really something like
 Why do you say this is misleading?
 qemu_block_queue_submit().
 Right. It will resubmit all enqueued I/O requests.

For me, flush sounds as if it waits for completion of all requests.

Kevin

[Qemu-devel] [RFC][PATCH 29/45] pci-assign: Drop kvm_assigned_irq::host_irq initialization

2011-10-17 Thread Jan Kiszka

real_device.irq is never set explicitly, thus remains 0. So we can
simply drop this line as assigned_irq_data is zero-initialized anyway.

Signed-off-by: Jan Kiszka jan.kis...@siemens.com
---
 hw/device-assignment.c |1 -
 1 files changed, 0 insertions(+), 1 deletions(-)

diff --git a/hw/device-assignment.c b/hw/device-assignment.c
index 07e9f5a..799b816 100644
--- a/hw/device-assignment.c
+++ b/hw/device-assignment.c
@@ -825,7 +825,6 @@ static int assign_irq(AssignedDevice *dev)
 memset(assigned_irq_data, 0, sizeof(assigned_irq_data));
 assigned_irq_data.assigned_dev_id = calc_assigned_dev_id(dev);
 assigned_irq_data.guest_irq = irq;
-assigned_irq_data.host_irq = dev-real_device.irq;
 if (dev-irq_requested_type) {
 assigned_irq_data.flags = dev-irq_requested_type;
 r = kvm_deassign_irq(kvm_state, assigned_irq_data);
-- 
1.7.3.4

[Qemu-devel] [RFC][PATCH 42/45] msix: Introduce msix_init_simple

2011-10-17 Thread Jan Kiszka

Device models are usually not interested in specifying MSI-X
configuration details beyond the number of vectors to provide and the
BAR number to use. Layout of an exclusively used BAR and its
registration can also be handled centrally.

This is the purpose of msix_init_simple. It provides handy services to
the existing users. Future users like device assignment may require more
detailed setup specification. For them we will (re-)introduce msix_init
with the full list of configuration options (in contrast to the current
code).

Signed-off-by: Jan Kiszka jan.kis...@siemens.com
---
 hw/ivshmem.c|6 +-
 hw/msix.c   |   35 ++-
 hw/msix.h   |7 +++
 hw/virtio-pci.c |   15 +--
 hw/virtio-pci.h |1 -
 5 files changed, 23 insertions(+), 41 deletions(-)

diff --git a/hw/ivshmem.c b/hw/ivshmem.c
index a402c98..d9dbd18 100644
--- a/hw/ivshmem.c
+++ b/hw/ivshmem.c
@@ -65,7 +65,6 @@ typedef struct IVShmemState {
  */
 MemoryRegion bar;
 MemoryRegion ivshmem;
-MemoryRegion msix_bar;
 uint64_t ivshmem_size; /* size of shared memory region */
 int shm_fd; /* shared memory file descriptor */
 
@@ -539,10 +538,7 @@ static void ivshmem_setup_msi(IVShmemState *s)
 {
 /* allocate the MSI-X vectors */
 
-memory_region_init(s-msix_bar, ivshmem-msix, 4096);
-if (!msix_init(s-dev, s-vectors, s-msix_bar, 1, 0)) {
-pci_register_bar(s-dev, 1, PCI_BASE_ADDRESS_SPACE_MEMORY,
- s-msix_bar);
+if (!msix_init_simple(s-dev, s-vectors, 1)) {
 IVSHMEM_DPRINTF(msix initialized (%d vectors)\n, s-vectors);
 } else {
 IVSHMEM_DPRINTF(msix initialization failed\n);
diff --git a/hw/msix.c b/hw/msix.c
index bccd8b1..258b9c1 100644
--- a/hw/msix.c
+++ b/hw/msix.c
@@ -244,17 +244,6 @@ static const MemoryRegionOps msix_mmio_ops = {
 },
 };
 
-static void msix_mmio_setup(PCIDevice *d, MemoryRegion *bar)
-{
-uint8_t *config = d-config + d-msix_cap;
-uint32_t table = pci_get_long(config + PCI_MSIX_TABLE);
-uint32_t offset = table  ~(MSIX_PAGE_SIZE - 1);
-/* TODO: for assigned devices, we'll want to make it possible to map
- * pending bits separately in case they are in a separate bar. */
-
-memory_region_add_subregion(bar, offset, d-msix_mmio);
-}
-
 static void msix_mask_all(struct PCIDevice *dev, unsigned nentries)
 {
 int vector;
@@ -272,11 +261,9 @@ static void msix_mask_all(struct PCIDevice *dev, unsigned 
nentries)
 }
 }
 
-/* Initialize the MSI-X structures. Note: if MSI-X is supported, BAR size is
- * modified, it should be retrieved with msix_bar_size. */
-int msix_init(struct PCIDevice *dev, unsigned short nentries,
-  MemoryRegion *bar,
-  unsigned bar_nr, unsigned bar_size)
+/* Initialize the MSI-X structures in a single dedicated BAR
+ * and register it. */
+int msix_init_simple(PCIDevice *dev, unsigned short nentries, unsigned bar_nr)
 {
 int ret;
 
@@ -296,14 +283,16 @@ int msix_init(struct PCIDevice *dev, unsigned short 
nentries,
   msix, MSIX_PAGE_SIZE);
 
 dev-msix_entries_nr = nentries;
-ret = msix_add_config(dev, nentries, bar_nr, bar_size);
+ret = msix_add_config(dev, nentries, bar_nr, 0);
 if (ret)
 goto err_config;
 
 dev-msix_cache = g_malloc0(nentries * sizeof *dev-msix_cache);
 
 dev-cap_present |= QEMU_PCI_CAP_MSIX;
-msix_mmio_setup(dev, bar);
+
+pci_register_bar(dev, bar_nr, PCI_BASE_ADDRESS_SPACE_MEMORY,
+ dev-msix_mmio);
 return 0;
 
 err_config:
@@ -315,10 +304,10 @@ err_config:
 }
 
 /* Clean up resources for the device. */
-int msix_uninit(PCIDevice *dev, MemoryRegion *bar)
+void msix_uninit(PCIDevice *dev, MemoryRegion *bar)
 {
 if (!msix_present(dev)) {
-return 0;
+return;
 }
 pci_del_capability(dev, PCI_CAP_ID_MSIX, MSIX_CAP_LENGTH);
 dev-msix_cap = 0;
@@ -332,7 +321,11 @@ int msix_uninit(PCIDevice *dev, MemoryRegion *bar)
 g_free(dev-msix_cache);
 
 dev-cap_present = ~QEMU_PCI_CAP_MSIX;
-return 0;
+}
+
+void msix_uninit_simple(PCIDevice *dev)
+{
+msix_uninit(dev, dev-msix_mmio);
 }
 
 void msix_save(PCIDevice *dev, QEMUFile *f)
diff --git a/hw/msix.h b/hw/msix.h
index dfc6087..56e7ba5 100644
--- a/hw/msix.h
+++ b/hw/msix.h
@@ -4,14 +4,13 @@
 #include qemu-common.h
 #include pci.h
 
-int msix_init(PCIDevice *pdev, unsigned short nentries,
-  MemoryRegion *bar,
-  unsigned bar_nr, unsigned bar_size);
+int msix_init_simple(PCIDevice *dev, unsigned short nentries, unsigned bar_nr);
 
 void msix_write_config(PCIDevice *pci_dev, uint32_t address,
uint32_t old_val, int len);
 
-int msix_uninit(PCIDevice *d, MemoryRegion *bar);
+void msix_uninit(PCIDevice *d, MemoryRegion *bar);
+void msix_uninit_simple(PCIDevice *d);
 
 void msix_save(PCIDevice *dev, QEMUFile *f);
 void msix_load(PCIDevice *dev, QEMUFile *f);
diff --git

Re: [Qemu-devel] [PATCH 1/1 V5] kernel/kvm: introduce KVM_SET_LINT1 and fix improper nmi emulation

2011-10-17 Thread Jan Kiszka

On 2011-10-17 11:54, Avi Kivity wrote:
 On 10/17/2011 11:17 AM, Lai Jiangshan wrote:
 On 10/16/2011 05:39 PM, Avi Kivity wrote:
 On 10/14/2011 11:03 AM, Lai Jiangshan wrote:
 Currently, NMI interrupt is blindly sent to all the vCPUs when NMI
 button event happens. This doesn't properly emulate real hardware on
 which NMI button event triggers LINT1. Because of this, NMI is sent to
 the processor even when LINT1 is masked in LVT. For example, this
 causes the problem that kdump initiated by NMI sometimes doesn't work
 on KVM, because kdump assumes NMI is masked on CPUs other than CPU0.

 With this patch, we introduce KVM_SET_LINT1,
 and we can use KVM_SET_LINT1 to correctly emulate the NMI button
 without changing the old KVM_NMI behavior.
  
 @@ -759,6 +762,8 @@ struct kvm_clock_data {
  #define KVM_CREATE_SPAPR_TCE_IOW(KVMIO,  0xa8, struct 
 kvm_create_spapr_tce)
  /* Available with KVM_CAP_RMA */
  #define KVM_ALLOCATE_RMA_IOR(KVMIO,  0xa9, struct kvm_allocate_rma)
 +/* Available with KVM_CAP_SET_LINT1 for x86 */
 +#define KVM_SET_LINT1   _IO(KVMIO,   0xaa)
  


 LINT1 may have been programmed as a level -triggered interrupt instead
 of edge triggered (NMI or interrupt).  We can use the ioctl argument for
 the level (and pressing the NMI button needs to pulse the level to 1 and
 back to 0).


 Hi, Avi,

 How to handle level=0 in the kernel?
 Or just ignore it?
 
 It needs to be handled according to the delivery mode, polarity, and
 trigger mode bits in the LVT.
 
 For example, a Fixed delivery mode with polarity 1 and level trigger
 mode will post the interrupt as long as it is in level 0 and not masked
 by the ISR.  __apic_accept_irq() should handle this.

But I think it's not yet fully prepared for this (level is only
considered for APIC_DM_INIT e.g.).

Jan

-- 
Siemens AG, Corporate Technology, CT T DE IT 1
Corporate Competence Center Embedded Linux

[Qemu-devel] [RFC][PATCH 26/45] qemu-kvm: Use g_realloc for irq_routes extension

2011-10-17 Thread Jan Kiszka

Allows dropping the out-of-memory check.

Signed-off-by: Jan Kiszka jan.kis...@siemens.com
---
 qemu-kvm.c |7 +--
 1 files changed, 1 insertions(+), 6 deletions(-)

diff --git a/qemu-kvm.c b/qemu-kvm.c
index 6bdd7b5..eb8f176 100644
--- a/qemu-kvm.c
+++ b/qemu-kvm.c
@@ -258,7 +258,6 @@ int kvm_add_routing_entry(struct kvm_irq_routing_entry 
*entry,
 {
 #ifdef KVM_CAP_IRQ_ROUTING
 KVMState *s = kvm_state;
-struct kvm_irq_routing *z;
 struct kvm_irq_routing_entry *new;
 int n, size;
 
@@ -269,12 +268,8 @@ int kvm_add_routing_entry(struct kvm_irq_routing_entry 
*entry,
 }
 size = sizeof(struct kvm_irq_routing);
 size += n * sizeof(*new);
-z = realloc(s-irq_routes, size);
-if (!z) {
-return -ENOMEM;
-}
+s-irq_routes = g_realloc(s-irq_routes, size);
 s-nr_allocated_irq_routes = n;
-s-irq_routes = z;
 
 s-msi_cache = g_realloc(s-msi_cache, sizeof(*s-msi_cache) * n);
 }
-- 
1.7.3.4

Re: [Qemu-devel] [PATCH v8 3/4] block: add block timer and throttling algorithm

2011-10-17 Thread Kevin Wolf

Am 26.09.2011 09:24, schrieb Zhi Yong Wu:
 On Sat, Sep 24, 2011 at 12:19 AM, Kevin Wolf kw...@redhat.com wrote:
 Am 08.09.2011 12:11, schrieb Zhi Yong Wu:
 Note:
  1.) When bps/iops limits are set to a small value such as 511 
 bytes/s, this VM will hang. We are considering how to handle this 
 scenario.
  2.) When the dd command is issued in the guest, if its bs option is set to a 
 large value such as bs=1024K, the resulting speed will be slightly higher than 
 the limits.

 For these problems, if you have nice thought, pls let us know.:)

 Signed-off-by: Zhi Yong Wu wu...@linux.vnet.ibm.com
 ---
  block.c |  259 
 ---
  block.h |1 -
  2 files changed, 248 insertions(+), 12 deletions(-)

 One general comment: What about synchronous and/or coroutine I/O
 operations? Do you think they are just not important enough to consider
 here or were they forgotten?
 For sync ops, we assume that they will be converted to async mode at
 some point in the future, right?
 For coroutine I/O, it is introduced in image driver layer, and behind
 bdrv_aio_readv/writev. I think that we need not consider them, right?

Meanwhile the block layer has been changed to handle all requests in
terms of coroutines. So you would best move your intercepting code into
the coroutine functions.

 Also, do I understand correctly that you're always submitting the whole
 Right, when the block timer fires, it will flush the whole request queue.
 queue at once? Does this effectively enforce the limit all the time or
 will it lead to some peaks and then no requests at all for a while until
 In fact, it only tries to submit the enqueued requests one by one. If
 a request fails to pass the limit, it will be enqueued again.

Right, I missed this. Makes sense.

 the average is right again?
 Yeah, it is possible. Do you have a better idea?

 Maybe some documentation on how it all works from a high level
 perspective would be helpful.

 +/* throttling disk read I/O */
 +if (bs-io_limits_enabled) {
 +if (bdrv_exceed_io_limits(bs, nb_sectors, false, wait_time)) {
 +ret = qemu_block_queue_enqueue(bs-block_queue, bs, 
 bdrv_aio_readv,
 +   sector_num, qiov, nb_sectors, cb, opaque);
 +printf(wait_time=%ld\n, wait_time);
 +if (wait_time != -1) {
 +printf(reset block timer\n);
 +qemu_mod_timer(bs-block_timer,
 +   wait_time + qemu_get_clock_ns(vm_clock));
 +}
 +
 +if (ret) {
 +printf(ori ret is not null\n);
 +} else {
 +printf(ori ret is null\n);
 +}
 +
 +return ret;
 +}
 +}

 -return drv-bdrv_aio_readv(bs, sector_num, qiov, nb_sectors,
 +ret =  drv-bdrv_aio_readv(bs, sector_num, qiov, nb_sectors,
 cb, opaque);
 +if (ret) {
 +if (bs-io_limits_enabled) {
 +bs-io_disps.bytes[BLOCK_IO_LIMIT_READ] +=
 +  (unsigned) nb_sectors * BDRV_SECTOR_SIZE;
 +bs-io_disps.ios[BLOCK_IO_LIMIT_READ]++;
 +}

 I wonder if you can't reuse bs-nr_bytes/nr_ops instead of introducing a
 second counting mechanism. Would have the advantage that numbers are
 No, our counting variables will be reset to zero when the current slice
 time (0.1ms) is used up.

Instead of setting the counter to zero you could remember the base value
and calculate the difference when you need it. The advantage is that we
can share infrastructure instead of introducing several subtly different
ways of I/O accounting.

 actually consistent (your metric counts slightly differently than the
 existing info blockstats one).
 Yeah, I noticed this, and don't think there's anything wrong with it. And you?

It's not really user friendly if a number that is called the same means
this in one place and in another place that.

Kevin

[Qemu-devel] [RFC][PATCH 14/45] qemu-kvm: Drop useless kvm_clear_gsi_routes

2011-10-17 Thread Jan Kiszka

There are no routes to clear at this point, we are just creating the VM.

Signed-off-by: Jan Kiszka jan.kis...@siemens.com
---
 qemu-kvm-x86.c |1 -
 qemu-kvm.c |   10 --
 qemu-kvm.h |9 -
 3 files changed, 0 insertions(+), 20 deletions(-)

diff --git a/qemu-kvm-x86.c b/qemu-kvm-x86.c
index a7981b1..bab4307 100644
--- a/qemu-kvm-x86.c
+++ b/qemu-kvm-x86.c
@@ -167,7 +167,6 @@ int kvm_arch_init_irq_routing(void)
 int i, r;
 
 if (kvm_has_gsi_routing()) {
-kvm_clear_gsi_routes();
 for (i = 0; i  8; ++i) {
 if (i == 2) {
 continue;
diff --git a/qemu-kvm.c b/qemu-kvm.c
index f5b129a..70481de 100644
--- a/qemu-kvm.c
+++ b/qemu-kvm.c
@@ -252,16 +252,6 @@ int kvm_has_gsi_routing(void)
 return r;
 }
 
-int kvm_clear_gsi_routes(void)
-{
-#ifdef KVM_CAP_IRQ_ROUTING
-kvm_state-irq_routes-nr = 0;
-return 0;
-#else
-return -EINVAL;
-#endif
-}
-
 int kvm_add_routing_entry(struct kvm_irq_routing_entry *entry)
 {
 #ifdef KVM_CAP_IRQ_ROUTING
diff --git a/qemu-kvm.h b/qemu-kvm.h
index 2bd5602..8032388 100644
--- a/qemu-kvm.h
+++ b/qemu-kvm.h
@@ -174,15 +174,6 @@ int kvm_deassign_pci_device(KVMState *s,
 struct kvm_assigned_pci_dev *assigned_dev);
 
 /*!
- * \brief Clears the temporary irq routing table
- *
- * Clears the temporary irq routing table.  Nothing is committed to the
- * running VM.
- *
- */
-int kvm_clear_gsi_routes(void);
-
-/*!
  * \brief Adds an irq route to the temporary irq routing table
  *
  * Adds an irq route to the temporary irq routing table.  Nothing is
-- 
1.7.3.4

[Qemu-devel] [PATCH] qxl: create slots on post_load in any state (fix RHBZ 740547)

2011-10-17 Thread Alon Levy

If we migrate when the device is not in a native state the guest
still believes the slots are created, and will issue operations
that reference the slots, causing a panic: virtual address out of range
on the first of them. Easy to see by migrating in vga mode (with
a driver loaded, for instance windows cmd window in full screen mode)
and then exiting vga mode back to native mode will cause said panic.

Fixed by doing the slot recreation unconditionally at post_load

Signed-off-by: Alon Levy al...@redhat.com
---
 hw/qxl.c |   14 --
 1 files changed, 8 insertions(+), 6 deletions(-)

diff --git a/hw/qxl.c b/hw/qxl.c
index 03848ed..4e9f39f 100644
--- a/hw/qxl.c
+++ b/hw/qxl.c
@@ -1684,6 +1684,14 @@ static int qxl_post_load(void *opaque, int version)
 qxl_mode_to_string(d-mode));
 newmode = d-mode;
 d-mode = QXL_MODE_UNDEFINED;
+for (i = 0; i  NUM_MEMSLOTS; i++) {
+if (!d-guest_slots[i].active) {
+continue;
+}
+dprint(d, 1, %s: restoring guest slot %d delta %PRIu64\n,
+   __func__, i, d-guest_slots[i].delta);
+qxl_add_memslot(d, i, d-guest_slots[i].delta, QXL_SYNC);
+}
 switch (newmode) {
 case QXL_MODE_UNDEFINED:
 break;
@@ -1691,12 +1699,6 @@ static int qxl_post_load(void *opaque, int version)
 qxl_enter_vga_mode(d);
 break;
 case QXL_MODE_NATIVE:
-for (i = 0; i  NUM_MEMSLOTS; i++) {
-if (!d-guest_slots[i].active) {
-continue;
-}
-qxl_add_memslot(d, i, 0, QXL_SYNC);
-}
 qxl_create_guest_primary(d, 1, QXL_SYNC);
 
 /* replay surface-create and cursor-set commands */
-- 
1.7.6.4

[Qemu-devel] [RFC][PATCH 31/45] qemu-kvm: Refactor kvm_deassign_irq to kvm_device_irq_deassign

2011-10-17 Thread Jan Kiszka

Don't pass kvm_assigned_irq struct, rather use the actually required
fields in the interface.

Signed-off-by: Jan Kiszka jan.kis...@siemens.com
---
 hw/device-assignment.c |   42 --
 qemu-kvm.c |   15 ++-
 qemu-kvm.h |   11 +--
 3 files changed, 35 insertions(+), 33 deletions(-)

diff --git a/hw/device-assignment.c b/hw/device-assignment.c
index 4e4349b..e0b9cfe 100644
--- a/hw/device-assignment.c
+++ b/hw/device-assignment.c
@@ -810,7 +810,8 @@ static int assign_device(AssignedDevice *dev)
 static int assign_intx(AssignedDevice *dev)
 {
 struct kvm_assigned_irq assigned_irq_data;
-int irq, r = 0;
+uint32_t dev_id;
+int irq, r;
 
 /* Interrupt PIN 0 means don't use INTx */
 if (assigned_dev_pci_read_byte(dev-dev, PCI_INTERRUPT_PIN) == 0)
@@ -819,21 +820,24 @@ static int assign_intx(AssignedDevice *dev)
 irq = pci_map_irq(dev-dev, dev-intpin);
 irq = piix_get_irq(irq);
 
-if (dev-girq == irq)
-return r;
+if (dev-girq == irq) {
+return 0;
+}
+
+dev_id = calc_assigned_dev_id(dev);
 
-memset(assigned_irq_data, 0, sizeof(assigned_irq_data));
-assigned_irq_data.assigned_dev_id = calc_assigned_dev_id(dev);
-assigned_irq_data.guest_irq = irq;
 if (dev-irq_requested_type) {
-assigned_irq_data.flags = dev-irq_requested_type;
-r = kvm_deassign_irq(kvm_state, assigned_irq_data);
+r = kvm_device_irq_deassign(kvm_state, dev_id,
+dev-irq_requested_type);
 if (r) {
 perror(assign_intx: deassign);
 }
 dev-irq_requested_type = 0;
 }
 
+memset(assigned_irq_data, 0, sizeof(assigned_irq_data));
+assigned_irq_data.assigned_dev_id = dev_id;
+assigned_irq_data.guest_irq = irq;
 assigned_irq_data.flags = KVM_DEV_IRQ_GUEST_INTX;
 if (dev-features  ASSIGNED_DEVICE_PREFER_MSI_MASK 
 dev-cap.available  ASSIGNED_DEVICE_CAP_MSI)
@@ -913,20 +917,19 @@ static void assigned_dev_update_msi(PCIDevice *pci_dev)
 AssignedDevice *assigned_dev = DO_UPCAST(AssignedDevice, dev, pci_dev);
 uint8_t ctrl_byte = pci_get_byte(pci_dev-config + pci_dev-msi_cap +
  PCI_MSI_FLAGS);
+uint32_t dev_id;
 int r;
 
-memset(assigned_irq_data, 0, sizeof assigned_irq_data);
-assigned_irq_data.assigned_dev_id = calc_assigned_dev_id(assigned_dev);
+dev_id = calc_assigned_dev_id(assigned_dev);
 
 /* Some guests gratuitously disable MSI even if they're not using it,
  * try to catch this by only deassigning irqs if the guest is using
  * MSI or intends to start. */
 if ((assigned_dev-irq_requested_type  KVM_DEV_IRQ_GUEST_MSI) ||
 (ctrl_byte  PCI_MSI_FLAGS_ENABLE)) {
-
-assigned_irq_data.flags = assigned_dev-irq_requested_type;
 free_dev_irq_entries(assigned_dev);
-r = kvm_deassign_irq(kvm_state, assigned_irq_data);
+r = kvm_device_irq_deassign(kvm_state, dev_id,
+assigned_dev-irq_requested_type);
 /* -ENXIO means no assigned irq */
 if (r  r != -ENXIO)
 perror(assigned_dev_update_msi: deassign irq);
@@ -958,6 +961,8 @@ static void assigned_dev_update_msi(PCIDevice *pci_dev)
 }
assigned_dev-irq_entries_nr = 1;
 
+memset(assigned_irq_data, 0, sizeof assigned_irq_data);
+assigned_irq_data.assigned_dev_id = dev_id;
 assigned_irq_data.guest_irq = assigned_dev-entry-gsi;
assigned_irq_data.flags = KVM_DEV_IRQ_HOST_MSI | KVM_DEV_IRQ_GUEST_MSI;
 if (kvm_assign_irq(kvm_state, assigned_irq_data)  0) {
@@ -1066,20 +1071,19 @@ static void assigned_dev_update_msix(PCIDevice *pci_dev)
 AssignedDevice *assigned_dev = DO_UPCAST(AssignedDevice, dev, pci_dev);
 uint16_t ctrl_word = pci_get_word(pci_dev-config + pci_dev-msix_cap +
   PCI_MSIX_FLAGS);
+uint32_t dev_id;
 int r;
 
-memset(assigned_irq_data, 0, sizeof assigned_irq_data);
-assigned_irq_data.assigned_dev_id = calc_assigned_dev_id(assigned_dev);
+dev_id = calc_assigned_dev_id(assigned_dev);
 
 /* Some guests gratuitously disable MSIX even if they're not using it,
  * try to catch this by only deassigning irqs if the guest is using
  * MSIX or intends to start. */
 if ((assigned_dev-irq_requested_type  KVM_DEV_IRQ_GUEST_MSIX) ||
 (ctrl_word  PCI_MSIX_FLAGS_ENABLE)) {
-
-assigned_irq_data.flags = assigned_dev-irq_requested_type;
 free_dev_irq_entries(assigned_dev);
-r = kvm_deassign_irq(kvm_state, assigned_irq_data);
+r = kvm_device_irq_deassign(kvm_state, dev_id,
+assigned_dev-irq_requested_type);
 /* -ENXIO means no assigned irq */
 if (r  r != -ENXIO)
 perror(assigned_dev_update_msix: deassign irq);
@@ -1088,6 +1092,8 @@

[Qemu-devel] [RFC][PATCH 13/45] hpet: Use msi_deliver

2011-10-17 Thread Jan Kiszka

Avoid the slow-path MSI delivery via stl_phys by switching to
msi_deliver. This also allows preparing these rarely changing messages
in advance.

Signed-off-by: Jan Kiszka jan.kis...@siemens.com
---
 hw/hpet.c |7 ++-
 1 files changed, 6 insertions(+), 1 deletions(-)

diff --git a/hw/hpet.c b/hw/hpet.c
index d8e6b8e..c6d6e35 100644
--- a/hw/hpet.c
+++ b/hw/hpet.c
@@ -31,6 +31,7 @@
 #include hpet_emul.h
 #include sysbus.h
 #include mc146818rtc.h
+#include msi.h
 
 //#define HPET_DEBUG
 #ifdef HPET_DEBUG
@@ -55,6 +56,8 @@ typedef struct HPETTimer {  /* timers */
 uint8_t wrap_flag;  /* timer pop will indicate wrap for one-shot 32-bit
  * mode. Next pop will be actual timer expiration.
  */
+MSIMessage msi_msg;
+MSIRoutingCache msi_cache;
 } HPETTimer;
 
 typedef struct HPETState {
@@ -192,7 +195,7 @@ static void update_irq(struct HPETTimer *timer, int set)
 qemu_irq_lower(s-irqs[route]);
 }
 } else if (timer_fsb_route(timer)) {
-stl_le_phys(timer-fsb  32, timer-fsb  0x);
+msi_deliver(timer-msi_msg, timer-msi_cache);
 } else if (timer-config  HPET_TN_TYPE_LEVEL) {
 s-isr |= mask;
 qemu_irq_raise(s-irqs[route]);
@@ -533,9 +536,11 @@ static void hpet_ram_writel(void *opaque, 
target_phys_addr_t addr,
 break;
 case HPET_TN_ROUTE:
 timer-fsb = (timer-fsb  0xULL) | new_val;
+timer-msi_msg.data = new_val;
 break;
 case HPET_TN_ROUTE + 4:
 timer-fsb = (new_val  32) | (timer-fsb  0x);
+timer-msi_msg.address = new_val;
 break;
 default:
 DPRINTF(qemu: invalid hpet_ram_writel\n);
-- 
1.7.3.4

[Qemu-devel] [RFC][PATCH 27/45] qemu-kvm: Lazily update MSI caches

2011-10-17 Thread Jan Kiszka

Instead of registering every possible MSI message that is prepared in
some device's config space, this commit only registers those messages
that are actually sent.

Every message that runs through the delivery hook is first checked
against its cached data. If there is a mismatch, then the registration
is created or updated, if it matches, delivery is performed directly.

To avoid exhausting limited KVM IRQ routes, devices are expected to
flush their MSI caches whenever the content is no longer used or valid.
If we run out of routes nevertheless, we flush all caches that were
created dynamically, ie. via the MSI delivery hook. However, we keep all
those cached routes intact that are static because they are associated
with external sources (irqfds).

Signed-off-by: Jan Kiszka jan.kis...@siemens.com
---
 hw/apic.c  |4 +--
 hw/msi.c   |   93 ++--
 hw/msi.h   |2 +-
 hw/msix.c  |   91 --
 hw/pci.c   |1 -
 hw/pci.h   |3 --
 kvm-stub.c |   13 +
 kvm.h  |6 ++--
 qemu-kvm.c |   69 ++-
 9 files changed, 75 insertions(+), 207 deletions(-)

diff --git a/hw/apic.c b/hw/apic.c
index cb6662c..2cafc49 100644
--- a/hw/apic.c
+++ b/hw/apic.c
@@ -807,9 +807,7 @@ static uint32_t apic_mem_readl(void *opaque, 
target_phys_addr_t addr)
 void apic_deliver_msi(MSIMessage *msg, MSIRoutingCache *cache)
 {
 if (kvm_enabled()  kvm_irqchip_in_kernel()) {
-if (kvm_set_irq(cache-kvm_gsi, 1, NULL)  0) {
-abort();
-}
+kvm_msi_deliver(msg, cache);
 } else {
 uint8_t dest =
 (msg-address  MSI_ADDR_DEST_ID_MASK)  MSI_ADDR_DEST_ID_SHIFT;
diff --git a/hw/msi.c b/hw/msi.c
index 1328903..23d79dd 100644
--- a/hw/msi.c
+++ b/hw/msi.c
@@ -140,71 +140,18 @@ static void msi_message_from_vector(PCIDevice *dev, 
uint16_t msi_flags,
 }
 }
 
-static void kvm_msi_update(PCIDevice *dev)
-{
-uint16_t flags = pci_get_word(dev-config + msi_flags_off(dev));
-unsigned int max_vectors = 1 
-((flags  PCI_MSI_FLAGS_QMASK)  (ffs(PCI_MSI_FLAGS_QMASK) - 1));
-unsigned int nr_vectors = msi_nr_vectors(flags);
-MSIRoutingCache *cache;
-bool changed = false;
-unsigned int vector;
-MSIMessage msg;
-int r;
-
-for (vector = 0; vector  max_vectors; vector++) {
-cache = dev-msi_cache[vector];
-
-if (vector = nr_vectors) {
-if (vector  dev-msi_entries_nr) {
-kvm_msi_message_del(cache);
-changed = true;
-}
-} else if (vector = dev-msi_entries_nr) {
-msi_message_from_vector(dev, flags, vector, msg);
-r = kvm_msi_message_add(msg, cache);
-if (r) {
-fprintf(stderr, %s: kvm_msi_add failed: %s\n, __func__,
-strerror(-r));
-exit(1);
-}
-changed = true;
-} else {
-msi_message_from_vector(dev, flags, vector, msg);
-r = kvm_msi_message_update(msg, cache);
-if (r  0) {
-fprintf(stderr, %s: kvm_update_msi failed: %s\n,
-__func__, strerror(-r));
-exit(1);
-}
-if (r  0) {
-changed = true;
-}
-}
-}
-dev-msi_entries_nr = nr_vectors;
-if (changed) {
-r = kvm_commit_irq_routes();
-if (r) {
-fprintf(stderr, %s: kvm_commit_irq_routes failed: %s\n, __func__,
-strerror(-r));
-exit(1);
-}
-}
-}
-
-/* KVM specific MSI helpers */
 static void kvm_msi_free(PCIDevice *dev)
 {
-unsigned int vector;
+unsigned int vector, nr_vectors;
 
-for (vector = 0; vector  dev-msi_entries_nr; ++vector) {
-kvm_msi_message_del(dev-msi_cache[vector]);
+if (!kvm_enabled() || !kvm_irqchip_in_kernel()) {
+return;
 }
-if (dev-msi_entries_nr  0) {
-kvm_commit_irq_routes();
+nr_vectors =
+msi_nr_vectors(pci_get_word(dev-config + msi_flags_off(dev)));
+for (vector = 0; vector  nr_vectors; ++vector) {
+kvm_msi_cache_invalidate(dev-msi_cache[vector]);
 }
-dev-msi_entries_nr = 0;
 }
 
 int msi_init(struct PCIDevice *dev, uint8_t offset,
@@ -283,10 +230,7 @@ void msi_uninit(struct PCIDevice *dev)
 flags = pci_get_word(dev-config + msi_flags_off(dev));
 cap_size = msi_cap_sizeof(flags);
 
-if (kvm_enabled()  kvm_irqchip_in_kernel()) {
-kvm_msi_free(dev);
-}
-
+kvm_msi_free(dev);
 g_free(dev-msi_cache);
 
 pci_del_capability(dev, PCI_CAP_ID_MSI, cap_size);
@@ -303,9 +247,6 @@ void msi_reset(PCIDevice *dev)
 if (!msi_present(dev)) {
 return;
 }
-if (kvm_enabled()  kvm_irqchip_in_kernel()) {
-kvm_msi_free(dev);
-}
 
 flags = pci_get_word(dev-config +

[Qemu-devel] [RFC][PATCH 09/45] msi: Factor out msi_message_from_vector

2011-10-17 Thread Jan Kiszka

This helper will also be used by the upcoming config notifier.

Signed-off-by: Jan Kiszka jan.kis...@siemens.com
---
 hw/msi.c |   43 +--
 1 files changed, 25 insertions(+), 18 deletions(-)

diff --git a/hw/msi.c b/hw/msi.c
index 2b7b6e3..3c7ebc3 100644
--- a/hw/msi.c
+++ b/hw/msi.c
@@ -113,6 +113,25 @@ bool msi_enabled(const PCIDevice *dev)
  PCI_MSI_FLAGS_ENABLE);
 }
 
+static void msi_message_from_vector(PCIDevice *dev, uint16_t msi_flags,
+unsigned vector, MSIMessage *msg)
+{
+bool msi64bit = msi_flags  PCI_MSI_FLAGS_64BIT;
+unsigned int nr_vectors = msi_nr_vectors(msi_flags);
+
+msg-address = pci_get_long(dev-config + msi_address_lo_off(dev));
+if (msi64bit) {
+msg-address |= (uint64_t)pci_get_long(dev-config +
+   msi_address_hi_off(dev))  32;
+}
+
+msg-data = pci_get_word(dev-config + msi_data_off(dev, msi64bit));
+if (nr_vectors  1) {
+msg-data = ~(nr_vectors - 1);
+msg-data |= vector;
+}
+}
+
 static void kvm_msi_message_from_vector(PCIDevice *dev, unsigned vector,
 KVMMsiMessage *kmm)
 {
@@ -339,11 +358,10 @@ void msi_notify(PCIDevice *dev, unsigned int vector)
 {
 uint16_t flags = pci_get_word(dev-config + msi_flags_off(dev));
 bool msi64bit = flags  PCI_MSI_FLAGS_64BIT;
-unsigned int nr_vectors = msi_nr_vectors(flags);
-uint64_t address;
-uint32_t data;
+MSIMessage msg;
+
+assert(vector  msi_nr_vectors(flags));
 
-assert(vector  nr_vectors);
 if (msi_is_masked(dev, vector)) {
 assert(flags  PCI_MSI_FLAGS_MASKBIT);
 pci_long_test_and_set_mask(
@@ -357,24 +375,13 @@ void msi_notify(PCIDevice *dev, unsigned int vector)
 return;
 }
 
-if (msi64bit) {
-address = pci_get_quad(dev-config + msi_address_lo_off(dev));
-} else {
-address = pci_get_long(dev-config + msi_address_lo_off(dev));
-}
-
-/* upper bit 31:16 is zero */
-data = pci_get_word(dev-config + msi_data_off(dev, msi64bit));
-if (nr_vectors  1) {
-data = ~(nr_vectors - 1);
-data |= vector;
-}
+msi_message_from_vector(dev, flags, vector, msg);
 
 MSI_DEV_PRINTF(dev,
notify vector 0x%x
 address: 0x%PRIx64 data: 0x%PRIx32\n,
-   vector, address, data);
-stl_le_phys(address, data);
+   vector, msg.address, msg.data);
+stl_le_phys(msg.address, msg.data);
 }
 
 /* Normally called by pci_default_write_config(). */
-- 
1.7.3.4

[Qemu-devel] [PATCH v2 0/3] coroutinization of flush and discard (split out of NBD series)

2011-10-17 Thread Paolo Bonzini

This series, applying on top of block branch, enables drivers to use
coroutines for flush and discard.  I kept aio_discard after discussing
with Kevin since it should be useful not only for raw-posix-aio, but also
for the userspace iSCSI backend (and in general for backends relying on
an external library that is designed around aio).

BTW, with this patch we get for free the invariant that bdrv_aio_*
never returns a NULL acb (Stefan's patches already got to that point
for read/write, of course).

v1-v2:
add bdrv_co_flush and bdrv_co_discard entry points

Paolo Bonzini (2):
  block: unify flush implementations
  block: add bdrv_co_discard and bdrv_aio_discard support

Stefan Hajnoczi (1):
  block: drop redundant bdrv_flush implementation

 block.c   |  258 +
 block.h   |5 +
 block/blkdebug.c  |6 --
 block/blkverify.c |9 --
 block/qcow.c  |6 --
 block/qcow2.c |   19 
 block/qed.c   |6 --
 block/raw-posix.c |   18 
 block/raw.c   |   23 ++---
 block_int.h   |   10 ++-
 trace-events  |1 +
 11 files changed, 184 insertions(+), 177 deletions(-)

-- 
1.7.6

[Qemu-devel] [PATCH 2/3] block: drop redundant bdrv_flush implementation

2011-10-17 Thread Paolo Bonzini

From: Stefan Hajnoczi stefa...@linux.vnet.ibm.com

Block drivers now only need to provide either of .bdrv_co_flush,
.bdrv_aio_flush() or for legacy drivers .bdrv_flush().  Remove
the redundant .bdrv_flush() implementations.

[Paolo Bonzini: change raw driver to bdrv_co_flush]

Signed-off-by: Stefan Hajnoczi stefa...@linux.vnet.ibm.com
Signed-off-by: Paolo Bonzini pbonz...@redhat.com
---
 block/blkdebug.c  |6 --
 block/blkverify.c |9 -
 block/qcow.c  |6 --
 block/qcow2.c |   19 ---
 block/qed.c   |6 --
 block/raw-posix.c |   18 --
 block/raw.c   |   13 +++--
 7 files changed, 3 insertions(+), 74 deletions(-)

diff --git a/block/blkdebug.c b/block/blkdebug.c
index b3c5d42..9b88535 100644
--- a/block/blkdebug.c
+++ b/block/blkdebug.c
@@ -397,11 +397,6 @@ static void blkdebug_close(BlockDriverState *bs)
 }
 }
 
-static int blkdebug_flush(BlockDriverState *bs)
-{
-return bdrv_flush(bs-file);
-}
-
 static BlockDriverAIOCB *blkdebug_aio_flush(BlockDriverState *bs,
 BlockDriverCompletionFunc *cb, void *opaque)
 {
@@ -454,7 +449,6 @@ static BlockDriver bdrv_blkdebug = {
 
 .bdrv_file_open = blkdebug_open,
 .bdrv_close = blkdebug_close,
-.bdrv_flush = blkdebug_flush,
 
 .bdrv_aio_readv = blkdebug_aio_readv,
 .bdrv_aio_writev= blkdebug_aio_writev,
diff --git a/block/blkverify.c b/block/blkverify.c
index c7522b4..483f3b3 100644
--- a/block/blkverify.c
+++ b/block/blkverify.c
@@ -116,14 +116,6 @@ static void blkverify_close(BlockDriverState *bs)
 s-test_file = NULL;
 }
 
-static int blkverify_flush(BlockDriverState *bs)
-{
-BDRVBlkverifyState *s = bs-opaque;
-
-/* Only flush test file, the raw file is not important */
-return bdrv_flush(s-test_file);
-}
-
 static int64_t blkverify_getlength(BlockDriverState *bs)
 {
 BDRVBlkverifyState *s = bs-opaque;
@@ -368,7 +360,6 @@ static BlockDriver bdrv_blkverify = {
 
 .bdrv_file_open = blkverify_open,
 .bdrv_close = blkverify_close,
-.bdrv_flush = blkverify_flush,
 
 .bdrv_aio_readv = blkverify_aio_readv,
 .bdrv_aio_writev= blkverify_aio_writev,
diff --git a/block/qcow.c b/block/qcow.c
index c8bfecc..9b71116 100644
--- a/block/qcow.c
+++ b/block/qcow.c
@@ -781,11 +781,6 @@ static int qcow_write_compressed(BlockDriverState *bs, 
int64_t sector_num,
 return 0;
 }
 
-static int qcow_flush(BlockDriverState *bs)
-{
-return bdrv_flush(bs-file);
-}
-
 static BlockDriverAIOCB *qcow_aio_flush(BlockDriverState *bs,
 BlockDriverCompletionFunc *cb, void *opaque)
 {
@@ -826,7 +821,6 @@ static BlockDriver bdrv_qcow = {
 .bdrv_open = qcow_open,
 .bdrv_close= qcow_close,
 .bdrv_create   = qcow_create,
-.bdrv_flush= qcow_flush,
 .bdrv_is_allocated = qcow_is_allocated,
 .bdrv_set_key  = qcow_set_key,
 .bdrv_make_empty   = qcow_make_empty,
diff --git a/block/qcow2.c b/block/qcow2.c
index 510ff68..4dc980c 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -1092,24 +1092,6 @@ static int qcow2_write_compressed(BlockDriverState *bs, 
int64_t sector_num,
 return 0;
 }
 
-static int qcow2_flush(BlockDriverState *bs)
-{
-BDRVQcowState *s = bs-opaque;
-int ret;
-
-ret = qcow2_cache_flush(bs, s-l2_table_cache);
-if (ret  0) {
-return ret;
-}
-
-ret = qcow2_cache_flush(bs, s-refcount_block_cache);
-if (ret  0) {
-return ret;
-}
-
-return bdrv_flush(bs-file);
-}
-
 static BlockDriverAIOCB *qcow2_aio_flush(BlockDriverState *bs,
  BlockDriverCompletionFunc *cb,
  void *opaque)
@@ -1242,7 +1224,6 @@ static BlockDriver bdrv_qcow2 = {
 .bdrv_open  = qcow2_open,
 .bdrv_close = qcow2_close,
 .bdrv_create= qcow2_create,
-.bdrv_flush = qcow2_flush,
 .bdrv_is_allocated  = qcow2_is_allocated,
 .bdrv_set_key   = qcow2_set_key,
 .bdrv_make_empty= qcow2_make_empty,
diff --git a/block/qed.c b/block/qed.c
index e87dc4d..2e06992 100644
--- a/block/qed.c
+++ b/block/qed.c
@@ -533,11 +533,6 @@ static void bdrv_qed_close(BlockDriverState *bs)
 qemu_vfree(s-l1_table);
 }
 
-static int bdrv_qed_flush(BlockDriverState *bs)
-{
-return bdrv_flush(bs-file);
-}
-
 static int qed_create(const char *filename, uint32_t cluster_size,
   uint64_t image_size, uint32_t table_size,
   const char *backing_file, const char *backing_fmt)
@@ -1479,7 +1474,6 @@ static BlockDriver bdrv_qed = {
 .bdrv_open= bdrv_qed_open,
 .bdrv_close   = bdrv_qed_close,
 .bdrv_create  = bdrv_qed_create,
-.bdrv_flush   = bdrv_qed_flush,
 .bdrv_is_allocated= bdrv_qed_is_allocated,
 .bdrv_make_empty  = bdrv_qed_make_empty,

[Qemu-devel] [PATCH 1/3] block: unify flush implementations

2011-10-17 Thread Paolo Bonzini

Add coroutine support for flush and apply the same emulation that
we already do for read/write.  bdrv_aio_flush is simplified to always
go through a coroutine.

Signed-off-by: Paolo Bonzini pbonz...@redhat.com
---
 block.c |  164 ++
 block.h |1 +
 block_int.h |1 +
 3 files changed, 76 insertions(+), 90 deletions(-)

diff --git a/block.c b/block.c
index 7184a0f..7b8b14d 100644
--- a/block.c
+++ b/block.c
@@ -53,17 +53,12 @@ static BlockDriverAIOCB *bdrv_aio_readv_em(BlockDriverState 
*bs,
 static BlockDriverAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
 BlockDriverCompletionFunc *cb, void *opaque);
-static BlockDriverAIOCB *bdrv_aio_flush_em(BlockDriverState *bs,
-BlockDriverCompletionFunc *cb, void *opaque);
-static BlockDriverAIOCB *bdrv_aio_noop_em(BlockDriverState *bs,
-BlockDriverCompletionFunc *cb, void *opaque);
 static int coroutine_fn bdrv_co_readv_em(BlockDriverState *bs,
  int64_t sector_num, int nb_sectors,
  QEMUIOVector *iov);
 static int coroutine_fn bdrv_co_writev_em(BlockDriverState *bs,
  int64_t sector_num, int nb_sectors,
  QEMUIOVector *iov);
-static int coroutine_fn bdrv_co_flush_em(BlockDriverState *bs);
 static int coroutine_fn bdrv_co_do_readv(BlockDriverState *bs,
 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov);
 static int coroutine_fn bdrv_co_do_writev(BlockDriverState *bs,
@@ -203,9 +198,6 @@ void bdrv_register(BlockDriver *bdrv)
 }
 }
 
-if (!bdrv-bdrv_aio_flush)
-bdrv-bdrv_aio_flush = bdrv_aio_flush_em;
-
 QLIST_INSERT_HEAD(bdrv_drivers, bdrv, list);
 }
 
@@ -1027,11 +1019,6 @@ static int bdrv_check_request(BlockDriverState *bs, 
int64_t sector_num,
nb_sectors * BDRV_SECTOR_SIZE);
 }
 
-static inline bool bdrv_has_async_flush(BlockDriver *drv)
-{
-return drv-bdrv_aio_flush != bdrv_aio_flush_em;
-}
-
 typedef struct RwCo {
 BlockDriverState *bs;
 int64_t sector_num;
@@ -1759,33 +1746,6 @@ const char *bdrv_get_device_name(BlockDriverState *bs)
 return bs-device_name;
 }
 
-int bdrv_flush(BlockDriverState *bs)
-{
-if (bs-open_flags  BDRV_O_NO_FLUSH) {
-return 0;
-}
-
-if (bs-drv  bdrv_has_async_flush(bs-drv)  qemu_in_coroutine()) {
-return bdrv_co_flush_em(bs);
-}
-
-if (bs-drv  bs-drv-bdrv_flush) {
-return bs-drv-bdrv_flush(bs);
-}
-
-/*
- * Some block drivers always operate in either writethrough or unsafe mode
- * and don't support bdrv_flush therefore. Usually qemu doesn't know how
- * the server works (because the behaviour is hardcoded or depends on
- * server-side configuration), so we can't ensure that everything is safe
- * on disk. Returning an error doesn't work because that would break guests
- * even if the server operates in writethrough mode.
- *
- * Let's hope the user knows what he's doing.
- */
-return 0;
-}
-
 void bdrv_flush_all(void)
 {
 BlockDriverState *bs;
@@ -2610,22 +2570,6 @@ fail:
 return -1;
 }
 
-BlockDriverAIOCB *bdrv_aio_flush(BlockDriverState *bs,
-BlockDriverCompletionFunc *cb, void *opaque)
-{
-BlockDriver *drv = bs-drv;
-
-trace_bdrv_aio_flush(bs, opaque);
-
-if (bs-open_flags  BDRV_O_NO_FLUSH) {
-return bdrv_aio_noop_em(bs, cb, opaque);
-}
-
-if (!drv)
-return NULL;
-return drv-bdrv_aio_flush(bs, cb, opaque);
-}
-
 void bdrv_aio_cancel(BlockDriverAIOCB *acb)
 {
 acb-pool-cancel(acb);
@@ -2785,41 +2729,28 @@ static BlockDriverAIOCB 
*bdrv_co_aio_rw_vector(BlockDriverState *bs,
 return acb-common;
 }
 
-static BlockDriverAIOCB *bdrv_aio_flush_em(BlockDriverState *bs,
-BlockDriverCompletionFunc *cb, void *opaque)
+static void coroutine_fn bdrv_aio_flush_co_entry(void *opaque)
 {
-BlockDriverAIOCBSync *acb;
-
-acb = qemu_aio_get(bdrv_em_aio_pool, bs, cb, opaque);
-acb-is_write = 1; /* don't bounce in the completion hadler */
-acb-qiov = NULL;
-acb-bounce = NULL;
-acb-ret = 0;
-
-if (!acb-bh)
-acb-bh = qemu_bh_new(bdrv_aio_bh_cb, acb);
+BlockDriverAIOCBCoroutine *acb = opaque;
+BlockDriverState *bs = acb-common.bs;
 
-bdrv_flush(bs);
+acb-req.error = bdrv_co_flush(bs);
+acb-bh = qemu_bh_new(bdrv_co_em_bh, acb);
 qemu_bh_schedule(acb-bh);
-return acb-common;
 }
 
-static BlockDriverAIOCB *bdrv_aio_noop_em(BlockDriverState *bs,
+BlockDriverAIOCB *bdrv_aio_flush(BlockDriverState *bs,
 BlockDriverCompletionFunc *cb, void *opaque)
 {
-BlockDriverAIOCBSync *acb;
+trace_bdrv_aio_flush(bs, opaque);
 
-acb = qemu_aio_get(bdrv_em_aio_pool, bs, cb, opaque);
-acb-is_write = 1; /* don't bounce in the

[Qemu-devel] [PATCH 3/3] block: add bdrv_co_discard and bdrv_aio_discard support

2011-10-17 Thread Paolo Bonzini

This similarly adds support for coroutine and asynchronous discard.

Signed-off-by: Paolo Bonzini pbonz...@redhat.com
---
 block.c  |  102 +++--
 block.h  |4 ++
 block/raw.c  |   10 +++--
 block_int.h  |9 -
 trace-events |1 +
 5 files changed, 109 insertions(+), 17 deletions(-)

diff --git a/block.c b/block.c
index 7b8b14d..28508f2 100644
--- a/block.c
+++ b/block.c
@@ -1768,17 +1768,6 @@ int bdrv_has_zero_init(BlockDriverState *bs)
 return 1;
 }
 
-int bdrv_discard(BlockDriverState *bs, int64_t sector_num, int nb_sectors)
-{
-if (!bs-drv) {
-return -ENOMEDIUM;
-}
-if (!bs-drv-bdrv_discard) {
-return 0;
-}
-return bs-drv-bdrv_discard(bs, sector_num, nb_sectors);
-}
-
 /*
  * Returns true iff the specified sector is present in the disk image. Drivers
  * not implementing the functionality are assumed to not support backing files,
@@ -2754,6 +2743,34 @@ BlockDriverAIOCB *bdrv_aio_flush(BlockDriverState *bs,
 return acb-common;
 }
 
+static void coroutine_fn bdrv_aio_discard_co_entry(void *opaque)
+{
+BlockDriverAIOCBCoroutine *acb = opaque;
+BlockDriverState *bs = acb-common.bs;
+
+acb-req.error = bdrv_co_discard(bs, acb-req.sector, acb-req.nb_sectors);
+acb-bh = qemu_bh_new(bdrv_co_em_bh, acb);
+qemu_bh_schedule(acb-bh);
+}
+
+BlockDriverAIOCB *bdrv_aio_discard(BlockDriverState *bs,
+int64_t sector_num, int nb_sectors,
+BlockDriverCompletionFunc *cb, void *opaque)
+{
+Coroutine *co;
+BlockDriverAIOCBCoroutine *acb;
+
+trace_bdrv_aio_discard(bs, sector_num, nb_sectors, opaque);
+
+acb = qemu_aio_get(bdrv_em_co_aio_pool, bs, cb, opaque);
+acb-req.sector = sector_num;
+acb-req.nb_sectors = nb_sectors;
+co = qemu_coroutine_create(bdrv_aio_discard_co_entry);
+qemu_coroutine_enter(co, acb);
+
+return acb-common;
+}
+
 void bdrv_init(void)
 {
 module_call_init(MODULE_INIT_BLOCK);
@@ -2915,6 +2932,69 @@ int bdrv_flush(BlockDriverState *bs)
 return rwco.ret;
 }
 
+static void coroutine_fn bdrv_discard_co_entry(void *opaque)
+{
+RwCo *rwco = opaque;
+
+rwco-ret = bdrv_co_discard(rwco-bs, rwco-sector_num, rwco-nb_sectors);
+}
+
+int coroutine_fn bdrv_co_discard(BlockDriverState *bs, int64_t sector_num,
+ int nb_sectors)
+{
+if (!bs-drv) {
+return -ENOMEDIUM;
+} else if (bdrv_check_request(bs, sector_num, nb_sectors)) {
+return -EIO;
+} else if (bs-read_only) {
+return -EROFS;
+} else if (bs-drv-bdrv_co_discard) {
+return bs-drv-bdrv_co_discard(bs, sector_num, nb_sectors);
+} else if (bs-drv-bdrv_aio_discard) {
+BlockDriverAIOCB *acb;
+CoroutineIOCompletion co = {
+.coroutine = qemu_coroutine_self(),
+};
+
+acb = bs-drv-bdrv_aio_discard(bs, sector_num, nb_sectors,
+bdrv_co_io_em_complete, co);
+if (acb == NULL) {
+return -EIO;
+} else {
+qemu_coroutine_yield();
+return co.ret;
+}
+} else if (bs-drv-bdrv_discard) {
+return bs-drv-bdrv_discard(bs, sector_num, nb_sectors);
+} else {
+return 0;
+}
+}
+
+int bdrv_discard(BlockDriverState *bs, int64_t sector_num, int nb_sectors)
+{
+Coroutine *co;
+RwCo rwco = {
+.bs = bs,
+.sector_num = sector_num,
+.nb_sectors = nb_sectors,
+.ret = NOT_DONE,
+};
+
+if (qemu_in_coroutine()) {
+/* Fast-path if already in coroutine context */
+bdrv_discard_co_entry(rwco);
+} else {
+co = qemu_coroutine_create(bdrv_discard_co_entry);
+qemu_coroutine_enter(co, rwco);
+while (rwco.ret == NOT_DONE) {
+qemu_aio_wait();
+}
+}
+
+return rwco.ret;
+}
+
 /**/
 /* removable device support */
 
diff --git a/block.h b/block.h
index 65c5166..5a042c9 100644
--- a/block.h
+++ b/block.h
@@ -166,6 +166,9 @@ BlockDriverAIOCB *bdrv_aio_writev(BlockDriverState *bs, 
int64_t sector_num,
   BlockDriverCompletionFunc *cb, void *opaque);
 BlockDriverAIOCB *bdrv_aio_flush(BlockDriverState *bs,
  BlockDriverCompletionFunc *cb, void *opaque);
+BlockDriverAIOCB *bdrv_aio_discard(BlockDriverState *bs,
+   int64_t sector_num, int nb_sectors,
+   BlockDriverCompletionFunc *cb, void 
*opaque);
 void bdrv_aio_cancel(BlockDriverAIOCB *acb);
 
 typedef struct BlockRequest {
@@ -196,6 +199,7 @@ void bdrv_flush_all(void);
 void bdrv_close_all(void);
 
 int bdrv_discard(BlockDriverState *bs, int64_t sector_num, int nb_sectors);
+int bdrv_co_discard(BlockDriverState *bs, int64_t sector_num, int nb_sectors);
 int bdrv_has_zero_init(BlockDriverState *bs);
 int

Re: [Qemu-devel] [PATCH] Memory API bugfix - abolish addrrrange_end()

2011-10-17 Thread Avi Kivity

On 10/17/2011 07:31 AM, David Gibson wrote:
  
  In terms of how the code looks, it's seriously more ugly (see the
  patches I sent out).  Conceptually it's cleaner, since we're not dodging
  the issue that we need to deal with a full 64-bit domain.

 We don't have to dodge that issue.  I know how to remove the
 requirement for intermediate negative values, I just haven't made up a
 patch yet.  With that we can change to uint64 and cover the full 64
 bit range.  In fact I think I can make it so that size==0 represents
 size=2^64 and even handle the full 64-bit, inclusive range properly.

That means you can't do a real size == 0.

  But my main concern is maintainability.  The 64-bit blanket is too short,
  if we keep pulling it in various directions we'll just expose ourselves
  in new ways.

 Nonsense, dealing with full X-bit range calculations in X-bit types is
 a fairly standard problem.  The kernel does it in VMA handling for
 one.  It just requires thinking about overflow cases.

We discovered three bugs already (you found two, and I had one during
development).  Even if it can probably be done with extreme care, is
it worth spending all that development time on?

I'm not sure there is a parallel with vmas, since we're offsetting in
both the positive and negative directions.

-- 
error compiling committee.c: too many arguments to function

[Qemu-devel] [RFC][PATCH 12/45] msi: Introduce MSIRoutingCache

2011-10-17 Thread Jan Kiszka

This cache will help us implement KVM in-kernel irqchip support
without spreading hooks all over the place.

KVM requires us to register an MSI message first and then deliver it by
raising the pseudo IRQ line returned on registration. While this could be
changed for QEMU-originated MSI messages by adding direct MSI injection, we
will still need this translation for irqfd-originated messages. The
MSIRoutingCache will allow us to track those registrations and update them
lazily before the actual delivery. This avoids having to track MSI
vectors at device level (like qemu-kvm currently does).

Signed-off-by: Jan Kiszka jan.kis...@siemens.com
---
 hw/apic.c |5 +++--
 hw/apic.h |2 +-
 hw/msi.c  |   10 +++---
 hw/msi.h  |   14 +-
 hw/msix.c |7 ++-
 hw/pc.c   |4 ++--
 hw/pci.h  |4 
 qemu-common.h |1 +
 8 files changed, 37 insertions(+), 10 deletions(-)

diff --git a/hw/apic.c b/hw/apic.c
index c1d557d..6811ae1 100644
--- a/hw/apic.c
+++ b/hw/apic.c
@@ -804,7 +804,7 @@ static uint32_t apic_mem_readl(void *opaque, 
target_phys_addr_t addr)
 return val;
 }
 
-void apic_deliver_msi(MSIMessage *msg)
+void apic_deliver_msi(MSIMessage *msg, MSIRoutingCache *cache)
 {
 uint8_t dest =
 (msg-address  MSI_ADDR_DEST_ID_MASK)  MSI_ADDR_DEST_ID_SHIFT;
@@ -829,8 +829,9 @@ static void apic_mem_writel(void *opaque, 
target_phys_addr_t addr, uint32_t val)
  * Mapping them on the global bus happens to work because
  * MSI registers are reserved in APIC MMIO and vice versa. */
 MSIMessage msg = { .address = addr, .data = val };
+static MSIRoutingCache cache;
 
-msi_deliver(msg);
+msi_deliver(msg, cache);
 return;
 }
 
diff --git a/hw/apic.h b/hw/apic.h
index fa848fd..353ea3a 100644
--- a/hw/apic.h
+++ b/hw/apic.h
@@ -18,7 +18,7 @@ void cpu_set_apic_tpr(DeviceState *s, uint8_t val);
 uint8_t cpu_get_apic_tpr(DeviceState *s);
 void apic_init_reset(DeviceState *s);
 void apic_sipi(DeviceState *s);
-void apic_deliver_msi(MSIMessage *msg);
+void apic_deliver_msi(MSIMessage *msg, MSIRoutingCache *cache);
 
 /* pc.c */
 int cpu_is_bsp(CPUState *env);
diff --git a/hw/msi.c b/hw/msi.c
index 9055155..c8ccb17 100644
--- a/hw/msi.c
+++ b/hw/msi.c
@@ -40,13 +40,13 @@
 /* Flag for interrupt controller to declare MSI/MSI-X support */
 bool msi_supported;
 
-static void msi_unsupported(MSIMessage *msg)
+static void msi_unsupported(MSIMessage *msg, MSIRoutingCache *cache)
 {
 /* If we get here, the board failed to register a delivery handler. */
 abort();
 }
 
-void (*msi_deliver)(MSIMessage *msg) = msi_unsupported;
+void (*msi_deliver)(MSIMessage *msg, MSIRoutingCache *cache) = msi_unsupported;
 
 /* If we get rid of cap allocator, we won't need this. */
 static inline uint8_t msi_cap_sizeof(uint16_t flags)
@@ -288,6 +288,8 @@ int msi_init(struct PCIDevice *dev, uint8_t offset,
  0x  (PCI_MSI_VECTORS_MAX - nr_vectors));
 }
 
+dev-msi_cache = g_malloc0(nr_vectors * sizeof(*dev-msi_cache));
+
 if (kvm_enabled()  kvm_irqchip_in_kernel()) {
 dev-msi_irq_entries = g_malloc(nr_vectors *
 sizeof(*dev-msix_irq_entries));
@@ -312,6 +314,8 @@ void msi_uninit(struct PCIDevice *dev)
 g_free(dev-msi_irq_entries);
 }
 
+g_free(dev-msi_cache);
+
 pci_del_capability(dev, PCI_CAP_ID_MSI, cap_size);
 dev-cap_present = ~QEMU_PCI_CAP_MSI;
 
@@ -389,7 +393,7 @@ void msi_notify(PCIDevice *dev, unsigned int vector)
notify vector 0x%x
 address: 0x%PRIx64 data: 0x%PRIx32\n,
vector, msg.address, msg.data);
-msi_deliver(msg);
+msi_deliver(msg, dev-msi_cache[vector]);
 }
 
 /* Normally called by pci_default_write_config(). */
diff --git a/hw/msi.h b/hw/msi.h
index f3152f3..20ae215 100644
--- a/hw/msi.h
+++ b/hw/msi.h
@@ -29,6 +29,18 @@ struct MSIMessage {
 uint32_t data;
 };
 
+typedef enum {
+MSI_ROUTE_NONE = 0,
+MSI_ROUTE_STATIC,
+} MSIRouteType;
+
+struct MSIRoutingCache {
+MSIMessage msg;
+MSIRouteType type;
+int kvm_gsi;
+int kvm_irqfd;
+};
+
 extern bool msi_supported;
 
 bool msi_enabled(const PCIDevice *dev);
@@ -46,6 +58,6 @@ static inline bool msi_present(const PCIDevice *dev)
 return dev-cap_present  QEMU_PCI_CAP_MSI;
 }
 
-extern void (*msi_deliver)(MSIMessage *msg);
+extern void (*msi_deliver)(MSIMessage *msg, MSIRoutingCache *cache);
 
 #endif /* QEMU_MSI_H */
diff --git a/hw/msix.c b/hw/msix.c
index 08cc526..e824aef 100644
--- a/hw/msix.c
+++ b/hw/msix.c
@@ -358,6 +358,8 @@ int msix_init(struct PCIDevice *dev, unsigned short 
nentries,
 if (ret)
 goto err_config;
 
+dev-msix_cache = g_malloc0(nentries * sizeof *dev-msix_cache);
+
 if (kvm_enabled()  kvm_irqchip_in_kernel()) {
 dev-msix_irq_entries = g_malloc(nentries *
  sizeof

Re: [Qemu-devel] [PATCH] ioapic: Convert to memory API

2011-10-17 Thread Avi Kivity

On 10/16/2011 07:21 PM, Jan Kiszka wrote:
 From: Jan Kiszka jan.kis...@siemens.com

 Dispatching byte and word accesses like dwords looks strange, but let's
 just convert mechanically.

  
 -static CPUReadMemoryFunc * const ioapic_mem_read[3] = {
 -ioapic_mem_readl,
 -ioapic_mem_readl,
 -ioapic_mem_readl,
 -};
 -
 -static CPUWriteMemoryFunc * const ioapic_mem_write[3] = {
 -ioapic_mem_writel,
 -ioapic_mem_writel,
 -ioapic_mem_writel,
 +static const MemoryRegionOps ioapic_io_ops = {
 +.old_mmio = {
 +.read = { ioapic_mem_readl, ioapic_mem_readl, ioapic_mem_readl, },
 +.write = { ioapic_mem_writel, ioapic_mem_writel, ioapic_mem_writel, 
 },
 +},
 +.endianness = DEVICE_NATIVE_ENDIAN,
  };

Why use old_mmio?  Use the ordinary .read and .write, and ignore the
size parameter.

-- 
error compiling committee.c: too many arguments to function

[Qemu-devel] [RFC][PATCH 39/45] pci-assign: Use generic MSI support

2011-10-17 Thread Jan Kiszka

Implement MSI support for assigned devices via the generic MSI layer of
QEMU. Use config notifiers to update the vector route or switch back to
INTx when MSI gets disabled again.

Using the generic layer not only saves a bit of code, it also fixes reset
while legacy MSI is in use and adds 64-bit support.

Signed-off-by: Jan Kiszka jan.kis...@siemens.com
---
 hw/device-assignment.c |   77 +++
 1 files changed, 31 insertions(+), 46 deletions(-)

diff --git a/hw/device-assignment.c b/hw/device-assignment.c
index 2484afd..10b30a3 100644
--- a/hw/device-assignment.c
+++ b/hw/device-assignment.c
@@ -699,10 +699,6 @@ static void free_assigned_device(AssignedDevice *dev)
 close(dev-real_device.config_fd);
 }
 
-if (dev-dev.msi_cache) {
-kvm_msi_cache_invalidate(dev-dev.msi_cache[0]);
-g_free(dev-dev.msi_cache);
-}
 invalidate_msix_vectors(dev);
 g_free(dev-dev.msix_cache);
 }
@@ -847,7 +843,7 @@ static int assign_intx(AssignedDevice *dev)
 
 irq_type = KVM_DEV_IRQ_GUEST_INTX;
 if (dev-features  ASSIGNED_DEVICE_PREFER_MSI_MASK 
-dev-cap.available  ASSIGNED_DEVICE_CAP_MSI) {
+msi_present(dev-dev)) {
 irq_type |= KVM_DEV_IRQ_HOST_MSI;
 } else {
 irq_type |= KVM_DEV_IRQ_HOST_INTX;
@@ -920,31 +916,33 @@ void assigned_dev_update_irqs(void)
 }
 }
 
-static void assigned_dev_update_msi(PCIDevice *pci_dev)
+static void assigned_dev_update_msi(PCIDevice *pci_dev, bool enabled)
 {
 AssignedDevice *dev = DO_UPCAST(AssignedDevice, dev, pci_dev);
-uint8_t ctrl_byte = pci_get_byte(pci_dev-config + pci_dev-msi_cap +
- PCI_MSI_FLAGS);
-
-if (ctrl_byte  PCI_MSI_FLAGS_ENABLE) {
-uint8_t *pos = pci_dev-config + pci_dev-msi_cap;
-MSIMessage msg;
 
-deassign_irq(dev);
+if (!enabled) {
+assign_intx(dev);
+}
+}
 
-msg.address = pci_get_long(pos + PCI_MSI_ADDRESS_LO);
-msg.data = pci_get_word(pos + PCI_MSI_DATA_32);
+static int assigned_dev_update_msi_vector(PCIDevice *pci_dev,
+  unsigned int vector,
+  MSIMessage *msg, bool masked)
+{
+AssignedDevice *dev = DO_UPCAST(AssignedDevice, dev, pci_dev);
+int ret;
 
-if (kvm_device_msi_assign(kvm_state, calc_assigned_dev_id(dev), msg,
-  dev-dev.msi_cache[0])  0) {
-perror(assigned_dev_update_msi: assign msi);
-return;
+if (!masked) {
+deassign_irq(dev);
+ret = kvm_device_msi_assign(kvm_state, calc_assigned_dev_id(dev), msg,
+dev-dev.msi_cache[0]);
+if (ret  0) {
+perror(assigned_dev_update_msi_vector: assign msi);
+return ret;
 }
 dev-irq_requested_type = KVM_DEV_IRQ_HOST_MSI | KVM_DEV_IRQ_GUEST_MSI;
-} else {
-kvm_msi_cache_invalidate(dev-dev.msi_cache[0]);
-assign_intx(dev);
 }
+return 0;
 }
 
 static int assigned_dev_set_msix_vectors(PCIDevice *pci_dev)
@@ -1085,12 +1083,6 @@ static void assigned_dev_pci_write_config(PCIDevice 
*pci_dev, uint32_t address,
 
 pci_default_write_config(pci_dev, address, val, len);
 
-if (assigned_dev-cap.available  ASSIGNED_DEVICE_CAP_MSI) {
-if (range_covers_byte(address, len,
-  pci_dev-msi_cap + PCI_MSI_FLAGS)) {
-assigned_dev_update_msi(pci_dev);
-}
-}
 if (assigned_dev-cap.available  ASSIGNED_DEVICE_CAP_MSIX) {
 if (range_covers_byte(address, len,
   pci_dev-msix_cap + PCI_MSIX_FLAGS + 1)) {
@@ -1136,26 +1128,19 @@ static int assigned_device_pci_cap_init(PCIDevice 
*pci_dev)
  * MSI capability is the 1st capability in capability config */
 pos = pci_find_cap_offset(pci_dev, PCI_CAP_ID_MSI, 0);
 if (pos != 0  kvm_check_extension(kvm_state, KVM_CAP_ASSIGN_DEV_IRQ)) {
-dev-cap.available |= ASSIGNED_DEVICE_CAP_MSI;
-/* Only 32-bit/no-mask currently supported */
-if ((ret = pci_add_capability(pci_dev, PCI_CAP_ID_MSI, pos, 10))  0) {
+uint16_t flags = pci_get_word(pci_dev-config + pos + PCI_MSI_FLAGS);
+
+/* Note: KVM does not support multiple messages */
+ret = msi_init(pci_dev, pos, 1, flags  PCI_MSI_FLAGS_64BIT,
+   flags  PCI_MSI_FLAGS_MASKBIT);
+if (ret  0) {
+return ret;
+}
+ret = msi_set_config_notifiers(pci_dev, assigned_dev_update_msi,
+   assigned_dev_update_msi_vector);
+if (ret  0) {
 return ret;
 }
-pci_dev-msi_cap = pos;
-
-pci_set_word(pci_dev-config + pos + PCI_MSI_FLAGS,
- pci_get_word(pci_dev-config + pos + PCI_MSI_FLAGS) 
- PCI_MSI_FLAGS_QMASK);
-pci_set_long(pci_dev-config + pos +

[Qemu-devel] [RFC][PATCH 23/45] qemu-kvm: Rework MSI-X mask notifier to generic MSI config notifiers

2011-10-17 Thread Jan Kiszka

MSI config notifiers are supposed to be triggered on every relevant
configuration change of MSI vectors or if MSI is enabled/disabled.

Two notifiers are established, one for vector changes and one for general
enabling. The former notifier additionally passes the currently active
MSI message. This will allow us to update potential in-kernel IRQ routes on
changes. The latter notifier is optional and will only be used by a
subset of clients.

These notifiers are currently only available for MSI-X but will be
extended to legacy MSI as well.

Signed-off-by: Jan Kiszka jan.kis...@siemens.com
---
 hw/msix.c   |  119 +-
 hw/msix.h   |6 ++-
 hw/pci.h|8 ++-
 hw/virtio-pci.c |   24 ++--
 4 files changed, 102 insertions(+), 55 deletions(-)

diff --git a/hw/msix.c b/hw/msix.c
index 247b255..176bc76 100644
--- a/hw/msix.c
+++ b/hw/msix.c
@@ -219,16 +219,24 @@ static bool msix_is_masked(PCIDevice *dev, int vector)
   dev-msix_table_page[offset]  PCI_MSIX_ENTRY_CTRL_MASKBIT;
 }
 
-static void msix_handle_mask_update(PCIDevice *dev, int vector)
+static void msix_fire_vector_config_notifier(PCIDevice *dev,
+ unsigned int vector, bool masked)
 {
-bool masked = msix_is_masked(dev, vector);
+MSIMessage msg;
 int ret;
 
-if (dev-msix_mask_notifier) {
-ret = dev-msix_mask_notifier(dev, vector,
-  msix_is_masked(dev, vector));
+if (dev-msix_vector_config_notifier) {
+msix_message_from_vector(dev, vector, msg);
+ret = dev-msix_vector_config_notifier(dev, vector, msg, masked);
 assert(ret = 0);
 }
+}
+
+static void msix_handle_mask_update(PCIDevice *dev, int vector)
+{
+bool masked = msix_is_masked(dev, vector);
+
+msix_fire_vector_config_notifier(dev, vector, masked);
 if (!masked  msix_is_pending(dev, vector)) {
 msix_clr_pending(dev, vector);
 msix_notify(dev, vector);
@@ -240,20 +248,27 @@ void msix_write_config(PCIDevice *dev, uint32_t addr,
uint32_t old_val, int len)
 {
 unsigned enable_pos = dev-msix_cap + MSIX_CONTROL_OFFSET;
-bool was_masked;
+bool was_masked, was_enabled, is_enabled;
 int vector;
 
 if (!msix_present(dev) || !range_covers_byte(addr, len, enable_pos)) {
 return;
 }
 
-if (!msix_enabled(dev)) {
+old_val = (enable_pos - addr) * 8;
+
+was_enabled = old_val  MSIX_ENABLE_MASK;
+is_enabled = msix_enabled(dev);
+if (was_enabled != is_enabled  dev-msix_enable_notifier) {
+dev-msix_enable_notifier(dev, is_enabled);
+}
+
+if (!is_enabled) {
 return;
 }
 
 pci_device_deassert_intx(dev);
 
-old_val = (enable_pos - addr) * 8;
 was_masked =
 (old_val  (MSIX_MASKALL_MASK | MSIX_ENABLE_MASK)) != MSIX_ENABLE_MASK;
 if (was_masked != msix_function_masked(dev)) {
@@ -270,15 +285,20 @@ static void msix_mmio_write(void *opaque, 
target_phys_addr_t addr,
 unsigned int offset = addr  (MSIX_PAGE_SIZE - 1)  ~0x3;
 unsigned int vector = offset / PCI_MSIX_ENTRY_SIZE;
 bool was_masked = msix_is_masked(dev, vector);
+bool is_masked;
 
 pci_set_long(dev-msix_table_page + offset, val);
 if (kvm_enabled()  kvm_irqchip_in_kernel()) {
 kvm_msix_update(dev, vector, was_masked, msix_is_masked(dev, vector));
 }
 
-if (vector  dev-msix_entries_nr 
-was_masked != msix_is_masked(dev, vector)) {
-msix_handle_mask_update(dev, vector);
+if (vector  dev-msix_entries_nr) {
+is_masked = msix_is_masked(dev, vector);
+if (was_masked != is_masked) {
+msix_handle_mask_update(dev, vector);
+} else {
+msix_fire_vector_config_notifier(dev, vector, is_masked);
+}
 }
 }
 
@@ -305,17 +325,17 @@ static void msix_mmio_setup(PCIDevice *d, MemoryRegion 
*bar)
 
 static void msix_mask_all(struct PCIDevice *dev, unsigned nentries)
 {
-int vector, r;
+int vector;
+
 for (vector = 0; vector  nentries; ++vector) {
 unsigned offset =
 vector * PCI_MSIX_ENTRY_SIZE + PCI_MSIX_ENTRY_VECTOR_CTRL;
 bool was_masked = msix_is_masked(dev, vector);
+
 dev-msix_table_page[offset] |= PCI_MSIX_ENTRY_CTRL_MASKBIT;
-if (was_masked != msix_is_masked(dev, vector) 
-dev-msix_mask_notifier) {
-r = dev-msix_mask_notifier(dev, vector,
-msix_is_masked(dev, vector));
-assert(r = 0);
+
+if (!was_masked) {
+msix_handle_mask_update(dev, vector);
 }
 }
 }
@@ -337,7 +357,6 @@ int msix_init(struct PCIDevice *dev, unsigned short 
nentries,
 if (nentries  MSIX_MAX_ENTRIES)
 return -EINVAL;
 
-dev-msix_mask_notifier = NULL;
 dev-msix_entry_used = g_malloc0(MSIX_MAX_ENTRIES *
 sizeof

[Qemu-devel] [RFC][PATCH 19/45] qemu-kvm: Factor out kvm_msi_irqfd_set

2011-10-17 Thread Jan Kiszka

This makes the KVM core layer aware of the irqfd associated with some
MSI cache. kvm_msi_irqfd_set is defined for this purpose, so that virtio
no longer needs to peek into the cache to extract the GSI.

Signed-off-by: Jan Kiszka jan.kis...@siemens.com
---
 hw/virtio-pci.c |6 +++---
 kvm.h   |2 ++
 qemu-kvm.c  |   14 +-
 3 files changed, 18 insertions(+), 4 deletions(-)

diff --git a/hw/virtio-pci.c b/hw/virtio-pci.c
index 23880e0..ad6a002 100644
--- a/hw/virtio-pci.c
+++ b/hw/virtio-pci.c
@@ -524,9 +524,9 @@ static int virtio_pci_mask_vq(PCIDevice *dev, unsigned 
vector,
   VirtQueue *vq, int masked)
 {
 EventNotifier *notifier = virtio_queue_get_guest_notifier(vq);
-int r = kvm_set_irqfd(dev-msix_cache[vector].kvm_gsi,
-  event_notifier_get_fd(notifier),
-  !masked);
+int r = kvm_msi_irqfd_set(dev-msix_cache[vector],
+  event_notifier_get_fd(notifier),
+  !masked);
 if (r  0) {
 return (r == -ENOSYS) ? 0 : r;
 }
diff --git a/kvm.h b/kvm.h
index 3706fc6..fe2eec5 100644
--- a/kvm.h
+++ b/kvm.h
@@ -208,6 +208,8 @@ int kvm_msi_message_add(MSIMessage *msg, MSIRoutingCache 
*cache);
 int kvm_msi_message_del(MSIRoutingCache *cache);
 int kvm_msi_message_update(MSIMessage *msg, MSIRoutingCache *cache);
 
+int kvm_msi_irqfd_set(MSIRoutingCache *cache, int fd, bool assigned);
+
 int kvm_commit_irq_routes(void);
 
 int kvm_irqchip_in_kernel(void);
diff --git a/qemu-kvm.c b/qemu-kvm.c
index 13d4f90..ab7703b 100644
--- a/qemu-kvm.c
+++ b/qemu-kvm.c
@@ -352,8 +352,11 @@ int kvm_del_routing_entry(struct kvm_irq_routing_entry 
*entry)
 *e = *p;
 
 cache = s-msi_cache[i];
-if (cache) {
+if (cache  cache-type != MSI_ROUTE_NONE) {
 cache-type = MSI_ROUTE_NONE;
+if (cache-kvm_irqfd = 0) {
+kvm_set_irqfd(cache-kvm_gsi, cache-kvm_irqfd, false);
+}
 }
 s-msi_cache[i] = s-msi_cache[s-irq_routes-nr];
 
@@ -521,6 +524,15 @@ int kvm_msi_message_update(MSIMessage *msg, 
MSIRoutingCache *cache)
 }
 
 
+int kvm_msi_irqfd_set(MSIRoutingCache *cache, int fd, bool assigned)
+{
+if (cache-type == MSI_ROUTE_NONE) {
+return assigned ? -EINVAL : 0;
+}
+cache-kvm_irqfd = assigned ? fd : -1;
+return kvm_set_irqfd(cache-kvm_gsi, fd, assigned);
+}
+
 #ifdef KVM_CAP_DEVICE_MSIX
 int kvm_assign_set_msix_nr(KVMState *s, struct kvm_assigned_msix_nr *msix_nr)
 {
-- 
1.7.3.4

Re: [Qemu-devel] [RFC128 3/2] Adjust system and pci address spaces to full 64-bit

2011-10-17 Thread Avi Kivity

On 10/17/2011 07:33 AM, David Gibson wrote:
 On Sun, Oct 16, 2011 at 05:29:07PM +0200, Avi Kivity wrote:
  Now that the memory API supports full 64-bit buses, adjust the relevant
  callers to take advantage of it.

 Note that this, strictly speaking, doesn't give you full 64-bit
 coverage, since the range covered is 2^64-1 bytes rather than 2^64
 bytes.  Cases where that will matter would be very rare, of course.


An undocumented and indeed unmentioned feature of patch 2 is that
UINT64_MAX sizes are expanded to UINT64_MAX+1.  I did that to avoid
introducing memory_region_init_128() (or perhaps
memory_region_init_2_64() that doesn't take a size argument).  That
removes the ability to create container regions that span exactly
UINT64_MAX bytes.  It is strange in a patchset that tries to make
things more regular, I admit.

-- 
error compiling committee.c: too many arguments to function

Re: [Qemu-devel] [PATCH v2 0/3] coroutinization of flush and discard (split out of NBD series)

2011-10-17 Thread Kevin Wolf

Am 17.10.2011 12:32, schrieb Paolo Bonzini:
 This series, applying on top of block branch, enables drivers to use
 coroutines for flush and discard.  I kept aio_discard after discussing
 with Kevin since it should be useful not only for raw-posix-aio, but also
 for the userspace iSCSI backend (and in general for backends relying on
 an external library that is designed around aio).
 
 BTW, with this patch we get for free the invariant that bdrv_aio_*
 never returns a NULL acb (Stefan's patches already got to that point
 for read/write, of course).

Cool, I wasn't aware of that. That's a very nice side effect!

Maybe we should write this down in a comment and remove the now
unnecessary error handling from callers.

Kevin

[Qemu-devel] [RFC][PATCH 22/45] qemu-kvm: msix: Fire mask notifier on global mask changes

2011-10-17 Thread Jan Kiszka

Also invoke the mask notifier if the global MSI-X mask is modified. For
this purpose, we push the notifier call from the per-vector mask update
to the central msix_handle_mask_update.

Signed-off-by: Jan Kiszka jan.kis...@siemens.com
---
 hw/msix.c |   16 +---
 1 files changed, 9 insertions(+), 7 deletions(-)

diff --git a/hw/msix.c b/hw/msix.c
index 739b56f..247b255 100644
--- a/hw/msix.c
+++ b/hw/msix.c
@@ -221,7 +221,15 @@ static bool msix_is_masked(PCIDevice *dev, int vector)
 
 static void msix_handle_mask_update(PCIDevice *dev, int vector)
 {
-if (!msix_is_masked(dev, vector)  msix_is_pending(dev, vector)) {
+bool masked = msix_is_masked(dev, vector);
+int ret;
+
+if (dev-msix_mask_notifier) {
+ret = dev-msix_mask_notifier(dev, vector,
+  msix_is_masked(dev, vector));
+assert(ret = 0);
+}
+if (!masked  msix_is_pending(dev, vector)) {
 msix_clr_pending(dev, vector);
 msix_notify(dev, vector);
 }
@@ -262,7 +270,6 @@ static void msix_mmio_write(void *opaque, 
target_phys_addr_t addr,
 unsigned int offset = addr  (MSIX_PAGE_SIZE - 1)  ~0x3;
 unsigned int vector = offset / PCI_MSIX_ENTRY_SIZE;
 bool was_masked = msix_is_masked(dev, vector);
-int r;
 
 pci_set_long(dev-msix_table_page + offset, val);
 if (kvm_enabled()  kvm_irqchip_in_kernel()) {
@@ -271,11 +278,6 @@ static void msix_mmio_write(void *opaque, 
target_phys_addr_t addr,
 
 if (vector  dev-msix_entries_nr 
 was_masked != msix_is_masked(dev, vector)) {
-if (dev-msix_mask_notifier) {
-r = dev-msix_mask_notifier(dev, vector,
-msix_is_masked(dev, vector));
-assert(r = 0);
-}
 msix_handle_mask_update(dev, vector);
 }
 }
-- 
1.7.3.4

Re: [Qemu-devel] [PATCH RFC v1 2/2] hyper-v: initialize Hyper-V CPUID leafs.

2011-10-17 Thread Avi Kivity

On 10/17/2011 11:40 AM, Paolo Bonzini wrote:
 On 10/17/2011 11:17 AM, Vadim Rozenfeld wrote:
 @@ -379,11 +380,16 @@ int kvm_arch_init_vcpu(CPUState *env)
   cpuid_i = 0;

   /* Paravirtualization CPUIDs */
 -memcpy(signature, KVMKVMKVM\0\0\0, 12);
   c =cpuid_data.entries[cpuid_i++];
   memset(c, 0, sizeof(*c));
   c-function = KVM_CPUID_SIGNATURE;
 -c-eax = 0;
 +if (!hyperv_enabled()) {
 +memcpy(signature, KVMKVMKVM\0\0\0, 12);
 +c-eax = 0;
 +} else {
 +memcpy(signature, Microsoft Hv, 12);
 +c-eax = HYPERV_CPUID_MIN;
 +}

 Even not counting that hyper-v support should IMHO not be in
 KVM-specific code, I still think this shouldn't remove KVM leaves
 completely but rather move them to 0x40000100.  The KVM
 paravirtualization code then can similarly probe with 0x100 stride up
 to 0x40001000.  This is what was done for Xen, and it allows to enable
 enlightenments independent of whether the guest is Linux or Windows.

 However, let's get a third opinion---Avi, what do you think?

I agree with you, especially as this already works for Xen.

Note it doesn't completely solve the issue (so we have two interfaces,
which is the preferred one?), but it's better than nothing.

-- 
error compiling committee.c: too many arguments to function

[Qemu-devel] [RFC][PATCH 18/45] qemu-kvm: Hook into MSI delivery at APIC level

2011-10-17 Thread Jan Kiszka

Move the two hooks for MSI delivery to in-kernel irqchips from the MSI
layer to a single place: the APIC.

Signed-off-by: Jan Kiszka jan.kis...@siemens.com
---
 hw/apic.c |   24 +++-
 hw/msi.c  |5 -
 hw/msix.c |5 -
 3 files changed, 15 insertions(+), 19 deletions(-)

diff --git a/hw/apic.c b/hw/apic.c
index 6811ae1..cb6662c 100644
--- a/hw/apic.c
+++ b/hw/apic.c
@@ -806,15 +806,21 @@ static uint32_t apic_mem_readl(void *opaque, 
target_phys_addr_t addr)
 
 void apic_deliver_msi(MSIMessage *msg, MSIRoutingCache *cache)
 {
-uint8_t dest =
-(msg-address  MSI_ADDR_DEST_ID_MASK)  MSI_ADDR_DEST_ID_SHIFT;
-uint8_t vector =
-(msg-data  MSI_DATA_VECTOR_MASK)  MSI_DATA_VECTOR_SHIFT;
-uint8_t dest_mode = (msg-address  MSI_ADDR_DEST_MODE_SHIFT)  0x1;
-uint8_t trigger_mode = (msg-data  MSI_DATA_TRIGGER_SHIFT)  0x1;
-uint8_t delivery = (msg-data  MSI_DATA_DELIVERY_MODE_SHIFT)  0x7;
-/* XXX: Ignore redirection hint. */
-apic_deliver_irq(dest, dest_mode, delivery, vector, trigger_mode);
+if (kvm_enabled()  kvm_irqchip_in_kernel()) {
+if (kvm_set_irq(cache-kvm_gsi, 1, NULL)  0) {
+abort();
+}
+} else {
+uint8_t dest =
+(msg-address  MSI_ADDR_DEST_ID_MASK)  MSI_ADDR_DEST_ID_SHIFT;
+uint8_t vector =
+(msg-data  MSI_DATA_VECTOR_MASK)  MSI_DATA_VECTOR_SHIFT;
+uint8_t dest_mode = (msg-address  MSI_ADDR_DEST_MODE_SHIFT)  0x1;
+uint8_t trigger_mode = (msg-data  MSI_DATA_TRIGGER_SHIFT)  0x1;
+uint8_t delivery = (msg-data  MSI_DATA_DELIVERY_MODE_SHIFT)  0x7;
+/* XXX: Ignore redirection hint. */
+apic_deliver_irq(dest, dest_mode, delivery, vector, trigger_mode);
+}
 }
 
 static void apic_mem_writel(void *opaque, target_phys_addr_t addr, uint32_t 
val)
diff --git a/hw/msi.c b/hw/msi.c
index b947104..1328903 100644
--- a/hw/msi.c
+++ b/hw/msi.c
@@ -355,11 +355,6 @@ void msi_notify(PCIDevice *dev, unsigned int vector)
 return;
 }
 
-if (kvm_enabled()  kvm_irqchip_in_kernel()) {
-kvm_set_irq(dev-msi_cache[vector].kvm_gsi, 1, NULL);
-return;
-}
-
 msi_message_from_vector(dev, flags, vector, msg);
 
 MSI_DEV_PRINTF(dev,
diff --git a/hw/msix.c b/hw/msix.c
index 0be022e..6886255 100644
--- a/hw/msix.c
+++ b/hw/msix.c
@@ -457,11 +457,6 @@ void msix_notify(PCIDevice *dev, unsigned vector)
 return;
 }
 
-if (kvm_enabled()  kvm_irqchip_in_kernel()) {
-kvm_set_irq(dev-msix_cache[vector].kvm_gsi, 1, NULL);
-return;
-}
-
 msix_message_from_vector(dev, vector, msg);
 
 msi_deliver(msg, dev-msix_cache[vector]);
-- 
1.7.3.4

Re: [Qemu-devel] [PATCH] qxl: create slots on post_load in any state (fix RHBZ 740547)

2011-10-17 Thread Yonit Halperin


ACK
On 10/17/2011 12:24 PM, Alon Levy wrote:

If we migrate when the device is not in a native state the guest
still believes the slots are created, and will cause operations
that reference the slots, causing a panic: virtual address out of range
on the first of them. Easy to see by migrating in vga mode (with
a driver loaded, for instance windows cmd window in full screen mode)
and then exiting vga mode back to native mode will cause said panic.

Fixed by doing the slot recreation unconditionally at post_load

Signed-off-by: Alon Levy al...@redhat.com
---
  hw/qxl.c |   14 --
  1 files changed, 8 insertions(+), 6 deletions(-)

diff --git a/hw/qxl.c b/hw/qxl.c
index 03848ed..4e9f39f 100644
--- a/hw/qxl.c
+++ b/hw/qxl.c
@@ -1684,6 +1684,14 @@ static int qxl_post_load(void *opaque, int version)
  qxl_mode_to_string(d-mode));
  newmode = d-mode;
  d-mode = QXL_MODE_UNDEFINED;
+for (i = 0; i  NUM_MEMSLOTS; i++) {
+if (!d-guest_slots[i].active) {
+continue;
+}
+dprint(d, 1, %s: restoring guest slot %d delta %PRIu64\n,
+   __func__, i, d-guest_slots[i].delta);
+qxl_add_memslot(d, i, d-guest_slots[i].delta, QXL_SYNC);
+}
  switch (newmode) {
  case QXL_MODE_UNDEFINED:
  break;
@@ -1691,12 +1699,6 @@ static int qxl_post_load(void *opaque, int version)
  qxl_enter_vga_mode(d);
  break;
  case QXL_MODE_NATIVE:
-for (i = 0; i  NUM_MEMSLOTS; i++) {
-if (!d-guest_slots[i].active) {
-continue;
-}
-qxl_add_memslot(d, i, 0, QXL_SYNC);
-}
  qxl_create_guest_primary(d, 1, QXL_SYNC);

  /* replay surface-create and cursor-set commands */

Re: [Qemu-devel] [PATCH RFC v1 2/2] hyper-v: initialize Hyper-V CPUID leafs.

2011-10-17 Thread Paolo Bonzini


On 10/17/2011 12:41 PM, Avi Kivity wrote:

  Even not counting that hyper-v support should IMHO not be in
  KVM-specific code, I still think this shouldn't remove KVM leaves
  completely but rather move them to 0x40000100.  The KVM
  paravirtualization code then can similarly probe with 0x100 stride up
  to 0x40001000.  This is what was done for Xen, and it allows to enable
  enlightenments independent of whether the guest is Linux or Windows.

  However, let's get a third opinion---Avi, what do you think?

I agree with you, especially as this already works for Xen.

Note it doesn't completely solve the issue (so we have two interfaces,
which is the preferred one?), but it's better than nothing.


Windows doesn't look beyond 0x40000000, so Hyper-V stays there and KVM
has to shift.  So MS solved that part for us. :)


Paolo

Re: [Qemu-devel] [PATCH v2 0/3] coroutinization of flush and discard (split out of NBD series)

2011-10-17 Thread Paolo Bonzini


On 10/17/2011 12:43 PM, Kevin Wolf wrote:

Cool, I wasn't aware of that. That's a very nice side effect!

Maybe we should write this down in a comment and remove the now
unnecessary error handling from callers.


Looks like I finally have an excuse to play with Coccinelle!

Paolo

[Qemu-devel] [RFC][PATCH 33/45] qemu-kvm: Factor out kvm_device_intx_assign

2011-10-17 Thread Jan Kiszka

Avoid passing kvm_assigned_irq on INTx assignment and separate this
function from (to-be-refactored) MSI/MSI-X assignment.

Signed-off-by: Jan Kiszka jan.kis...@siemens.com
---
 hw/device-assignment.c |   21 ++---
 qemu-kvm.c |   17 +
 qemu-kvm.h |2 ++
 3 files changed, 29 insertions(+), 11 deletions(-)

diff --git a/hw/device-assignment.c b/hw/device-assignment.c
index e5ac54c..f145a84 100644
--- a/hw/device-assignment.c
+++ b/hw/device-assignment.c
@@ -825,7 +825,7 @@ static void deassign_irq(AssignedDevice *dev)
 
 static int assign_intx(AssignedDevice *dev)
 {
-struct kvm_assigned_irq assigned_irq_data;
+uint32_t irq_type = 0;
 int irq, r;
 
 /* Interrupt PIN 0 means don't use INTx */
@@ -841,17 +841,16 @@ static int assign_intx(AssignedDevice *dev)
 
 deassign_irq(dev);
 
-memset(assigned_irq_data, 0, sizeof(assigned_irq_data));
-assigned_irq_data.assigned_dev_id = calc_assigned_dev_id(dev);
-assigned_irq_data.guest_irq = irq;
-assigned_irq_data.flags = KVM_DEV_IRQ_GUEST_INTX;
+irq_type = KVM_DEV_IRQ_GUEST_INTX;
 if (dev-features  ASSIGNED_DEVICE_PREFER_MSI_MASK 
-dev-cap.available  ASSIGNED_DEVICE_CAP_MSI)
-assigned_irq_data.flags |= KVM_DEV_IRQ_HOST_MSI;
-else
-assigned_irq_data.flags |= KVM_DEV_IRQ_HOST_INTX;
+dev-cap.available  ASSIGNED_DEVICE_CAP_MSI) {
+irq_type |= KVM_DEV_IRQ_HOST_MSI;
+} else {
+irq_type |= KVM_DEV_IRQ_HOST_INTX;
+}
 
-r = kvm_assign_irq(kvm_state, assigned_irq_data);
+r = kvm_device_intx_assign(kvm_state, calc_assigned_dev_id(dev), irq_type,
+   irq);
 if (r  0) {
 fprintf(stderr, Failed to assign irq for \%s\: %s\n,
 dev-dev.qdev.id, strerror(-r));
@@ -861,7 +860,7 @@ static int assign_intx(AssignedDevice *dev)
 }
 
 dev-girq = irq;
-dev-irq_requested_type = assigned_irq_data.flags;
+dev-irq_requested_type = irq_type;
 return r;
 }
 
diff --git a/qemu-kvm.c b/qemu-kvm.c
index c24e93c..0086514 100644
--- a/qemu-kvm.c
+++ b/qemu-kvm.c
@@ -194,6 +194,23 @@ static int kvm_old_assign_irq(KVMState *s,
 return kvm_vm_ioctl(s, KVM_ASSIGN_IRQ, assigned_irq);
 }
 
+int kvm_device_intx_assign(KVMState *s, uint32_t dev_id,
+   uint32_t host_irq_type, uint32_t guest_irq)
+{
+struct kvm_assigned_irq assigned_irq;
+
+assigned_irq.assigned_dev_id = dev_id;
+assigned_irq.guest_irq = guest_irq;
+assigned_irq.flags = KVM_DEV_IRQ_GUEST_INTX |
+(host_irq_type  (KVM_DEV_IRQ_HOST_INTX | KVM_DEV_IRQ_HOST_MSI));
+if (kvm_check_extension(s, KVM_CAP_ASSIGN_DEV_IRQ)) {
+return kvm_vm_ioctl(s, KVM_ASSIGN_DEV_IRQ, assigned_irq);
+} else {
+assigned_irq.host_irq = 0;
+return kvm_vm_ioctl(s, KVM_ASSIGN_IRQ, assigned_irq);
+}
+}
+
 #ifdef KVM_CAP_ASSIGN_DEV_IRQ
 int kvm_assign_irq(KVMState *s, struct kvm_assigned_irq *assigned_irq)
 {
diff --git a/qemu-kvm.h b/qemu-kvm.h
index 7cdb5a8..783df7f 100644
--- a/qemu-kvm.h
+++ b/qemu-kvm.h
@@ -150,6 +150,8 @@ int kvm_assign_pci_device(KVMState *s,
  */
 int kvm_assign_irq(KVMState *s, struct kvm_assigned_irq *assigned_irq);
 
+int kvm_device_intx_assign(KVMState *s, uint32_t dev_id,
+   uint32_t host_irq_type, uint32_t guest_irq);
 int kvm_device_irq_deassign(KVMState *s, uint32_t dev_id, uint32_t type);
 
 /*!
-- 
1.7.3.4

Re: [Qemu-devel] GPLv3 troubles (was: [PATCH 6/7] target-xtensa: add fsf core)

2011-10-17 Thread Andreas Färber

Am 15.10.2011 11:02, schrieb Blue Swirl:
 On Mon, Oct 10, 2011 at 2:26 AM, Max Filippov jcmvb...@gmail.com wrote:
 diff --git a/target-xtensa/core-fsf/gdb-config.c 
 b/target-xtensa/core-fsf/gdb-config.c
 new file mode 100644
 index 000..6705d9c
 --- /dev/null
 +++ b/target-xtensa/core-fsf/gdb-config.c
 @@ -0,0 +1,152 @@
 +/* Configuration for the Xtensa architecture for GDB, the GNU debugger.
 +
 +   Copyright (C) 2003, 2005, 2006, 2007, 2008 Free Software Foundation, Inc.
 +
 +   This file is part of GDB.
 +
 +   This program is free software; you can redistribute it and/or modify
 +   it under the terms of the GNU General Public License as published by
 +   the Free Software Foundation; either version 3 of the License, or
 
 Nack. GPLv3 is by design incompatible with GPLv2only (but not with
 GPLv2+ or IIRC BSD-like) licenses. Please only use code from GDB
 before v3 switch.
 
 As a side note, a quick grep shows that GPLv2only is a small minority
 in QEMU. In theory it should be possible to agree to switch from
 GPLv2only to some GPLv3 compatible license for all of QEMU code, or in
 a theory with alternative universes, even get FSF to relicense GDB
 under GPLv2only compatible way. Or, with the aid of infinite number of
 monkeys of Internet waiting to waste their time, rewrite incompatible
 but interesting parts of GDB or QEMU under The One True License of the
 day.

Could we please draft some policy on this? This is not a GDB issue, it's
very general. Whether we like it or not, there is GPLv3-licensed code
and there will probably be a GPLv4 one day.

IMO having old GPLv2-only code is one thing. But there's a lot of new
GPLv2-only code cooking and occasionally pouring in, especially from
qemu-kvm. Device assignment is a current example I encountered.

If we could make checkpatch.pl detect new GPLv2-only code, then I would
hope, given the dynamic QEMU development of the last few years, that the
GPLv2-only portions become so small (both in relation and absolute) that
they can either be replaced or the authors' permission be obtained to
change the license to GPLv2-or-later.

Andreas

-- 
SUSE LINUX Products GmbH, Maxfeldstr. 5, 90409 Nürnberg, Germany
GF: Jeff Hawn, Jennifer Guild, Felix Imendörffer; HRB 16746, AG Nürnberg

Re: [Qemu-devel] GPLv3 troubles

2011-10-17 Thread Paolo Bonzini


On 10/17/2011 12:45 PM, Andreas Färber wrote:

Could we please draft some policy on this? This is not a GDB issue, it's
very general. Whether we like it or not, there is GPLv3-licensed code
and there will probably be a GPLv4 one day.

IMO having old GPLv2-only code is one thing. But there's a lot of new
GPLv2-only code cooking and occasionally pouring in, especially from
qemu-kvm. Device assignment is a current example I encountered.

If we could make checkpatch.pl detect new GPLv2-only code, then I would
hope, given the dynamic QEMU development of the last few years, that the
GPLv2-only portions become so small (both in relation and absolute) that
they can either be replaced or the authors' permission be obtained to
change the license to GPLv2-or-later.


That is close to impossible; you usually have to ask permission from all the
authors in the history to avoid bigger problems.


Paolo

[Qemu-devel] [PATCH] arm gic saving/loading fix

2011-10-17 Thread Dmitry Koshelev

irq_target field saving/loading is in the wrong loop

Signed-off-by: Dmitry Koshelev karagio...@gmail.com
---
 hw/arm_gic.c |   12 ++--
 1 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/hw/arm_gic.c b/hw/arm_gic.c
index 8286a28..ba05131 100644
--- a/hw/arm_gic.c
+++ b/hw/arm_gic.c
@@ -662,9 +662,6 @@ static void gic_save(QEMUFile *f, void *opaque)
 qemu_put_be32(f, s-enabled);
 for (i = 0; i  NUM_CPU(s); i++) {
 qemu_put_be32(f, s-cpu_enabled[i]);
-#ifndef NVIC
-qemu_put_be32(f, s-irq_target[i]);
-#endif
 for (j = 0; j  32; j++)
 qemu_put_be32(f, s-priority1[j][i]);
 for (j = 0; j  GIC_NIRQ; j++)
@@ -678,6 +675,9 @@ static void gic_save(QEMUFile *f, void *opaque)
 qemu_put_be32(f, s-priority2[i]);
 }
 for (i = 0; i  GIC_NIRQ; i++) {
+#ifndef NVIC
+qemu_put_be32(f, s-irq_target[i]);
+#endif
 qemu_put_byte(f, s-irq_state[i].enabled);
 qemu_put_byte(f, s-irq_state[i].pending);
 qemu_put_byte(f, s-irq_state[i].active);
@@ -699,9 +699,6 @@ static int gic_load(QEMUFile *f, void *opaque, int
version_id)
 s-enabled = qemu_get_be32(f);
 for (i = 0; i  NUM_CPU(s); i++) {
 s-cpu_enabled[i] = qemu_get_be32(f);
-#ifndef NVIC
-s-irq_target[i] = qemu_get_be32(f);
-#endif
 for (j = 0; j  32; j++)
 s-priority1[j][i] = qemu_get_be32(f);
 for (j = 0; j  GIC_NIRQ; j++)
@@ -715,6 +712,9 @@ static int gic_load(QEMUFile *f, void *opaque, int
version_id)
 s-priority2[i] = qemu_get_be32(f);
 }
 for (i = 0; i  GIC_NIRQ; i++) {
+#ifndef NVIC
+s-irq_target[i] = qemu_get_be32(f);
+#endif
 s-irq_state[i].enabled = qemu_get_byte(f);
 s-irq_state[i].pending = qemu_get_byte(f);
 s-irq_state[i].active = qemu_get_byte(f);

[Qemu-devel] [RFC][PATCH 20/45] qemu-kvm: msix: Only invoke msix_handle_mask_update on changes

2011-10-17 Thread Jan Kiszka

Reorganize msix_mmio_writel so that msix_handle_mask_update is only
called on mask changes. Pass previous config space value to
msix_write_config so that it can check if a mask change took place.

Signed-off-by: Jan Kiszka jan.kis...@siemens.com
---
 hw/msix.c |   36 
 hw/msix.h |2 +-
 hw/pci.c  |3 ++-
 3 files changed, 23 insertions(+), 18 deletions(-)

diff --git a/hw/msix.c b/hw/msix.c
index 6886255..57d0aac 100644
--- a/hw/msix.c
+++ b/hw/msix.c
@@ -206,12 +206,12 @@ static void msix_clr_pending(PCIDevice *dev, int vector)
 *msix_pending_byte(dev, vector) = ~msix_pending_mask(vector);
 }
 
-static int msix_function_masked(PCIDevice *dev)
+static bool msix_function_masked(PCIDevice *dev)
 {
 return dev-config[dev-msix_cap + MSIX_CONTROL_OFFSET]  
MSIX_MASKALL_MASK;
 }
 
-static int msix_is_masked(PCIDevice *dev, int vector)
+static bool msix_is_masked(PCIDevice *dev, int vector)
 {
 unsigned offset =
 vector * PCI_MSIX_ENTRY_SIZE + PCI_MSIX_ENTRY_VECTOR_CTRL;
@@ -229,9 +229,10 @@ static void msix_handle_mask_update(PCIDevice *dev, int 
vector)
 
 /* Handle MSI-X capability config write. */
 void msix_write_config(PCIDevice *dev, uint32_t addr,
-   uint32_t val, int len)
+   uint32_t old_val, int len)
 {
 unsigned enable_pos = dev-msix_cap + MSIX_CONTROL_OFFSET;
+bool was_masked;
 int vector;
 
 if (!msix_present(dev) || !range_covers_byte(addr, len, enable_pos)) {
@@ -244,12 +245,13 @@ void msix_write_config(PCIDevice *dev, uint32_t addr,
 
 pci_device_deassert_intx(dev);
 
-if (msix_function_masked(dev)) {
-return;
-}
-
-for (vector = 0; vector  dev-msix_entries_nr; ++vector) {
-msix_handle_mask_update(dev, vector);
+old_val = (enable_pos - addr) * 8;
+was_masked =
+(old_val  (MSIX_MASKALL_MASK | MSIX_ENABLE_MASK)) != MSIX_ENABLE_MASK;
+if (was_masked != msix_function_masked(dev)) {
+for (vector = 0; vector  dev-msix_entries_nr; ++vector) {
+msix_handle_mask_update(dev, vector);
+}
 }
 }
 
@@ -259,17 +261,19 @@ static void msix_mmio_write(void *opaque, 
target_phys_addr_t addr,
 PCIDevice *dev = opaque;
 unsigned int offset = addr  (MSIX_PAGE_SIZE - 1)  ~0x3;
 unsigned int vector = offset / PCI_MSIX_ENTRY_SIZE;
-int was_masked = msix_is_masked(dev, vector);
+bool was_masked = msix_is_masked(dev, vector);
+int r;
+
 pci_set_long(dev-msix_table_page + offset, val);
 if (kvm_enabled()  kvm_irqchip_in_kernel()) {
 kvm_msix_update(dev, vector, was_masked, msix_is_masked(dev, vector));
 }
 
-if (vector  dev-msix_entries_nr) {
-if (was_masked != msix_is_masked(dev, vector) 
-dev-msix_mask_notifier) {
-int r = dev-msix_mask_notifier(dev, vector,
-msix_is_masked(dev, vector));
+if (vector  dev-msix_entries_nr 
+was_masked != msix_is_masked(dev, vector)) {
+if (dev-msix_mask_notifier) {
+r = dev-msix_mask_notifier(dev, vector,
+msix_is_masked(dev, vector));
 assert(r = 0);
 }
 msix_handle_mask_update(dev, vector);
@@ -303,7 +307,7 @@ static void msix_mask_all(struct PCIDevice *dev, unsigned 
nentries)
 for (vector = 0; vector  nentries; ++vector) {
 unsigned offset =
 vector * PCI_MSIX_ENTRY_SIZE + PCI_MSIX_ENTRY_VECTOR_CTRL;
-int was_masked = msix_is_masked(dev, vector);
+bool was_masked = msix_is_masked(dev, vector);
 dev-msix_table_page[offset] |= PCI_MSIX_ENTRY_CTRL_MASKBIT;
 if (was_masked != msix_is_masked(dev, vector) 
 dev-msix_mask_notifier) {
diff --git a/hw/msix.h b/hw/msix.h
index a8661e1..685dbe2 100644
--- a/hw/msix.h
+++ b/hw/msix.h
@@ -9,7 +9,7 @@ int msix_init(PCIDevice *pdev, unsigned short nentries,
   unsigned bar_nr, unsigned bar_size);
 
 void msix_write_config(PCIDevice *pci_dev, uint32_t address,
-   uint32_t val, int len);
+   uint32_t old_val, int len);
 
 int msix_uninit(PCIDevice *d, MemoryRegion *bar);
 
diff --git a/hw/pci.c b/hw/pci.c
index 6673989..39b2173 100644
--- a/hw/pci.c
+++ b/hw/pci.c
@@ -1129,6 +1129,7 @@ uint32_t pci_default_read_config(PCIDevice *d,
 
 void pci_default_write_config(PCIDevice *d, uint32_t addr, uint32_t val, int l)
 {
+uint32_t old_val = pci_default_read_config(d, addr, l);
 int i, was_irq_disabled = pci_irq_disabled(d);
 
 for (i = 0; i  l; val = 8, ++i) {
@@ -1156,7 +1157,7 @@ void pci_default_write_config(PCIDevice *d, uint32_t 
addr, uint32_t val, int l)
 pci_update_irq_disabled(d, was_irq_disabled);
 
 msi_write_config(d, addr, val, l);
-msix_write_config(d, addr, val, l);
+msix_write_config(d, addr, old_val, l);
 }
 
 /***/

[Qemu-devel] [PATCH] arm cpu state loading fix

2011-10-17 Thread Dmitry Koshelev

Floating registers loading fix.

Signed-off-by: Dmitry Koshelev karaghio...@gmail.com
---
 target-arm/machine.c |2 +-
 1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/target-arm/machine.c b/target-arm/machine.c
index 3925d3a..73d82c9 100644
--- a/target-arm/machine.c
+++ b/target-arm/machine.c
@@ -175,7 +175,7 @@ int cpu_load(QEMUFile *f, void *opaque, int version_id)
 env-vfp.vec_stride = qemu_get_be32(f);

 if (arm_feature(env, ARM_FEATURE_VFP3)) {
-for (i = 0;  i  16; i++) {
+for (i = 16;  i  32; i++) {
 CPU_DoubleU u;
 u.l.upper = qemu_get_be32(f);
 u.l.lower = qemu_get_be32(f);

Re: [Qemu-devel] [PATCH] arm gic saving/loading fix

2011-10-17 Thread Andreas Färber

Am 17.10.2011 12:48, schrieb Dmitry Koshelev:
 irq_target field saving/loading is in the wrong loop
 
 Signed-off-by: Dmitry Koshelev karagio...@gmail.com

Reviewed-by: Andreas Färber afaer...@suse.de

Andreas

 ---
  hw/arm_gic.c |   12 ++--
  1 files changed, 6 insertions(+), 6 deletions(-)
 
 diff --git a/hw/arm_gic.c b/hw/arm_gic.c
 index 8286a28..ba05131 100644
 --- a/hw/arm_gic.c
 +++ b/hw/arm_gic.c
 @@ -662,9 +662,6 @@ static void gic_save(QEMUFile *f, void *opaque)
  qemu_put_be32(f, s-enabled);
  for (i = 0; i  NUM_CPU(s); i++) {
  qemu_put_be32(f, s-cpu_enabled[i]);
 -#ifndef NVIC
 -qemu_put_be32(f, s-irq_target[i]);
 -#endif
  for (j = 0; j  32; j++)
  qemu_put_be32(f, s-priority1[j][i]);
  for (j = 0; j  GIC_NIRQ; j++)
 @@ -678,6 +675,9 @@ static void gic_save(QEMUFile *f, void *opaque)
  qemu_put_be32(f, s-priority2[i]);
  }
  for (i = 0; i  GIC_NIRQ; i++) {
 +#ifndef NVIC
 +qemu_put_be32(f, s-irq_target[i]);
 +#endif
  qemu_put_byte(f, s-irq_state[i].enabled);
  qemu_put_byte(f, s-irq_state[i].pending);
  qemu_put_byte(f, s-irq_state[i].active);
 @@ -699,9 +699,6 @@ static int gic_load(QEMUFile *f, void *opaque, int
 version_id)
  s-enabled = qemu_get_be32(f);
  for (i = 0; i  NUM_CPU(s); i++) {
  s-cpu_enabled[i] = qemu_get_be32(f);
 -#ifndef NVIC
 -s-irq_target[i] = qemu_get_be32(f);
 -#endif
  for (j = 0; j  32; j++)
  s-priority1[j][i] = qemu_get_be32(f);
  for (j = 0; j  GIC_NIRQ; j++)
 @@ -715,6 +712,9 @@ static int gic_load(QEMUFile *f, void *opaque, int
 version_id)
  s-priority2[i] = qemu_get_be32(f);
  }
  for (i = 0; i  GIC_NIRQ; i++) {
 +#ifndef NVIC
 +s-irq_target[i] = qemu_get_be32(f);
 +#endif
  s-irq_state[i].enabled = qemu_get_byte(f);
  s-irq_state[i].pending = qemu_get_byte(f);
  s-irq_state[i].active = qemu_get_byte(f);


-- 
SUSE LINUX Products GmbH, Maxfeldstr. 5, 90409 Nürnberg, Germany
GF: Jeff Hawn, Jennifer Guild, Felix Imendörffer; HRB 16746, AG Nürnberg

Re: [Qemu-devel] [PATCH v2 0/3] coroutinization of flush and discard (split out of NBD series)

2011-10-17 Thread Kevin Wolf

Am 17.10.2011 12:32, schrieb Paolo Bonzini:
 This series, applying on top of block branch, enables drivers to use
 coroutines for flush and discard.  I kept aio_discard after discussing
 with Kevin since it should be useful not only for raw-posix-aio, but also
 for the userspace iSCSI backend (and in general for backends relying on
 an external library that is designed around aio).
 
 BTW, with this patch we get for free the invariant that bdrv_aio_*
 never returns a NULL acb (Stefan's patches already got to that point
 for read/write, of course).
 
 v1-v2:
   add bdrv_co_flush and bdrv_co_discard entry points
 
 Paolo Bonzini (2):
   block: unify flush implementations
   block: add bdrv_co_discard and bdrv_aio_discard support
 
 Stefan Hajnoczi (1):
   block: drop redundant bdrv_flush implementation
 
  block.c   |  258 
 +
  block.h   |5 +
  block/blkdebug.c  |6 --
  block/blkverify.c |9 --
  block/qcow.c  |6 --
  block/qcow2.c |   19 
  block/qed.c   |6 --
  block/raw-posix.c |   18 
  block/raw.c   |   23 ++---
  block_int.h   |   10 ++-
  trace-events  |1 +
  11 files changed, 184 insertions(+), 177 deletions(-)
 

Thanks, applied all to the block branch.

Kevin

[Qemu-devel] [RFC][PATCH 36/45] qemu-kvm: Factor out kvm_device_msix_* services

2011-10-17 Thread Jan Kiszka

Create kvm_device_msix_{supported,init_vectors,set_vector,assign},
replacing the old kvm_assign_set_msix_{nr,entry} services. The new API
no longer requires direct fiddling with the KVM API data structures and
just takes the required parameters. kvm_device_msix_set_vector also
combines MSI route creation/update with registering the vector with the
device assignment kernel part. The routing information is now stored in
the msix_cache of the backing QEMU PCI device, maintained by the device
assignment code until we switch to generic MSI-X support.

Signed-off-by: Jan Kiszka jan.kis...@siemens.com
---
 hw/device-assignment.c |  103 +++
 hw/device-assignment.h |1 -
 qemu-kvm.c |   42 +--
 qemu-kvm.h |   11 +++--
 4 files changed, 76 insertions(+), 81 deletions(-)

diff --git a/hw/device-assignment.c b/hw/device-assignment.c
index 83951a3..2484afd 100644
--- a/hw/device-assignment.c
+++ b/hw/device-assignment.c
@@ -648,15 +648,13 @@ again:
 
 static QLIST_HEAD(, AssignedDevice) devs = QLIST_HEAD_INITIALIZER(devs);
 
-static void free_dev_irq_entries(AssignedDevice *dev)
+static void invalidate_msix_vectors(AssignedDevice *dev)
 {
 int i;
 
-for (i = 0; i  dev-irq_entries_nr; i++)
-kvm_del_routing_entry(dev-entry[i]);
-g_free(dev-entry);
-dev-entry = NULL;
-dev-irq_entries_nr = 0;
+for (i = 0; i  dev-irq_entries_nr; i++) {
+kvm_msi_cache_invalidate(dev-dev.msix_cache[i]);
+}
 }
 
 static void free_assigned_device(AssignedDevice *dev)
@@ -701,12 +699,12 @@ static void free_assigned_device(AssignedDevice *dev)
 close(dev-real_device.config_fd);
 }
 
-free_dev_irq_entries(dev);
-
 if (dev-dev.msi_cache) {
 kvm_msi_cache_invalidate(dev-dev.msi_cache[0]);
 g_free(dev-dev.msi_cache);
 }
+invalidate_msix_vectors(dev);
+g_free(dev-dev.msix_cache);
 }
 
 static uint32_t calc_assigned_dev_id(AssignedDevice *dev)
@@ -953,11 +951,12 @@ static int assigned_dev_set_msix_vectors(PCIDevice 
*pci_dev)
 {
 AssignedDevice *adev = DO_UPCAST(AssignedDevice, dev, pci_dev);
 uint16_t entries_nr = 0, entries_max_nr;
-int pos = 0, i, r = 0;
-uint32_t msg_addr, msg_upper_addr, msg_data;
-struct kvm_assigned_msix_nr msix_nr;
-struct kvm_assigned_msix_entry msix_entry;
 void *msix_page = adev-msix_table_page;
+uint32_t dev_id;
+MSIMessage msg;
+int pos, i, r;
+
+assert(adev-irq_entries_nr == 0);
 
 pos = pci_find_capability(pci_dev, PCI_CAP_ID_MSIX);
 
@@ -980,72 +979,40 @@ static int assigned_dev_set_msix_vectors(PCIDevice 
*pci_dev)
 return -EINVAL;
 }
 
-msix_nr.assigned_dev_id = calc_assigned_dev_id(adev);
-msix_nr.entry_nr = entries_nr;
-r = kvm_assign_set_msix_nr(kvm_state, msix_nr);
-if (r != 0) {
-fprintf(stderr, fail to set MSI-X entry number for MSIX! %s\n,
-strerror(-r));
+dev_id = calc_assigned_dev_id(adev);
+
+r = kvm_device_msix_init_vectors(kvm_state, dev_id, entries_nr);
+if (r  0) {
 return r;
 }
-
-free_dev_irq_entries(adev);
+pci_dev-msix_cache = g_malloc0(entries_nr * sizeof(MSIRoutingCache));
 adev-irq_entries_nr = entries_nr;
-adev-entry = g_malloc0(entries_nr * sizeof(*(adev-entry)));
 
-msix_entry.assigned_dev_id = msix_nr.assigned_dev_id;
-entries_nr = 0;
 for (i = 0; i  entries_max_nr; i++) {
-if (entries_nr = msix_nr.entry_nr) {
+if (entries_nr == 0) {
 break;
 }
-msg_data = pci_get_long(msix_page + i * PCI_MSIX_ENTRY_SIZE +
+msg.data = pci_get_long(msix_page + i * PCI_MSIX_ENTRY_SIZE +
 PCI_MSIX_ENTRY_DATA);
-if (msg_data == 0) {
+if (msg.data == 0) {
 continue;
 }
-msg_addr = pci_get_long(msix_page + i * PCI_MSIX_ENTRY_SIZE +
-PCI_MSIX_ENTRY_LOWER_ADDR);
-msg_upper_addr = pci_get_long(msix_page + i * PCI_MSIX_ENTRY_SIZE +
-  PCI_MSIX_ENTRY_UPPER_ADDR);
+msg.address = pci_get_quad(msix_page + i * PCI_MSIX_ENTRY_SIZE +
+   PCI_MSIX_ENTRY_LOWER_ADDR);
 
-r = kvm_get_irq_route_gsi();
+r = kvm_device_msix_set_vector(kvm_state, dev_id, i, msg,
+   pci_dev-msix_cache[i]);
 if (r  0) {
 return r;
 }
-
-adev-entry[entries_nr].gsi = r;
-adev-entry[entries_nr].type = KVM_IRQ_ROUTING_MSI;
-adev-entry[entries_nr].flags = 0;
-adev-entry[entries_nr].u.msi.address_lo = msg_addr;
-adev-entry[entries_nr].u.msi.address_hi = msg_upper_addr;
-adev-entry[entries_nr].u.msi.data = msg_data;
-DEBUG(MSI-X data 0x%x, MSI-X addr_lo 0x%x\n!, msg_data, msg_addr);
-kvm_add_routing_entry(adev-entry[entries_nr], NULL);
-
-

[Qemu-devel] [RFC][PATCH 25/45] qemu-kvm: Update MSI cache on kvm_msi_irqfd_set

2011-10-17 Thread Jan Kiszka

Updating the MSI message registration on kvm_msi_irqfd_set will allow us
to switch to a lazy mode and remove the need to track message changes in
the device config space.

Signed-off-by: Jan Kiszka jan.kis...@siemens.com
---
 hw/virtio-pci.c |   10 ++
 kvm.h   |3 ++-
 qemu-kvm.c  |   17 ++---
 3 files changed, 22 insertions(+), 8 deletions(-)

diff --git a/hw/virtio-pci.c b/hw/virtio-pci.c
index 6718945..85d6771 100644
--- a/hw/virtio-pci.c
+++ b/hw/virtio-pci.c
@@ -521,10 +521,10 @@ static void virtio_pci_guest_notifier_read(void *opaque)
 }
 
 static int virtio_pci_mask_vq(PCIDevice *dev, unsigned int vector,
-  VirtQueue *vq, bool masked)
+  MSIMessage *msg, VirtQueue *vq, bool masked)
 {
 EventNotifier *notifier = virtio_queue_get_guest_notifier(vq);
-int r = kvm_msi_irqfd_set(dev-msix_cache[vector],
+int r = kvm_msi_irqfd_set(msg, dev-msix_cache[vector],
   event_notifier_get_fd(notifier),
   !masked);
 if (r  0) {
@@ -554,7 +554,8 @@ static int virtio_pci_msi_vector_config(PCIDevice *dev, 
unsigned int vector,
 if (virtio_queue_vector(vdev, n) != vector) {
 continue;
 }
-r = virtio_pci_mask_vq(dev, vector, virtio_get_queue(vdev, n), masked);
+r = virtio_pci_mask_vq(dev, vector, msg, virtio_get_queue(vdev, n),
+   masked);
 if (r  0) {
 goto undo;
 }
@@ -565,7 +566,8 @@ undo:
 if (virtio_queue_vector(vdev, n) != vector) {
 continue;
 }
-virtio_pci_mask_vq(dev, vector, virtio_get_queue(vdev, n), !masked);
+virtio_pci_mask_vq(dev, vector, msg, virtio_get_queue(vdev, n),
+   !masked);
 }
 return r;
 }
diff --git a/kvm.h b/kvm.h
index fe2eec5..8647647 100644
--- a/kvm.h
+++ b/kvm.h
@@ -208,7 +208,8 @@ int kvm_msi_message_add(MSIMessage *msg, MSIRoutingCache 
*cache);
 int kvm_msi_message_del(MSIRoutingCache *cache);
 int kvm_msi_message_update(MSIMessage *msg, MSIRoutingCache *cache);
 
-int kvm_msi_irqfd_set(MSIRoutingCache *cache, int fd, bool assigned);
+int kvm_msi_irqfd_set(MSIMessage *msg, MSIRoutingCache *cache, int fd,
+  bool assigned);
 
 int kvm_commit_irq_routes(void);
 
diff --git a/qemu-kvm.c b/qemu-kvm.c
index ab7703b..6bdd7b5 100644
--- a/qemu-kvm.c
+++ b/qemu-kvm.c
@@ -524,10 +524,21 @@ int kvm_msi_message_update(MSIMessage *msg, 
MSIRoutingCache *cache)
 }
 
 
-int kvm_msi_irqfd_set(MSIRoutingCache *cache, int fd, bool assigned)
+int kvm_msi_irqfd_set(MSIMessage *msg, MSIRoutingCache *cache, int fd,
+  bool assigned)
 {
-if (cache-type == MSI_ROUTE_NONE) {
-return assigned ? -EINVAL : 0;
+int ret;
+
+if (assigned) {
+if (cache-type == MSI_ROUTE_NONE) {
+return -EINVAL;
+}
+ret = kvm_msi_message_update(msg, cache);
+if (ret  0) {
+return ret;
+}
+} else if (cache-type == MSI_ROUTE_NONE) {
+return 0;
 }
 cache-kvm_irqfd = assigned ? fd : -1;
 return kvm_set_irqfd(cache-kvm_gsi, fd, assigned);
-- 
1.7.3.4

Re: [Qemu-devel] [RFC][PATCH 11/45] msi: Factor out delivery hook

2011-10-17 Thread Avi Kivity

On 10/17/2011 11:27 AM, Jan Kiszka wrote:
 So far we deliver MSI messages by writing them into the target MMIO
 area. This reflects what happens on hardware, but imposes some
 limitations on the emulation when introducing KVM in-kernel irqchip
 models. For those we will need to track the message origin.

Why do we need to track the message origin?  Emulated interrupt remapping?

  Moreover,
 different architecture or accelerators may want to overload the delivery
 handler.

 Therefore, this commit introduces a delivery hook that is called by the
 MSI/MSI-X layer when devices send normal messages, but also on spurious
 deliveries that ended up on the APIC MMIO handler. Our default delivery
 handler for APIC-based PCs then dispatches between real MSIs and other
 DMA requests that happened to take the MSI patch.

'path'

  
 -static void apic_send_msi(target_phys_addr_t addr, uint32_t data)
 +void apic_deliver_msi(MSIMessage *msg)

In general, it is better these days to pass small structures by value.


Not sure what the gain is from intercepting the msi just before the
stl_phys() vs. in the apic handler.

-- 
error compiling committee.c: too many arguments to function

[Qemu-devel] [PATCH 1/2] hda: do not mix output and input streams, RHBZ #740493

2011-10-17 Thread Marc-André Lureau

Windows 7 may use the same stream number for input and output.
That will result in a lot of garbage on playback.

The hardcoded value of 4 needs to be in sync with GCAP streams
description and IN/OUT registers.

Signed-off-by: Marc-André Lureau marcandre.lur...@redhat.com
---
 hw/intel-hda.c |9 +
 1 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/hw/intel-hda.c b/hw/intel-hda.c
index 4272204..c6a3fec 100644
--- a/hw/intel-hda.c
+++ b/hw/intel-hda.c
@@ -389,14 +389,15 @@ static bool intel_hda_xfer(HDACodecDevice *dev, uint32_t 
stnr, bool output,
 {
 HDACodecBus *bus = DO_UPCAST(HDACodecBus, qbus, dev-qdev.parent_bus);
 IntelHDAState *d = container_of(bus, IntelHDAState, codecs);
-IntelHDAStream *st = NULL;
 target_phys_addr_t addr;
 uint32_t s, copy, left;
+IntelHDAStream *st;
 bool irq = false;
 
-for (s = 0; s  ARRAY_SIZE(d-st); s++) {
-if (stnr == ((d-st[s].ctl  20)  0x0f)) {
-st = d-st + s;
+st = output ? d-st + 4 : d-st;
+for (s = 0; s  4; s++) {
+if (stnr == ((st[s].ctl  20)  0x0f)) {
+st = st + s;
 break;
 }
 }
-- 
1.7.6.2

[Qemu-devel] [PATCH 2/2] hda: do not mix output and input stream states, RHBZ #740493

2011-10-17 Thread Marc-André Lureau

Windows 7 may use the same stream number for input and output.
Current code will confuse streams.

Changes since v1:
- keep running_compat[] for migration version 1
- add running_real[] for migration version 2

Signed-off-by: Marc-André Lureau marcandre.lur...@redhat.com
---
 hw/hda-audio.c |   26 +++---
 hw/intel-hda.c |9 +
 hw/intel-hda.h |2 +-
 3 files changed, 25 insertions(+), 12 deletions(-)

diff --git a/hw/hda-audio.c b/hw/hda-audio.c
index 03c0a24..a72b721 100644
--- a/hw/hda-audio.c
+++ b/hw/hda-audio.c
@@ -462,7 +462,8 @@ struct HDAAudioState {
 QEMUSoundCard card;
 const desc_codec *desc;
 HDAAudioStream st[4];
-bool running[16];
+bool running_compat[16];
+bool running_real[2 * 16];
 
 /* properties */
 uint32_t debug;
@@ -659,7 +660,7 @@ static void hda_audio_command(HDACodecDevice *hda, uint32_t 
nid, uint32_t data)
 st-channel = payload  0x0f;
 dprint(a, 2, %s: stream %d, channel %d\n,
st-node-name, st-stream, st-channel);
-hda_audio_set_running(st, a-running[st-stream]);
+hda_audio_set_running(st, a-running_real[st-output * 16 + 
st-stream]);
 hda_codec_response(hda, true, 0);
 break;
 case AC_VERB_GET_CONV:
@@ -742,16 +743,20 @@ fail:
 hda_codec_response(hda, true, 0);
 }
 
-static void hda_audio_stream(HDACodecDevice *hda, uint32_t stnr, bool running)
+static void hda_audio_stream(HDACodecDevice *hda, uint32_t stnr, bool running, 
bool output)
 {
 HDAAudioState *a = DO_UPCAST(HDAAudioState, hda, hda);
 int s;
 
-a-running[stnr] = running;
+a-running_compat[stnr] = running;
+a-running_real[output * 16 + stnr] = running;
 for (s = 0; s  ARRAY_SIZE(a-st); s++) {
 if (a-st[s].node == NULL) {
 continue;
 }
+if (a-st[s].output != output) {
+continue;
+}
 if (a-st[s].stream != stnr) {
 continue;
 }
@@ -833,6 +838,12 @@ static int hda_audio_post_load(void *opaque, int version)
 int i;
 
 dprint(a, 1, %s\n, __FUNCTION__);
+if (version == 1) {
+/* assume running_compat[] is for output streams */
+for (i = 0; i  ARRAY_SIZE(a-running_compat); i++)
+a-running_real[16 + i] = a-running_compat[i];
+}
+
 for (i = 0; i  ARRAY_SIZE(a-st); i++) {
 st = a-st + i;
 if (st-node == NULL)
@@ -840,7 +851,7 @@ static int hda_audio_post_load(void *opaque, int version)
 hda_codec_parse_fmt(st-format, st-as);
 hda_audio_setup(st);
 hda_audio_set_amp(st);
-hda_audio_set_running(st, a-running[st-stream]);
+hda_audio_set_running(st, a-running_real[st-output * 16 + 
st-stream]);
 }
 return 0;
 }
@@ -864,13 +875,14 @@ static const VMStateDescription vmstate_hda_audio_stream 
= {
 
 static const VMStateDescription vmstate_hda_audio = {
 .name = hda-audio,
-.version_id = 1,
+.version_id = 2,
 .post_load = hda_audio_post_load,
 .fields = (VMStateField []) {
 VMSTATE_STRUCT_ARRAY(st, HDAAudioState, 4, 0,
  vmstate_hda_audio_stream,
  HDAAudioStream),
-VMSTATE_BOOL_ARRAY(running, HDAAudioState, 16),
+VMSTATE_BOOL_ARRAY(running_compat, HDAAudioState, 16),
+VMSTATE_BOOL_ARRAY_V(running_real, HDAAudioState, 2 * 16, 2),
 VMSTATE_END_OF_LIST()
 }
 };
diff --git a/hw/intel-hda.c b/hw/intel-hda.c
index c6a3fec..f97775c 100644
--- a/hw/intel-hda.c
+++ b/hw/intel-hda.c
@@ -485,7 +485,7 @@ static void intel_hda_parse_bdl(IntelHDAState *d, 
IntelHDAStream *st)
 st-bp= 0;
 }
 
-static void intel_hda_notify_codecs(IntelHDAState *d, uint32_t stream, bool 
running)
+static void intel_hda_notify_codecs(IntelHDAState *d, uint32_t stream, bool 
running, bool output)
 {
 DeviceState *qdev;
 HDACodecDevice *cdev;
@@ -493,7 +493,7 @@ static void intel_hda_notify_codecs(IntelHDAState *d, 
uint32_t stream, bool runn
 QLIST_FOREACH(qdev, d-codecs.qbus.children, sibling) {
 cdev = DO_UPCAST(HDACodecDevice, qdev, qdev);
 if (cdev-info-stream) {
-cdev-info-stream(cdev, stream, running);
+cdev-info-stream(cdev, stream, running, output);
 }
 }
 }
@@ -567,6 +567,7 @@ static void intel_hda_set_ics(IntelHDAState *d, const 
IntelHDAReg *reg, uint32_t
 
 static void intel_hda_set_st_ctl(IntelHDAState *d, const IntelHDAReg *reg, 
uint32_t old)
 {
+bool output = reg-stream = 4;
 IntelHDAStream *st = d-st + reg-stream;
 
 if (st-ctl  0x01) {
@@ -582,11 +583,11 @@ static void intel_hda_set_st_ctl(IntelHDAState *d, const 
IntelHDAReg *reg, uint3
 dprint(d, 1, st #%d: start %d (ring buf %d bytes)\n,
reg-stream, stnr, st-cbl);
 intel_hda_parse_bdl(d, st);
-intel_hda_notify_codecs(d, stnr, true);
+intel_hda_notify_codecs(d, stnr, true, output);

[Qemu-devel] [RFC][PATCH 08/45] Introduce MSIMessage structure

2011-10-17 Thread Jan Kiszka

Will be used for generating and distributing MSI messages, both in
emulation mode and under KVM.

Signed-off-by: Jan Kiszka jan.kis...@siemens.com
---
 hw/msi.h  |5 +
 qemu-common.h |1 +
 2 files changed, 6 insertions(+), 0 deletions(-)

diff --git a/hw/msi.h b/hw/msi.h
index e5e821f..22e3932 100644
--- a/hw/msi.h
+++ b/hw/msi.h
@@ -24,6 +24,11 @@
 #include qemu-common.h
 #include pci.h
 
+struct MSIMessage {
+uint64_t address;
+uint32_t data;
+};
+
 extern bool msi_supported;
 
 bool msi_enabled(const PCIDevice *dev);
diff --git a/qemu-common.h b/qemu-common.h
index 5e87bdf..d3901bd 100644
--- a/qemu-common.h
+++ b/qemu-common.h
@@ -15,6 +15,7 @@ typedef struct QEMUTimer QEMUTimer;
 typedef struct QEMUFile QEMUFile;
 typedef struct QEMUBH QEMUBH;
 typedef struct DeviceState DeviceState;
+typedef struct MSIMessage MSIMessage;
 
 struct Monitor;
 typedef struct Monitor Monitor;
-- 
1.7.3.4

[Qemu-devel] [RFC][PATCH 03/45] msi: Use msi/msix_present more consistently

2011-10-17 Thread Jan Kiszka

Replace some open-coded msi/msix_present checks and drop redundant
msix_supported tests (present implies supported).

Signed-off-by: Jan Kiszka jan.kis...@siemens.com
---
 hw/msi.c  |2 +-
 hw/msix.c |   20 
 2 files changed, 9 insertions(+), 13 deletions(-)

diff --git a/hw/msi.c b/hw/msi.c
index 5db..b117f69 100644
--- a/hw/msi.c
+++ b/hw/msi.c
@@ -266,7 +266,7 @@ void msi_uninit(struct PCIDevice *dev)
 uint16_t flags;
 uint8_t cap_size;
 
-if (!(dev-cap_present  QEMU_PCI_CAP_MSI)) {
+if (!msi_present(dev)) {
 return;
 }
 flags = pci_get_word(dev-config + msi_flags_off(dev));
diff --git a/hw/msix.c b/hw/msix.c
index ebd5aee..2c4de21 100644
--- a/hw/msix.c
+++ b/hw/msix.c
@@ -383,8 +383,9 @@ static void msix_free_irq_entries(PCIDevice *dev)
 /* Clean up resources for the device. */
 int msix_uninit(PCIDevice *dev, MemoryRegion *bar)
 {
-if (!(dev-cap_present  QEMU_PCI_CAP_MSIX))
+if (!msix_present(dev)) {
 return 0;
+}
 pci_del_capability(dev, PCI_CAP_ID_MSIX, MSIX_CAP_LENGTH);
 dev-msix_cap = 0;
 msix_free_irq_entries(dev);
@@ -405,11 +406,7 @@ void msix_save(PCIDevice *dev, QEMUFile *f)
 {
 unsigned n = dev-msix_entries_nr;
 
-if (!msix_supported) {
-return;
-}
-
-if (!(dev-cap_present  QEMU_PCI_CAP_MSIX)) {
+if (!msix_present(dev)) {
 return;
 }
 qemu_put_buffer(f, dev-msix_table_page, n * PCI_MSIX_ENTRY_SIZE);
@@ -421,10 +418,7 @@ void msix_load(PCIDevice *dev, QEMUFile *f)
 {
 unsigned n = dev-msix_entries_nr;
 
-if (!msix_supported)
-return;
-
-if (!(dev-cap_present  QEMU_PCI_CAP_MSIX)) {
+if (!msix_present(dev)) {
 return;
 }
 
@@ -480,8 +474,9 @@ void msix_notify(PCIDevice *dev, unsigned vector)
 
 void msix_reset(PCIDevice *dev)
 {
-if (!(dev-cap_present  QEMU_PCI_CAP_MSIX))
+if (!msix_present(dev)) {
 return;
+}
 msix_free_irq_entries(dev);
 dev-config[dev-msix_cap + MSIX_CONTROL_OFFSET] =
~dev-wmask[dev-msix_cap + MSIX_CONTROL_OFFSET];
@@ -531,8 +526,9 @@ void msix_vector_unuse(PCIDevice *dev, unsigned vector)
 
 void msix_unuse_all_vectors(PCIDevice *dev)
 {
-if (!(dev-cap_present  QEMU_PCI_CAP_MSIX))
+if (!msix_present(dev)) {
 return;
+}
 msix_free_irq_entries(dev);
 }
 
-- 
1.7.3.4

[Qemu-devel] [RFC][PATCH 45/45] pci-assign: Fix coding style issues

2011-10-17 Thread Jan Kiszka

Also remove the dead get_assigned_device while at it. No functional
changes.

Signed-off-by: Jan Kiszka jan.kis...@siemens.com
---
 hw/device-assignment.c |  199 
 hw/device-assignment.h |   14 ++--
 2 files changed, 107 insertions(+), 106 deletions(-)

diff --git a/hw/device-assignment.c b/hw/device-assignment.c
index df554b3..c7930e4 100644
--- a/hw/device-assignment.c
+++ b/hw/device-assignment.c
@@ -58,10 +58,10 @@
 #ifdef DEVICE_ASSIGNMENT_DEBUG
 #define DEBUG(fmt, ...)   \
 do {  \
-  fprintf(stderr, %s:  fmt, __func__ , __VA_ARGS__);\
+fprintf(stderr, %s:  fmt, __func__ , __VA_ARGS__);  \
 } while (0)
 #else
-#define DEBUG(fmt, ...) do { } while(0)
+#define DEBUG(fmt, ...) do { } while (0)
 #endif
 
 static void assigned_dev_load_option_rom(AssignedDevice *dev);
@@ -97,27 +97,27 @@ static uint32_t assigned_dev_ioport_rw(AssignedDevRegion 
*dev_region,
 DEBUG(out val=%x, len=%d, e_phys=%x, host=%x\n,
   *val, len, addr, port);
 switch (len) {
-case 1:
-outb(*val, port);
-break;
-case 2:
-outw(*val, port);
-break;
-case 4:
-outl(*val, port);
-break;
+case 1:
+outb(*val, port);
+break;
+case 2:
+outw(*val, port);
+break;
+case 4:
+outl(*val, port);
+break;
 }
 } else {
 switch (len) {
-case 1:
-ret = inb(port);
-break;
-case 2:
-ret = inw(port);
-break;
-case 4:
-ret = inl(port);
-break;
+case 1:
+ret = inb(port);
+break;
+case 2:
+ret = inw(port);
+break;
+case 4:
+ret = inl(port);
+break;
 }
 DEBUG(in val=%x, len=%d, e_phys=%x, host=%x\n,
   ret, len, addr, port);
@@ -130,21 +130,18 @@ static void assigned_dev_ioport_writeb(void *opaque, 
uint32_t addr,
uint32_t value)
 {
 assigned_dev_ioport_rw(opaque, addr, 1, value);
-return;
 }
 
 static void assigned_dev_ioport_writew(void *opaque, uint32_t addr,
uint32_t value)
 {
 assigned_dev_ioport_rw(opaque, addr, 2, value);
-return;
 }
 
 static void assigned_dev_ioport_writel(void *opaque, uint32_t addr,
uint32_t value)
 {
 assigned_dev_ioport_rw(opaque, addr, 4, value);
-return;
 }
 
 static uint32_t assigned_dev_ioport_readb(void *opaque, uint32_t addr)
@@ -295,13 +292,13 @@ static uint32_t assigned_dev_pci_read(PCIDevice *d, int 
pos, int len)
 again:
 ret = pread(fd, val, len, pos);
 if (ret != len) {
-   if ((ret  0)  (errno == EINTR || errno == EAGAIN))
-   goto again;
-
-   fprintf(stderr, %s: pread failed, ret = %zd errno = %d\n,
-   __func__, ret, errno);
+if ((ret  0)  (errno == EINTR || errno == EAGAIN)) {
+goto again;
+}
+fprintf(stderr, %s: pread failed, ret = %zd errno = %d\n,
+__func__, ret, errno);
 
-   exit(1);
+exit(1);
 }
 
 return val;
@@ -321,16 +318,14 @@ static void assigned_dev_pci_write(PCIDevice *d, int pos, 
uint32_t val, int len)
 again:
 ret = pwrite(fd, val, len, pos);
 if (ret != len) {
-   if ((ret  0)  (errno == EINTR || errno == EAGAIN))
-   goto again;
-
-   fprintf(stderr, %s: pwrite failed, ret = %zd errno = %d\n,
-   __func__, ret, errno);
+if ((ret  0)  (errno == EINTR || errno == EAGAIN)) {
+goto again;
+}
+fprintf(stderr, %s: pwrite failed, ret = %zd errno = %d\n,
+__func__, ret, errno);
 
-   exit(1);
+exit(1);
 }
-
-return;
 }
 
 static void assigned_dev_emulate_config_read(AssignedDevice *dev,
@@ -359,22 +354,24 @@ static uint8_t pci_find_cap_offset(PCIDevice *d, uint8_t 
cap, uint8_t start)
 int status;
 
 status = assigned_dev_pci_read_byte(d, PCI_STATUS);
-if ((status  PCI_STATUS_CAP_LIST) == 0)
+if ((status  PCI_STATUS_CAP_LIST) == 0) {
 return 0;
+}
 
 while (max_cap--) {
 pos = assigned_dev_pci_read_byte(d, pos);
-if (pos  0x40)
+if (pos  0x40) {
 break;
-
+}
 pos = ~3;
 id = assigned_dev_pci_read_byte(d, pos + PCI_CAP_LIST_ID);
 
-if (id == 0xff)
+if (id == 0xff) {
 break;
-if (id

Re: [Qemu-devel] [RFC][PATCH 12/45] msi: Introduce MSIRoutingCache

2011-10-17 Thread Avi Kivity

On 10/17/2011 11:27 AM, Jan Kiszka wrote:
 This cache will help us implement KVM in-kernel irqchip support
 without spreading hooks all over the place.

 KVM requires us to register it first and then deliver it by raising a
 pseudo IRQ line returned on registration. While this could be changed
 for QEMU-originated MSI messages by adding direct MSI injection, we will
 still need this translation for irqfd-originated messages. The
 MSIRoutingCache will allow us to track those registrations and update them
 lazily before the actual delivery. This avoids having to track MSI
 vectors at device level (like qemu-kvm currently does).


 +typedef enum {
 +MSI_ROUTE_NONE = 0,
 +MSI_ROUTE_STATIC,
 +} MSIRouteType;
 +
 +struct MSIRoutingCache {
 +MSIMessage msg;
 +MSIRouteType type;
 +int kvm_gsi;
 +int kvm_irqfd;
 +};
 +
 diff --git a/hw/pci.h b/hw/pci.h
 index 329ab32..5b5d2fd 100644
 --- a/hw/pci.h
 +++ b/hw/pci.h
 @@ -197,6 +197,10 @@ struct PCIDevice {
  MemoryRegion rom;
  uint32_t rom_bar;
  
 +/* MSI routing chaches */
 +MSIRoutingCache *msi_cache;
 +MSIRoutingCache *msix_cache;
 +
  /* MSI entries */
  int msi_entries_nr;
  struct KVMMsiMessage *msi_irq_entries;

IMO this needlessly leaks kvm information into core qemu.  The cache
should be completely hidden in kvm code.

I think msi_deliver() can hide the use of the cache completely.  For
pre-registered events like kvm's irqfd, you can use something like

  qemu_irq qemu_msi_irq(MSIMessage msg)

for non-kvm, it simply returns a qemu_irq that triggers a stl_phys();
for kvm, it allocates an irqfd and a permanent entry in the cache and
returns a qemu_irq that triggers the irqfd.

-- 
error compiling committee.c: too many arguments to function

Re: [Qemu-devel] GPLv3 troubles

2011-10-17 Thread Andreas Färber

Am 17.10.2011 12:47, schrieb Paolo Bonzini:
 On 10/17/2011 12:45 PM, Andreas Färber wrote:
 Could we please draft some policy on this? This is not a GDB issue, it's
 very general. Whether we like it or not, there is GPLv3-licensed code
 and there will probably be a GPLv4 one day.

 IMO having old GPLv2-only code is one thing. But there's a lot of new
 GPLv2-only code cooking and occasionally pouring in, especially from
 qemu-kvm. Device assignment is a current example I encountered.

 If we could make checkpatch.pl detect new GPLv2-only code, then I would
 hope, given the dynamic QEMU development of the last few years, that the
 GPLv2-only portions become so small (both in relation and absolute) that
 they can either be replaced or the authors' permission be obtained to
 change the license to GPLv2-or-later.
 
 That is close to impossible, you usually ask permission for all the
 authors in the history to avoid bigger problems.

I did refer to authors in history, in case that was unclear.

I was thinking of how much code we rewrote for TCG, qdev, etc. In the
end it'll depend on which files are affected, and I don't have a list -
hard to grep due to varying formulations and line breaks.

Andreas

-- 
SUSE LINUX Products GmbH, Maxfeldstr. 5, 90409 Nürnberg, Germany
GF: Jeff Hawn, Jennifer Guild, Felix Imendörffer; HRB 16746, AG Nürnberg

Re: [Qemu-devel] [RFC][PATCH 06/45] msix: Prevent bogus mask updates on MMIO accesses

2011-10-17 Thread Michael S. Tsirkin

On Mon, Oct 17, 2011 at 11:27:40AM +0200, Jan Kiszka wrote:
 Only accesses to the MSI-X table must trigger a call to
 msix_handle_mask_update or a notifier invocation.
 
 Signed-off-by: Jan Kiszka jan.kis...@siemens.com

Why would msix_mmio_write be called on an access
outside the table?

 ---
  hw/msix.c |   16 ++--
  1 files changed, 10 insertions(+), 6 deletions(-)
 
 diff --git a/hw/msix.c b/hw/msix.c
 index 2c4de21..33cb716 100644
 --- a/hw/msix.c
 +++ b/hw/msix.c
 @@ -264,18 +264,22 @@ static void msix_mmio_write(void *opaque, 
 target_phys_addr_t addr,
  {
  PCIDevice *dev = opaque;
  unsigned int offset = addr  (MSIX_PAGE_SIZE - 1)  ~0x3;
 -int vector = offset / PCI_MSIX_ENTRY_SIZE;
 +unsigned int vector = offset / PCI_MSIX_ENTRY_SIZE;

Why the int/unsigned change? This has no chance to overflow, and using
unsigned causes signed/unsigned comparison below,
and unsigned/signed conversion on calls such as msix_is_masked.

  int was_masked = msix_is_masked(dev, vector);
  pci_set_long(dev-msix_table_page + offset, val);
  if (kvm_enabled()  kvm_irqchip_in_kernel()) {
  kvm_msix_update(dev, vector, was_masked, msix_is_masked(dev, 
 vector));
  }

I would say if we need to check the address, check it first thing
and return if the address is out of a sensible range.
For example, are you worried about kvm_msix_update calls with
a sensible mask?

 -if (was_masked != msix_is_masked(dev, vector)  
 dev-msix_mask_notifier) {
 -int r = dev-msix_mask_notifier(dev, vector,
 - msix_is_masked(dev, vector));
 -assert(r = 0);
 +
 +if (vector  dev-msix_entries_nr) {
 +if (was_masked != msix_is_masked(dev, vector) 
 +dev-msix_mask_notifier) {
 +int r = dev-msix_mask_notifier(dev, vector,
 +msix_is_masked(dev, vector));
 +assert(r = 0);
 +}
 +msix_handle_mask_update(dev, vector);
  }
 -msix_handle_mask_update(dev, vector);
  }
  
  static const MemoryRegionOps msix_mmio_ops = {
 -- 
 1.7.3.4

Re: [Qemu-devel] GPLv3 troubles

2011-10-17 Thread Paolo Bonzini


On 10/17/2011 01:07 PM, Andreas Färber wrote:

  That is close to impossible, you usually ask permission for all the
  authors in the history to avoid bigger problems.

I did refer to authors in history, in case that was unclear.


Authors in history (unlike authors in git blame, but you cannot trust 
that) almost never disappear, no matter how much you rewrite.  Even 
dyngen->TCG kept a lot of the target-* code unchanged.


Making a list of GPLv2 files would be a start, though.

Paolo

[Qemu-devel] [RFC][PATCH 24/45] qemu-kvm: msix: Don't handle mask updated while disabled

2011-10-17 Thread Jan Kiszka

As long as MSI-X is disabled, it's incorrect to invoke
msix_handle_mask_update on per-vector mask changes. That may misguide
the config notifier callback or spuriously trigger an MSI event.

Signed-off-by: Jan Kiszka jan.kis...@siemens.com
---
 hw/msix.c |2 +-
 1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/hw/msix.c b/hw/msix.c
index 176bc76..7d45760 100644
--- a/hw/msix.c
+++ b/hw/msix.c
@@ -292,7 +292,7 @@ static void msix_mmio_write(void *opaque, 
target_phys_addr_t addr,
 kvm_msix_update(dev, vector, was_masked, msix_is_masked(dev, vector));
 }
 
-if (vector  dev-msix_entries_nr) {
+if (msix_enabled(dev)  vector  dev-msix_entries_nr) {
 is_masked = msix_is_masked(dev, vector);
 if (was_masked != is_masked) {
 msix_handle_mask_update(dev, vector);
-- 
1.7.3.4

[Qemu-devel] [PATCH v2 1/2] ioapic: Convert to memory API

2011-10-17 Thread Jan Kiszka

This maintains the old imprecise access size handling.

Signed-off-by: Jan Kiszka jan.kis...@siemens.com
---

Changes in v2:
 - use new-style handlers

 hw/ioapic.c |   28 +++-
 1 files changed, 11 insertions(+), 17 deletions(-)

diff --git a/hw/ioapic.c b/hw/ioapic.c
index 61991d7..56b1612 100644
--- a/hw/ioapic.c
+++ b/hw/ioapic.c
@@ -86,6 +86,7 @@ typedef struct IOAPICState IOAPICState;
 
 struct IOAPICState {
 SysBusDevice busdev;
+MemoryRegion io_memory;
 uint8_t id;
 uint8_t ioregsel;
 uint32_t irr;
@@ -195,7 +196,8 @@ void ioapic_eoi_broadcast(int vector)
 }
 }
 
-static uint32_t ioapic_mem_readl(void *opaque, target_phys_addr_t addr)
+static uint64_t
+ioapic_mem_read(void *opaque, target_phys_addr_t addr, unsigned int size)
 {
 IOAPICState *s = opaque;
 int index;
@@ -234,7 +236,8 @@ static uint32_t ioapic_mem_readl(void *opaque, 
target_phys_addr_t addr)
 }
 
 static void
-ioapic_mem_writel(void *opaque, target_phys_addr_t addr, uint32_t val)
+ioapic_mem_write(void *opaque, target_phys_addr_t addr, uint64_t val,
+ unsigned int size)
 {
 IOAPICState *s = opaque;
 int index;
@@ -309,32 +312,23 @@ static void ioapic_reset(DeviceState *d)
 }
 }
 
-static CPUReadMemoryFunc * const ioapic_mem_read[3] = {
-ioapic_mem_readl,
-ioapic_mem_readl,
-ioapic_mem_readl,
-};
-
-static CPUWriteMemoryFunc * const ioapic_mem_write[3] = {
-ioapic_mem_writel,
-ioapic_mem_writel,
-ioapic_mem_writel,
+static const MemoryRegionOps ioapic_io_ops = {
+.read = ioapic_mem_read,
+.write = ioapic_mem_write,
+.endianness = DEVICE_NATIVE_ENDIAN,
 };
 
 static int ioapic_init1(SysBusDevice *dev)
 {
 IOAPICState *s = FROM_SYSBUS(IOAPICState, dev);
-int io_memory;
 static int ioapic_no;
 
 if (ioapic_no = MAX_IOAPICS) {
 return -1;
 }
 
-io_memory = cpu_register_io_memory(ioapic_mem_read,
-   ioapic_mem_write, s,
-   DEVICE_NATIVE_ENDIAN);
-sysbus_init_mmio(dev, 0x1000, io_memory);
+memory_region_init_io(s-io_memory, ioapic_io_ops, s, ioapic, 0x1000);
+sysbus_init_mmio_region(dev, s-io_memory);
 
 qdev_init_gpio_in(dev-qdev, ioapic_set_irq, IOAPIC_NUM_PINS);
 
-- 
1.7.3.4

[Qemu-devel] [PATCH 2/2] ioapic: Reject non-dword accesses to IOWIN register

2011-10-17 Thread Jan Kiszka

Aligns the model with the spec.

Signed-off-by: Jan Kiszka jan.kis...@siemens.com
---
 hw/ioapic.c |6 ++
 1 files changed, 6 insertions(+), 0 deletions(-)

diff --git a/hw/ioapic.c b/hw/ioapic.c
index 56b1612..eb75766 100644
--- a/hw/ioapic.c
+++ b/hw/ioapic.c
@@ -208,6 +208,9 @@ ioapic_mem_read(void *opaque, target_phys_addr_t addr, 
unsigned int size)
 val = s-ioregsel;
 break;
 case IOAPIC_IOWIN:
+if (size != 4) {
+break;
+}
 switch (s-ioregsel) {
 case IOAPIC_REG_ID:
 val = s-id  IOAPIC_ID_SHIFT;
@@ -247,6 +250,9 @@ ioapic_mem_write(void *opaque, target_phys_addr_t addr, 
uint64_t val,
 s-ioregsel = val;
 break;
 case IOAPIC_IOWIN:
+if (size != 4) {
+break;
+}
 DPRINTF(write: %08x = %08x\n, s-ioregsel, val);
 switch (s-ioregsel) {
 case IOAPIC_REG_ID:
-- 
1.7.3.4

Re: [Qemu-devel] [RFC][PATCH 17/45] qemu-kvm: Track MSIRoutingCache in KVM routing table

2011-10-17 Thread Avi Kivity

On 10/17/2011 11:27 AM, Jan Kiszka wrote:
 Keep a link from the internal KVM routing table to potential MSI routing
 cache entries. The link is used so far whenever the entry is dropped to
 invalidate the cache content. It will allow us to build MSI routing
 entries on demand and flush existing ones on table overflow.


Does this not require a destructor for MSIRoutingCache?

-- 
error compiling committee.c: too many arguments to function

Re: [Qemu-devel] [RFC][PATCH 11/45] msi: Factor out delivery hook

2011-10-17 Thread Jan Kiszka

On 2011-10-17 12:56, Avi Kivity wrote:
 On 10/17/2011 11:27 AM, Jan Kiszka wrote:
 So far we deliver MSI messages by writing them into the target MMIO
 area. This reflects what happens on hardware, but imposes some
 limitations on the emulation when introducing KVM in-kernel irqchip
 models. For those we will need to track the message origin.
 
 Why do we need to track the message origin?  Emulated interrupt remapping?

The origin holds the routing cache which we need to track if the message
already has a route (and that without searching long lists) and to
update that route instead of adding another one.

 
  Moreover,
 different architecture or accelerators may want to overload the delivery
 handler.

 Therefore, this commit introduces a delivery hook that is called by the
 MSI/MSI-X layer when devices send normal messages, but also on spurious
 deliveries that ended up on the APIC MMIO handler. Our default delivery
 handler for APIC-based PCs then dispatches between real MSIs and other
 DMA requests that happened to take the MSI patch.
 
 'path'
 
  
 -static void apic_send_msi(target_phys_addr_t addr, uint32_t data)
 +void apic_deliver_msi(MSIMessage *msg)
 
 In general, it is better these days to pass small structures by value.

OK, will adjust this.

 
 
 Not sure what the gain is from intercepting the msi just before the
 stl_phys() vs. in the apic handler.

APIC is x86-specific, MSI is not. I think Xen will also want to make use
of this hook. I originally thought of using it for the KVM in-kernel
models as well, but I will now establish a callback at APIC-level
(upstream will look different from qemu-kvm in this regard).

Jan

-- 
Siemens AG, Corporate Technology, CT T DE IT 1
Corporate Competence Center Embedded Linux

Re: [Qemu-devel] [PATCH] arm gic saving/loading fix

2011-10-17 Thread Peter Maydell

On 17 October 2011 11:54, Andreas Färber afaer...@suse.de wrote:
 Am 17.10.2011 12:48, schrieb Dmitry Koshelev:
 irq_target field saving/loading is in the wrong loop

 Signed-off-by: Dmitry Koshelev karagio...@gmail.com

 Reviewed-by: Andreas Färber afaer...@suse.de

Doesn't it need a vmstate version bump too?

-- PMM

 Andreas

 ---
  hw/arm_gic.c |   12 ++--
  1 files changed, 6 insertions(+), 6 deletions(-)

 diff --git a/hw/arm_gic.c b/hw/arm_gic.c
 index 8286a28..ba05131 100644
 --- a/hw/arm_gic.c
 +++ b/hw/arm_gic.c
 @@ -662,9 +662,6 @@ static void gic_save(QEMUFile *f, void *opaque)
      qemu_put_be32(f, s-enabled);
      for (i = 0; i  NUM_CPU(s); i++) {
          qemu_put_be32(f, s-cpu_enabled[i]);
 -#ifndef NVIC
 -        qemu_put_be32(f, s-irq_target[i]);
 -#endif
          for (j = 0; j  32; j++)
              qemu_put_be32(f, s-priority1[j][i]);
          for (j = 0; j  GIC_NIRQ; j++)
 @@ -678,6 +675,9 @@ static void gic_save(QEMUFile *f, void *opaque)
          qemu_put_be32(f, s-priority2[i]);
      }
      for (i = 0; i  GIC_NIRQ; i++) {
 +#ifndef NVIC
 +        qemu_put_be32(f, s-irq_target[i]);
 +#endif
          qemu_put_byte(f, s-irq_state[i].enabled);
          qemu_put_byte(f, s-irq_state[i].pending);
          qemu_put_byte(f, s-irq_state[i].active);
 @@ -699,9 +699,6 @@ static int gic_load(QEMUFile *f, void *opaque, int
 version_id)
      s-enabled = qemu_get_be32(f);
      for (i = 0; i  NUM_CPU(s); i++) {
          s-cpu_enabled[i] = qemu_get_be32(f);
 -#ifndef NVIC
 -        s-irq_target[i] = qemu_get_be32(f);
 -#endif
          for (j = 0; j  32; j++)
              s-priority1[j][i] = qemu_get_be32(f);
          for (j = 0; j  GIC_NIRQ; j++)
 @@ -715,6 +712,9 @@ static int gic_load(QEMUFile *f, void *opaque, int
 version_id)
          s-priority2[i] = qemu_get_be32(f);
      }
      for (i = 0; i  GIC_NIRQ; i++) {
 +#ifndef NVIC
 +        s-irq_target[i] = qemu_get_be32(f);
 +#endif
          s-irq_state[i].enabled = qemu_get_byte(f);
          s-irq_state[i].pending = qemu_get_byte(f);
          s-irq_state[i].active = qemu_get_byte(f);


 --
 SUSE LINUX Products GmbH, Maxfeldstr. 5, 90409 Nürnberg, Germany
 GF: Jeff Hawn, Jennifer Guild, Felix Imendörffer; HRB 16746, AG Nürnberg




-- 
12345678901234567890123456789012345678901234567890123456789012345678901234567890
         1         2         3         4         5         6         7         8

[Qemu-devel] [RFC][PATCH 16/45] qemu-kvm: Use MSIMessage and MSIRoutingCache

2011-10-17 Thread Jan Kiszka

Start benefiting from the new abstractions and drop the KVM-specific
vector tracking in favor of the generic MSIMessage and MSIRoutingCache
data structures and helpers, also reducing the diff to upstream.

Signed-off-by: Jan Kiszka jan.kis...@siemens.com
---
 hw/msi.c|   49 +++--
 hw/msix.c   |   37 +
 hw/pci.h|4 
 hw/virtio-pci.c |3 ++-
 kvm-stub.c  |6 +++---
 kvm.h   |   13 +++--
 qemu-kvm.c  |   46 +-
 7 files changed, 57 insertions(+), 101 deletions(-)

diff --git a/hw/msi.c b/hw/msi.c
index c8ccb17..b947104 100644
--- a/hw/msi.c
+++ b/hw/msi.c
@@ -140,49 +140,29 @@ static void msi_message_from_vector(PCIDevice *dev, 
uint16_t msi_flags,
 }
 }
 
-static void kvm_msi_message_from_vector(PCIDevice *dev, unsigned vector,
-KVMMsiMessage *kmm)
-{
-uint16_t flags = pci_get_word(dev-config + msi_flags_off(dev));
-bool msi64bit = flags  PCI_MSI_FLAGS_64BIT;
-unsigned int nr_vectors = msi_nr_vectors(flags);
-
-kmm-addr_lo = pci_get_long(dev-config + msi_address_lo_off(dev));
-if (msi64bit) {
-kmm-addr_hi = pci_get_long(dev-config + msi_address_hi_off(dev));
-} else {
-kmm-addr_hi = 0;
-}
-
-kmm-data = pci_get_word(dev-config + msi_data_off(dev, msi64bit));
-if (nr_vectors  1) {
-kmm-data = ~(nr_vectors - 1);
-kmm-data |= vector;
-}
-}
-
 static void kvm_msi_update(PCIDevice *dev)
 {
 uint16_t flags = pci_get_word(dev-config + msi_flags_off(dev));
 unsigned int max_vectors = 1 
 ((flags  PCI_MSI_FLAGS_QMASK)  (ffs(PCI_MSI_FLAGS_QMASK) - 1));
 unsigned int nr_vectors = msi_nr_vectors(flags);
-KVMMsiMessage new_entry, *entry;
+MSIRoutingCache *cache;
 bool changed = false;
 unsigned int vector;
+MSIMessage msg;
 int r;
 
 for (vector = 0; vector  max_vectors; vector++) {
-entry = dev-msi_irq_entries + vector;
+cache = dev-msi_cache[vector];
 
 if (vector = nr_vectors) {
 if (vector  dev-msi_entries_nr) {
-kvm_msi_message_del(entry);
+kvm_msi_message_del(cache);
 changed = true;
 }
 } else if (vector = dev-msi_entries_nr) {
-kvm_msi_message_from_vector(dev, vector, entry);
-r = kvm_msi_message_add(entry);
+msi_message_from_vector(dev, flags, vector, msg);
+r = kvm_msi_message_add(msg, cache);
 if (r) {
 fprintf(stderr, %s: kvm_msi_add failed: %s\n, __func__,
 strerror(-r));
@@ -190,15 +170,14 @@ static void kvm_msi_update(PCIDevice *dev)
 }
 changed = true;
 } else {
-kvm_msi_message_from_vector(dev, vector, new_entry);
-r = kvm_msi_message_update(entry, new_entry);
+msi_message_from_vector(dev, flags, vector, msg);
+r = kvm_msi_message_update(msg, cache);
 if (r  0) {
 fprintf(stderr, %s: kvm_update_msi failed: %s\n,
 __func__, strerror(-r));
 exit(1);
 }
 if (r  0) {
-*entry = new_entry;
 changed = true;
 }
 }
@@ -220,7 +199,7 @@ static void kvm_msi_free(PCIDevice *dev)
 unsigned int vector;
 
 for (vector = 0; vector  dev-msi_entries_nr; ++vector) {
-kvm_msi_message_del(dev-msi_irq_entries[vector]);
+kvm_msi_message_del(dev-msi_cache[vector]);
 }
 if (dev-msi_entries_nr  0) {
 kvm_commit_irq_routes();
@@ -290,11 +269,6 @@ int msi_init(struct PCIDevice *dev, uint8_t offset,
 
 dev-msi_cache = g_malloc0(nr_vectors * sizeof(*dev-msi_cache));
 
-if (kvm_enabled()  kvm_irqchip_in_kernel()) {
-dev-msi_irq_entries = g_malloc(nr_vectors *
-sizeof(*dev-msix_irq_entries));
-}
-
 return config_offset;
 }
 
@@ -311,7 +285,6 @@ void msi_uninit(struct PCIDevice *dev)
 
 if (kvm_enabled()  kvm_irqchip_in_kernel()) {
 kvm_msi_free(dev);
-g_free(dev-msi_irq_entries);
 }
 
 g_free(dev-msi_cache);
@@ -383,7 +356,7 @@ void msi_notify(PCIDevice *dev, unsigned int vector)
 }
 
 if (kvm_enabled()  kvm_irqchip_in_kernel()) {
-kvm_set_irq(dev-msi_irq_entries[vector].gsi, 1, NULL);
+kvm_set_irq(dev-msi_cache[vector].kvm_gsi, 1, NULL);
 return;
 }
 
@@ -504,7 +477,7 @@ void msi_post_load(PCIDevice *dev)
 {
 uint16_t flags = pci_get_word(dev-config + msi_flags_off(dev));
 
-if (kvm_enabled()  dev-msi_irq_entries) {
+if (kvm_enabled()  kvm_irqchip_in_kernel()) {
 kvm_msi_free(dev);
 
 if (flags  PCI_MSI_FLAGS_ENABLE) {
diff --git a/hw/msix.c b/hw/msix.c
index e824aef..0be022e 100644
--- a/hw/msix.c

[Qemu-devel] [RFC][PATCH 35/45] pci-assign: Polish assigned_dev_update_msix_mmio

2011-10-17 Thread Jan Kiszka

- rename to assigned_dev_set_msix_vectors
- drop unused msg_ctrl
- use pci_get_* accessors
- rename variable va to msix_page
- clarify comment on msg_data == 0 optimization
- fix coding style

Signed-off-by: Jan Kiszka jan.kis...@siemens.com
---
 hw/device-assignment.c |   53 ++-
 1 files changed, 29 insertions(+), 24 deletions(-)

diff --git a/hw/device-assignment.c b/hw/device-assignment.c
index 7a8f702..83951a3 100644
--- a/hw/device-assignment.c
+++ b/hw/device-assignment.c
@@ -949,42 +949,43 @@ static void assigned_dev_update_msi(PCIDevice *pci_dev)
 }
 }
 
-static int assigned_dev_update_msix_mmio(PCIDevice *pci_dev)
+static int assigned_dev_set_msix_vectors(PCIDevice *pci_dev)
 {
 AssignedDevice *adev = DO_UPCAST(AssignedDevice, dev, pci_dev);
 uint16_t entries_nr = 0, entries_max_nr;
 int pos = 0, i, r = 0;
-uint32_t msg_addr, msg_upper_addr, msg_data, msg_ctrl;
+uint32_t msg_addr, msg_upper_addr, msg_data;
 struct kvm_assigned_msix_nr msix_nr;
 struct kvm_assigned_msix_entry msix_entry;
-void *va = adev-msix_table_page;
+void *msix_page = adev-msix_table_page;
 
 pos = pci_find_capability(pci_dev, PCI_CAP_ID_MSIX);
 
-entries_max_nr = *(uint16_t *)(pci_dev-config + pos + 2);
+entries_max_nr = pci_get_word(pci_dev-config + pos + PCI_MSIX_FLAGS);
 entries_max_nr = PCI_MSIX_FLAGS_QSIZE;
 entries_max_nr += 1;
 
 /* Get the usable entry number for allocating */
 for (i = 0; i  entries_max_nr; i++) {
-memcpy(msg_ctrl, va + i * 16 + 12, 4);
-memcpy(msg_data, va + i * 16 + 8, 4);
-/* Ignore unused entry even it's unmasked */
-if (msg_data == 0)
+/* Assuming IA-32 MSI message format:
+ * Ignore unused entry (invalid vector) */
+if (pci_get_long(msix_page + i * PCI_MSIX_ENTRY_SIZE +
+ PCI_MSIX_ENTRY_DATA) == 0) {
 continue;
-entries_nr ++;
+}
+entries_nr++;
 }
-
 if (entries_nr == 0) {
 fprintf(stderr, MSI-X entry number is zero!\n);
 return -EINVAL;
 }
+
 msix_nr.assigned_dev_id = calc_assigned_dev_id(adev);
 msix_nr.entry_nr = entries_nr;
 r = kvm_assign_set_msix_nr(kvm_state, msix_nr);
 if (r != 0) {
 fprintf(stderr, fail to set MSI-X entry number for MSIX! %s\n,
-   strerror(-r));
+strerror(-r));
 return r;
 }
 
@@ -995,19 +996,23 @@ static int assigned_dev_update_msix_mmio(PCIDevice 
*pci_dev)
 msix_entry.assigned_dev_id = msix_nr.assigned_dev_id;
 entries_nr = 0;
 for (i = 0; i  entries_max_nr; i++) {
-if (entries_nr = msix_nr.entry_nr)
+if (entries_nr = msix_nr.entry_nr) {
 break;
-memcpy(msg_ctrl, va + i * 16 + 12, 4);
-memcpy(msg_data, va + i * 16 + 8, 4);
-if (msg_data == 0)
+}
+msg_data = pci_get_long(msix_page + i * PCI_MSIX_ENTRY_SIZE +
+PCI_MSIX_ENTRY_DATA);
+if (msg_data == 0) {
 continue;
-
-memcpy(msg_addr, va + i * 16, 4);
-memcpy(msg_upper_addr, va + i * 16 + 4, 4);
+}
+msg_addr = pci_get_long(msix_page + i * PCI_MSIX_ENTRY_SIZE +
+PCI_MSIX_ENTRY_LOWER_ADDR);
+msg_upper_addr = pci_get_long(msix_page + i * PCI_MSIX_ENTRY_SIZE +
+  PCI_MSIX_ENTRY_UPPER_ADDR);
 
 r = kvm_get_irq_route_gsi();
-if (r  0)
+if (r  0) {
 return r;
+}
 
 adev-entry[entries_nr].gsi = r;
 adev-entry[entries_nr].type = KVM_IRQ_ROUTING_MSI;
@@ -1026,13 +1031,13 @@ static int assigned_dev_update_msix_mmio(PCIDevice 
*pci_dev)
 break;
 }
 DEBUG(MSI-X entry gsi 0x%x, entry %d\n!,
-msix_entry.gsi, msix_entry.entry);
-entries_nr ++;
+  msix_entry.gsi, msix_entry.entry);
+entries_nr++;
 }
 
 if (r == 0  kvm_commit_irq_routes()  0) {
-   perror(assigned_dev_update_msix_mmio: kvm_commit_irq_routes);
-   return -EINVAL;
+perror(assigned_dev_update_msix_mmio: kvm_commit_irq_routes);
+return -EINVAL;
 }
 
 return r;
@@ -1070,7 +1075,7 @@ static void assigned_dev_update_msix(PCIDevice *pci_dev)
 assigned_irq_data.flags = KVM_DEV_IRQ_HOST_MSIX |
   KVM_DEV_IRQ_GUEST_MSIX;
 
-if (assigned_dev_update_msix_mmio(pci_dev)  0) {
+if (assigned_dev_set_msix_vectors(pci_dev)  0) {
 perror(assigned_dev_update_msix_mmio);
 return;
 }
-- 
1.7.3.4

Re: [Qemu-devel] [PATCH] arm cpu state loading fix

2011-10-17 Thread Peter Maydell

On 17 October 2011 11:53, Dmitry Koshelev karaghio...@gmail.com wrote:
 Floating registers loading fix.

 Signed-off-by: Dmitry Koshelev karaghio...@gmail.com
 ---
  target-arm/machine.c |    2 +-
  1 files changed, 1 insertions(+), 1 deletions(-)

 diff --git a/target-arm/machine.c b/target-arm/machine.c
 index 3925d3a..73d82c9 100644
 --- a/target-arm/machine.c
 +++ b/target-arm/machine.c
 @@ -175,7 +175,7 @@ int cpu_load(QEMUFile *f, void *opaque, int version_id)
         env-vfp.vec_stride = qemu_get_be32(f);

         if (arm_feature(env, ARM_FEATURE_VFP3)) {
 -            for (i = 0;  i  16; i++) {
 +            for (i = 16;  i  32; i++) {
                 CPU_DoubleU u;
                 u.l.upper = qemu_get_be32(f);
                 u.l.lower = qemu_get_be32(f);

Reviewed-by: Peter Maydell peter.mayd...@linaro.org

-- PMM

Re: [Qemu-devel] [RFC][PATCH 12/45] msi: Introduce MSIRoutingCache

2011-10-17 Thread Jan Kiszka

On 2011-10-17 13:06, Avi Kivity wrote:
 On 10/17/2011 11:27 AM, Jan Kiszka wrote:
 This cache will help us implement KVM in-kernel irqchip support
 without spreading hooks all over the place.

 KVM requires us to register it first and then deliver it by raising a
 pseudo IRQ line returned on registration. While this could be changed
 for QEMU-originated MSI messages by adding direct MSI injection, we will
 still need this translation for irqfd-originated messages. The
 MSIRoutingCache will allow us to track those registrations and update them
 lazily before the actual delivery. This avoids having to track MSI
 vectors at device level (like qemu-kvm currently does).


 +typedef enum {
 +MSI_ROUTE_NONE = 0,
 +MSI_ROUTE_STATIC,
 +} MSIRouteType;
 +
 +struct MSIRoutingCache {
 +MSIMessage msg;
 +MSIRouteType type;
 +int kvm_gsi;
 +int kvm_irqfd;
 +};
 +
 diff --git a/hw/pci.h b/hw/pci.h
 index 329ab32..5b5d2fd 100644
 --- a/hw/pci.h
 +++ b/hw/pci.h
 @@ -197,6 +197,10 @@ struct PCIDevice {
  MemoryRegion rom;
  uint32_t rom_bar;
  
 +/* MSI routing caches */
 +MSIRoutingCache *msi_cache;
 +MSIRoutingCache *msix_cache;
 +
  /* MSI entries */
  int msi_entries_nr;
  struct KVMMsiMessage *msi_irq_entries;
 
 IMO this needlessly leaks kvm information into core qemu.  The cache
 should be completely hidden in kvm code.
 
 I think msi_deliver() can hide the use of the cache completely.  For
 pre-registered events like kvm's irqfd, you can use something like
 
   qemu_irq qemu_msi_irq(MSIMessage msg)
 
 for non-kvm, it simply returns a qemu_irq that triggers a stl_phys();
 for kvm, it allocates an irqfd and a permanent entry in the cache and
 returns a qemu_irq that triggers the irqfd.

See my previous mail: you want to track the life-cycle of an MSI
source to avoid generating routes for identical sources. A message is
not a source. Two identical messages can come from different sources. So
we need a separate data structure for that purpose.

Jan

-- 
Siemens AG, Corporate Technology, CT T DE IT 1
Corporate Competence Center Embedded Linux

1 2 3 4 >

1 - 100 of 327 matches

Mail list logo