[Qemu-devel] [PATCH] cloop.c: use gfree,instead of free
Use gfree, to pair with g_malloc. Also fix coding style. Signed-off-by: Dong Xu Wang wdon...@linux.vnet.ibm.com --- block/cloop.c | 114 +++-- 1 files changed, 62 insertions(+), 52 deletions(-) diff --git a/block/cloop.c b/block/cloop.c index 8cff9f2..708093e 100644 --- a/block/cloop.c +++ b/block/cloop.c @@ -39,21 +39,23 @@ typedef struct BDRVCloopState { static int cloop_probe(const uint8_t *buf, int buf_size, const char *filename) { -const char* magic_version_2_0=#!/bin/sh\n - #V2.0 Format\n - modprobe cloop file=$0 mount -r -t iso9660 /dev/cloop $1\n; -int length=strlen(magic_version_2_0); -if(lengthbuf_size) - length=buf_size; -if(!memcmp(magic_version_2_0,buf,length)) - return 2; +const char *magic_version_2_0 = #!/bin/sh\n +#V2.0 Format\n +modprobe cloop file=$0 mount -r -t iso9660 /dev/cloop $1\n; +int length = strlen(magic_version_2_0); +if (length buf_size) { +length = buf_size; +} +if (!memcmp(magic_version_2_0, buf, length)) { +return 2; +} return 0; } static int cloop_open(BlockDriverState *bs, int flags) { BDRVCloopState *s = bs-opaque; -uint32_t offsets_size,max_compressed_block_size=1,i; +uint32_t offsets_size, max_compressed_block_size = 1, i; bs-read_only = 1; @@ -73,26 +75,28 @@ static int cloop_open(BlockDriverState *bs, int flags) s-offsets = g_malloc(offsets_size); if (bdrv_pread(bs-file, 128 + 4 + 4, s-offsets, offsets_size) offsets_size) { - goto cloop_close; +goto cloop_close; } for(i=0;is-n_blocks;i++) { - s-offsets[i]=be64_to_cpu(s-offsets[i]); - if(i0) { - uint32_t size=s-offsets[i]-s-offsets[i-1]; - if(sizemax_compressed_block_size) - max_compressed_block_size=size; - } +s-offsets[i] = be64_to_cpu(s-offsets[i]); +if (i 0) { +uint32_t size = s-offsets[i]-s-offsets[i - 1]; +if (size max_compressed_block_size) { +max_compressed_block_size = size; +} +} } /* initialize zlib engine */ -s-compressed_block = g_malloc(max_compressed_block_size+1); +s-compressed_block = g_malloc(max_compressed_block_size + 1); s-uncompressed_block = 
g_malloc(s-block_size); -if(inflateInit(s-zstream) != Z_OK) - goto cloop_close; -s-current_block=s-n_blocks; +if (inflateInit(s-zstream) != Z_OK) { +goto cloop_close; +} +s-current_block = s-n_blocks; s-sectors_per_block = s-block_size/512; -bs-total_sectors = s-n_blocks*s-sectors_per_block; +bs-total_sectors = s-n_blocks * s-sectors_per_block; return 0; cloop_close: @@ -104,26 +108,29 @@ static inline int cloop_read_block(BlockDriverState *bs, int block_num) BDRVCloopState *s = bs-opaque; if(s-current_block != block_num) { - int ret; -uint32_t bytes = s-offsets[block_num+1]-s-offsets[block_num]; +int ret; +uint32_t bytes = s-offsets[block_num+1] - s-offsets[block_num]; ret = bdrv_pread(bs-file, s-offsets[block_num], s-compressed_block, bytes); -if (ret != bytes) +if (ret != bytes) { return -1; +} + +s-zstream.next_in = s-compressed_block; +s-zstream.avail_in = bytes; +s-zstream.next_out = s-uncompressed_block; +s-zstream.avail_out = s-block_size; +ret = inflateReset(s-zstream); +if (ret != Z_OK) { +return -1; +} +ret = inflate(s-zstream, Z_FINISH); +if (ret != Z_STREAM_END || s-zstream.total_out != s-block_size) { +return -1; +} - s-zstream.next_in = s-compressed_block; - s-zstream.avail_in = bytes; - s-zstream.next_out = s-uncompressed_block; - s-zstream.avail_out = s-block_size; - ret = inflateReset(s-zstream); - if(ret != Z_OK) - return -1; - ret = inflate(s-zstream, Z_FINISH); - if(ret != Z_STREAM_END || s-zstream.total_out != s-block_size) - return -1; - - s-current_block = block_num; +s-current_block = block_num; } return 0; } @@ -134,12 +141,14 @@ static int cloop_read(BlockDriverState *bs, int64_t sector_num, BDRVCloopState *s = bs-opaque; int i; -for(i=0;inb_sectors;i++) { - uint32_t sector_offset_in_block=((sector_num+i)%s-sectors_per_block), - block_num=(sector_num+i)/s-sectors_per_block; - if(cloop_read_block(bs, block_num) != 0) - return -1; - memcpy(buf+i*512,s-uncompressed_block+sector_offset_in_block*512,512); +for (i = 0; i nb_sectors; i++) { 
+uint32_t sector_offset_in_block = ((sector_num + i) % s-sectors_per_block), +block_num = (sector_num + i) / s-sectors_per_block; +if (cloop_read_block(bs, block_num)
Re: [Qemu-devel] [RFC128 3/2] Adjust system and pci address spaces to full 64-bit
On Sun, Oct 16, 2011 at 05:29:07PM +0200, Avi Kivity wrote: Now that the memory API supports full 64-bit buses, adjust the relevant callers to take advantage of it. Note that this, strictly speaking, doesn't give you full 64-bit coverage, since the range covered is 2^64-1 bytes rather than 2^64 bytes. Cases where that will matter would be very rare, of course. Signed-off-by: Avi Kivity a...@redhat.com --- Note needs slight adjustment to patch 2 to make 'info mtree' work. exec.c |2 +- hw/pc_piix.c|2 +- hw/pci_bridge.c |2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/exec.c b/exec.c index d0cbf15..16e37a7 100644 --- a/exec.c +++ b/exec.c @@ -3825,7 +3825,7 @@ static void io_mem_init(void) static void memory_map_init(void) { system_memory = g_malloc(sizeof(*system_memory)); -memory_region_init(system_memory, system, INT64_MAX); +memory_region_init(system_memory, system, UINT64_MAX); set_system_memory_map(system_memory); system_io = g_malloc(sizeof(*system_io)); diff --git a/hw/pc_piix.c b/hw/pc_piix.c index ce1c87f..45540e5 100644 --- a/hw/pc_piix.c +++ b/hw/pc_piix.c @@ -115,7 +115,7 @@ static void pc_init1(MemoryRegion *system_memory, if (pci_enabled) { pci_memory = g_new(MemoryRegion, 1); -memory_region_init(pci_memory, pci, INT64_MAX); +memory_region_init(pci_memory, pci, UINT64_MAX); rom_memory = pci_memory; } else { pci_memory = NULL; diff --git a/hw/pci_bridge.c b/hw/pci_bridge.c index b6287cd..3b786aa 100644 --- a/hw/pci_bridge.c +++ b/hw/pci_bridge.c @@ -319,7 +319,7 @@ int pci_bridge_initfn(PCIDevice *dev) sec_bus-parent_dev = dev; sec_bus-map_irq = br-map_irq; sec_bus-address_space_mem = br-address_space_mem; -memory_region_init(br-address_space_mem, pci_pridge_pci, INT64_MAX); +memory_region_init(br-address_space_mem, pci_pridge_pci, UINT64_MAX); sec_bus-address_space_io = br-address_space_io; memory_region_init(br-address_space_io, pci_bridge_io, 65536); pci_bridge_region_init(br); -- David Gibson| I'll have my music baroque, and 
my code david AT gibson.dropbear.id.au | minimalist, thank you. NOT _the_ _other_ | _way_ _around_! http://www.ozlabs.org/~dgibson
Re: [Qemu-devel] [PATCH] cloop.c: use gfree,instead of free
On 10/17/2011 02:11 PM, Dong Xu Wang wrote: Use gfree, to pair with g_malloc. Also fix coding style. Should it be g_free, instead of gfree. Signed-off-by: Dong Xu Wangwdon...@linux.vnet.ibm.com --- block/cloop.c | 114 +++-- 1 files changed, 62 insertions(+), 52 deletions(-) diff --git a/block/cloop.c b/block/cloop.c index 8cff9f2..708093e 100644 --- a/block/cloop.c +++ b/block/cloop.c @@ -39,21 +39,23 @@ typedef struct BDRVCloopState { static int cloop_probe(const uint8_t *buf, int buf_size, const char *filename) { -const char* magic_version_2_0=#!/bin/sh\n - #V2.0 Format\n - modprobe cloop file=$0 mount -r -t iso9660 /dev/cloop $1\n; -int length=strlen(magic_version_2_0); -if(lengthbuf_size) - length=buf_size; -if(!memcmp(magic_version_2_0,buf,length)) - return 2; +const char *magic_version_2_0 = #!/bin/sh\n +#V2.0 Format\n +modprobe cloop file=$0 mount -r -t iso9660 /dev/cloop $1\n; +int length = strlen(magic_version_2_0); +if (length buf_size) { +length = buf_size; +} +if (!memcmp(magic_version_2_0, buf, length)) { +return 2; +} return 0; } static int cloop_open(BlockDriverState *bs, int flags) { BDRVCloopState *s = bs-opaque; -uint32_t offsets_size,max_compressed_block_size=1,i; +uint32_t offsets_size, max_compressed_block_size = 1, i; bs-read_only = 1; @@ -73,26 +75,28 @@ static int cloop_open(BlockDriverState *bs, int flags) s-offsets = g_malloc(offsets_size); if (bdrv_pread(bs-file, 128 + 4 + 4, s-offsets, offsets_size) offsets_size) { - goto cloop_close; +goto cloop_close; } for(i=0;is-n_blocks;i++) { - s-offsets[i]=be64_to_cpu(s-offsets[i]); - if(i0) { - uint32_t size=s-offsets[i]-s-offsets[i-1]; - if(sizemax_compressed_block_size) - max_compressed_block_size=size; - } +s-offsets[i] = be64_to_cpu(s-offsets[i]); +if (i 0) { +uint32_t size = s-offsets[i]-s-offsets[i - 1]; +if (size max_compressed_block_size) { +max_compressed_block_size = size; +} +} } /* initialize zlib engine */ -s-compressed_block = g_malloc(max_compressed_block_size+1); 
+s-compressed_block = g_malloc(max_compressed_block_size + 1); s-uncompressed_block = g_malloc(s-block_size); -if(inflateInit(s-zstream) != Z_OK) - goto cloop_close; -s-current_block=s-n_blocks; +if (inflateInit(s-zstream) != Z_OK) { +goto cloop_close; +} +s-current_block = s-n_blocks; s-sectors_per_block = s-block_size/512; -bs-total_sectors = s-n_blocks*s-sectors_per_block; +bs-total_sectors = s-n_blocks * s-sectors_per_block; return 0; cloop_close: @@ -104,26 +108,29 @@ static inline int cloop_read_block(BlockDriverState *bs, int block_num) BDRVCloopState *s = bs-opaque; if(s-current_block != block_num) { - int ret; -uint32_t bytes = s-offsets[block_num+1]-s-offsets[block_num]; +int ret; +uint32_t bytes = s-offsets[block_num+1] - s-offsets[block_num]; ret = bdrv_pread(bs-file, s-offsets[block_num], s-compressed_block, bytes); -if (ret != bytes) +if (ret != bytes) { return -1; +} + +s-zstream.next_in = s-compressed_block; +s-zstream.avail_in = bytes; +s-zstream.next_out = s-uncompressed_block; +s-zstream.avail_out = s-block_size; +ret = inflateReset(s-zstream); +if (ret != Z_OK) { +return -1; +} +ret = inflate(s-zstream, Z_FINISH); +if (ret != Z_STREAM_END || s-zstream.total_out != s-block_size) { +return -1; +} - s-zstream.next_in = s-compressed_block; - s-zstream.avail_in = bytes; - s-zstream.next_out = s-uncompressed_block; - s-zstream.avail_out = s-block_size; - ret = inflateReset(s-zstream); - if(ret != Z_OK) - return -1; - ret = inflate(s-zstream, Z_FINISH); - if(ret != Z_STREAM_END || s-zstream.total_out != s-block_size) - return -1; - - s-current_block = block_num; +s-current_block = block_num; } return 0; } @@ -134,12 +141,14 @@ static int cloop_read(BlockDriverState *bs, int64_t sector_num, BDRVCloopState *s = bs-opaque; int i; -for(i=0;inb_sectors;i++) { - uint32_t sector_offset_in_block=((sector_num+i)%s-sectors_per_block), - block_num=(sector_num+i)/s-sectors_per_block; - if(cloop_read_block(bs, block_num) != 0) - return -1; - 
memcpy(buf+i*512,s-uncompressed_block+sector_offset_in_block*512,512); +for (i = 0; i nb_sectors; i++) { +uint32_t sector_offset_in_block = ((sector_num + i) % s-sectors_per_block), +
[Qemu-devel] buildbot failure in qemu on s390-next_i386_debian_6_0
The Buildbot has detected a new failure on builder s390-next_i386_debian_6_0 while building qemu. Full details are available at: http://buildbot.b1-systems.de/qemu/builders/s390-next_i386_debian_6_0/builds/64 Buildbot URL: http://buildbot.b1-systems.de/qemu/ Buildslave for this Build: yuzuki Build Reason: The Nightly scheduler named 'nightly_s390-next' triggered this build Build Source Stamp: [branch s390-next] HEAD Blamelist: BUILD FAILED: failed git sincerely, -The Buildbot
Re: [Qemu-devel] [PATCH] target_sparc: Fix use of free() instead of g_free()
On 10/17/2011 02:10 AM, Stefan Weil wrote: This error was reported by cppcheck. Signed-off-by: Stefan Weils...@weilnetz.de --- target-sparc/helper.c |4 ++-- 1 files changed, 2 insertions(+), 2 deletions(-) diff --git a/target-sparc/helper.c b/target-sparc/helper.c index c80531a..ca9bf6b 100644 --- a/target-sparc/helper.c +++ b/target-sparc/helper.c @@ -1200,8 +1200,8 @@ static int cpu_sparc_register(CPUSPARCState *env, const char *cpu_model) static void cpu_sparc_close(CPUSPARCState *env) { -free(env-def); -free(env); +g_free(env-def); +g_free(env); } CPUSPARCState *cpu_sparc_init(const char *cpu_model) Reviewed-by: Ray Wangrayw...@linux.vnet.ibm.com -- Regards, Ray Wang
Re: [Qemu-devel] [PATCH v3 1/4] vga: make PCI devices optional
On 2011-10-16 23:21, Blue Swirl wrote: Improve VGA selection logic, push check for device availabilty to vl.c. Make PCI VGA devices optional. Signed-off-by: Blue Swirl blauwir...@gmail.com --- hw/cirrus_vga.c |5 - hw/pc.c |6 +- hw/pc.h | 33 +++-- hw/pci.c| 18 ++ hw/pci.h|4 hw/qdev.c |5 + hw/qdev.h |1 + hw/vga-pci.c|6 -- vl.c| 33 +++-- 9 files changed, 83 insertions(+), 28 deletions(-) diff --git a/hw/cirrus_vga.c b/hw/cirrus_vga.c index c7e365b..a11444c 100644 --- a/hw/cirrus_vga.c +++ b/hw/cirrus_vga.c @@ -2955,11 +2955,6 @@ static int pci_cirrus_vga_initfn(PCIDevice *dev) return 0; } -void pci_cirrus_vga_init(PCIBus *bus) -{ -pci_create_simple(bus, -1, cirrus-vga); -} - static PCIDeviceInfo cirrus_vga_info = { .qdev.name= cirrus-vga, .qdev.desc= Cirrus CLGD 54xx VGA, diff --git a/hw/pc.c b/hw/pc.c index f0802b7..057eb9c 100644 --- a/hw/pc.c +++ b/hw/pc.c @@ -1080,11 +1080,7 @@ void pc_vga_init(PCIBus *pci_bus) } } else if (vmsvga_enabled) { if (pci_bus) { -if (!pci_vmsvga_init(pci_bus)) { -fprintf(stderr, Warning: vmware_vga not available, - using standard VGA instead\n); -pci_vga_init(pci_bus); -} +pci_vmsvga_init(pci_bus); } else { fprintf(stderr, %s: vmware_vga: no PCI bus\n, __FUNCTION__); } diff --git a/hw/pc.h b/hw/pc.h index b8ad9a3..6c951e8 100644 --- a/hw/pc.h +++ b/hw/pc.h @@ -9,6 +9,7 @@ #include net.h #include memory.h #include ioapic.h +#include pci.h /* PC-style peripherals (also used by other machines). 
*/ @@ -203,26 +204,46 @@ enum vga_retrace_method { extern enum vga_retrace_method vga_retrace_method; -static inline int isa_vga_init(void) +static inline bool isa_vga_init(void) { ISADevice *dev; dev = isa_try_create(isa-vga); if (!dev) { -fprintf(stderr, Warning: isa-vga not available\n); -return 0; +return false; } qdev_init_nofail(dev-qdev); -return 1; +return true; +} + +/* vga-pci.c */ +static inline bool pci_vga_init(PCIBus *bus) +{ +PCIDevice *dev; + +dev = pci_try_create_simple(bus, -1, VGA); +if (!dev) { +return false; +} +return true; } -int pci_vga_init(PCIBus *bus); int isa_vga_mm_init(target_phys_addr_t vram_base, target_phys_addr_t ctrl_base, int it_shift, MemoryRegion *address_space); /* cirrus_vga.c */ -void pci_cirrus_vga_init(PCIBus *bus); +static inline bool pci_cirrus_vga_init(PCIBus *bus) +{ +PCIDevice *dev; + +dev = pci_try_create_simple(bus, -1, cirrus-vga); +if (!dev) { +return false; +} +return true; +} + void isa_cirrus_vga_init(MemoryRegion *address_space); /* ne2000.c */ diff --git a/hw/pci.c b/hw/pci.c index 749e8d8..46c01ac 100644 --- a/hw/pci.c +++ b/hw/pci.c @@ -1687,6 +1687,19 @@ PCIDevice *pci_create_simple_multifunction(PCIBus *bus, int devfn, return dev; } +PCIDevice *pci_try_create_simple_multifunction(PCIBus *bus, int devfn, + bool multifunction, + const char *name) +{ +PCIDevice *dev = pci_try_create_multifunction(bus, devfn, multifunction, + name); +if (!dev) { +return NULL; +} +qdev_init_nofail(dev-qdev); +return dev; +} + PCIDevice *pci_create(PCIBus *bus, int devfn, const char *name) { return pci_create_multifunction(bus, devfn, false, name); @@ -1702,6 +1715,11 @@ PCIDevice *pci_try_create(PCIBus *bus, int devfn, const char *name) return pci_try_create_multifunction(bus, devfn, false, name); } +PCIDevice *pci_try_create_simple(PCIBus *bus, int devfn, const char *name) +{ +return pci_try_create_simple_multifunction(bus, devfn, false, name); +} + static int pci_find_space(PCIDevice *pdev, uint8_t size) { int config_size = 
pci_config_size(pdev); diff --git a/hw/pci.h b/hw/pci.h index 86a81c8..aa2e040 100644 --- a/hw/pci.h +++ b/hw/pci.h @@ -473,9 +473,13 @@ PCIDevice *pci_create_simple_multifunction(PCIBus *bus, int devfn, PCIDevice *pci_try_create_multifunction(PCIBus *bus, int devfn, bool multifunction, const char *name); +PCIDevice *pci_try_create_simple_multifunction(PCIBus *bus, int devfn, + bool multifunction, + const char *name); PCIDevice *pci_create(PCIBus *bus, int
[Qemu-devel] buildbot failure in qemu on monitor_i386_debian_6_0
The Buildbot has detected a new failure on builder monitor_i386_debian_6_0 while building qemu. Full details are available at: http://buildbot.b1-systems.de/qemu/builders/monitor_i386_debian_6_0/builds/63 Buildbot URL: http://buildbot.b1-systems.de/qemu/ Buildslave for this Build: yuzuki Build Reason: The Nightly scheduler named 'nightly_monitor' triggered this build Build Source Stamp: [branch queue/monitor] HEAD Blamelist: BUILD FAILED: failed git sincerely, -The Buildbot
[Qemu-devel] [PATCH v2 1/2] spice: turn client_migrate_info to async
RHBZ 737921 Spice client is required to connect to the migration target before/as migration starts. Since after migration starts, the target qemu is blocked and cannot accept new spice client we trigger the connection to the target upon client_migrate_info command. client_migrate_info completion cb will be called after spice client has been connected to the target (or a timeout). See following patches and spice patches. Signed-off-by: Yonit Halperin yhalp...@redhat.com --- hmp-commands.hx |3 ++- monitor.c |6 -- qmp-commands.hx |3 ++- ui/qemu-spice.h | 14 +++--- ui/spice-core.c | 10 +++--- 5 files changed, 26 insertions(+), 10 deletions(-) diff --git a/hmp-commands.hx b/hmp-commands.hx index 9e1cca8..6f390a0 100644 --- a/hmp-commands.hx +++ b/hmp-commands.hx @@ -827,7 +827,8 @@ ETEXI .params = protocol hostname port tls-port cert-subject, .help = send migration info to spice/vnc client, .user_print = monitor_user_noop, -.mhandler.cmd_new = client_migrate_info, +.mhandler.cmd_async = client_migrate_info, +.flags = MONITOR_CMD_ASYNC, }, STEXI diff --git a/monitor.c b/monitor.c index df0f622..0374dcc 100644 --- a/monitor.c +++ b/monitor.c @@ -1221,7 +1221,8 @@ static int add_graphics_client(Monitor *mon, const QDict *qdict, QObject **ret_d return -1; } -static int client_migrate_info(Monitor *mon, const QDict *qdict, QObject **ret_data) +static int client_migrate_info(Monitor *mon, const QDict *qdict, + MonitorCompletion cb, void *opaque) { const char *protocol = qdict_get_str(qdict, protocol); const char *hostname = qdict_get_str(qdict, hostname); @@ -1236,7 +1237,8 @@ static int client_migrate_info(Monitor *mon, const QDict *qdict, QObject **ret_d return -1; } -ret = qemu_spice_migrate_info(hostname, port, tls_port, subject); +ret = qemu_spice_migrate_info(hostname, port, tls_port, subject, + cb, opaque); if (ret != 0) { qerror_report(QERR_UNDEFINED_ERROR); return -1; diff --git a/qmp-commands.hx b/qmp-commands.hx index 27cc66e..321fb10 100644 --- a/qmp-commands.hx 
+++ b/qmp-commands.hx @@ -578,7 +578,8 @@ EQMP .params = protocol hostname port tls-port cert-subject, .help = send migration info to spice/vnc client, .user_print = monitor_user_noop, -.mhandler.cmd_new = client_migrate_info, +.mhandler.cmd_async = client_migrate_info, +.flags = MONITOR_CMD_ASYNC, }, SQMP diff --git a/ui/qemu-spice.h b/ui/qemu-spice.h index f34be69..c35b29c 100644 --- a/ui/qemu-spice.h +++ b/ui/qemu-spice.h @@ -25,6 +25,7 @@ #include qemu-option.h #include qemu-config.h #include qemu-char.h +#include monitor.h extern int using_spice; @@ -37,7 +38,8 @@ int qemu_spice_set_passwd(const char *passwd, bool fail_if_connected, bool disconnect_if_connected); int qemu_spice_set_pw_expire(time_t expires); int qemu_spice_migrate_info(const char *hostname, int port, int tls_port, -const char *subject); +const char *subject, +MonitorCompletion cb, void *opaque); void do_info_spice_print(Monitor *mon, const QObject *data); void do_info_spice(Monitor *mon, QObject **ret_data); @@ -45,6 +47,7 @@ void do_info_spice(Monitor *mon, QObject **ret_data); int qemu_chr_open_spice(QemuOpts *opts, CharDriverState **_chr); #else /* CONFIG_SPICE */ +#include monitor.h #define using_spice 0 static inline int qemu_spice_set_passwd(const char *passwd, @@ -57,8 +60,13 @@ static inline int qemu_spice_set_pw_expire(time_t expires) { return -1; } -static inline int qemu_spice_migrate_info(const char *h, int p, int t, const char *s) -{ return -1; } +static inline int qemu_spice_migrate_info(const char *h, int p, int t, + const char *s, + MonitorCompletion cb, void *opaque) +{ +cb(opaque, NULL); +return -1; +} #endif /* CONFIG_SPICE */ diff --git a/ui/spice-core.c b/ui/spice-core.c index 3cbc721..50c0d7d 100644 --- a/ui/spice-core.c +++ b/ui/spice-core.c @@ -457,10 +457,14 @@ static void migration_state_notifier(Notifier *notifier, void *data) } int qemu_spice_migrate_info(const char *hostname, int port, int tls_port, -const char *subject) +const char *subject, +MonitorCompletion 
*cb, void *opaque) { -return spice_server_migrate_info(spice_server, hostname, - port, tls_port, subject); +int ret; +ret = spice_server_migrate_info(spice_server, hostname, +port, tls_port, subject); +cb(opaque, NULL); +
[Qemu-devel] [PATCH v2 2/2] spice: support the new migration interface (spice 0.8.3)
- call spice_server_migrate_(start|end|connect). - register spice_migrate_connect completion callback Signed-off-by: Yonit Halperin yhalp...@redhat.com --- ui/spice-core.c | 56 ++- 1 files changed, 55 insertions(+), 1 deletions(-) diff --git a/ui/spice-core.c b/ui/spice-core.c index 50c0d7d..457cf61 100644 --- a/ui/spice-core.c +++ b/ui/spice-core.c @@ -288,6 +288,38 @@ static SpiceCoreInterface core_interface = { #endif }; +#ifdef SPICE_INTERFACE_MIGRATION +typedef struct SpiceMigration { +SpiceMigrateInstance sin; +struct { +MonitorCompletion *cb; +void *opaque; +} connect_complete; +} SpiceMigration; + +static void migrate_connect_complete_cb(SpiceMigrateInstance *sin); + +static const SpiceMigrateInterface migrate_interface = { +.base.type = SPICE_INTERFACE_MIGRATION, +.base.description = migration, +.base.major_version = SPICE_INTERFACE_MIGRATION_MAJOR, +.base.minor_version = SPICE_INTERFACE_MIGRATION_MINOR, +.migrate_connect_complete = migrate_connect_complete_cb, +.migrate_end_complete = NULL, +}; + +static SpiceMigration spice_migrate; + +static void migrate_connect_complete_cb(SpiceMigrateInstance *sin) +{ +SpiceMigration *sm = container_of(sin, SpiceMigration, sin); +if (sm-connect_complete.cb) { +sm-connect_complete.cb(sm-connect_complete.opaque, NULL); +} +sm-connect_complete.cb = NULL; +} +#endif + /* config string parsing */ static int name2enum(const char *string, const char *table[], int entries) @@ -449,9 +481,19 @@ static void migration_state_notifier(Notifier *notifier, void *data) { int state = get_migration_state(); -if (state == MIG_STATE_COMPLETED) { +if (state == MIG_STATE_ACTIVE) { +#ifdef SPICE_INTERFACE_MIGRATION +spice_server_migrate_start(spice_server); +#endif +} else if (state == MIG_STATE_COMPLETED) { #if SPICE_SERVER_VERSION = 0x000701 /* 0.7.1 */ +#ifndef SPICE_INTERFACE_MIGRATION spice_server_migrate_switch(spice_server); +#else +spice_server_migrate_end(spice_server, true); +} else if (state == MIG_STATE_CANCELLED || state == 
MIG_STATE_ERROR) { +spice_server_migrate_end(spice_server, false); +#endif #endif } } @@ -461,9 +503,16 @@ int qemu_spice_migrate_info(const char *hostname, int port, int tls_port, MonitorCompletion *cb, void *opaque) { int ret; +#ifdef SPICE_INTERFACE_MIGRATION +spice_migrate.connect_complete.cb = cb; +spice_migrate.connect_complete.opaque = opaque; +ret = spice_server_migrate_connect(spice_server, hostname, + port, tls_port, subject); +#else ret = spice_server_migrate_info(spice_server, hostname, port, tls_port, subject); cb(opaque, NULL); +#endif return ret; } @@ -654,6 +703,11 @@ void qemu_spice_init(void) migration_state.notify = migration_state_notifier; add_migration_state_change_notifier(migration_state); +#ifdef SPICE_INTERFACE_MIGRATION +spice_migrate.sin.base.sif = migrate_interface.base; +spice_migrate.connect_complete.cb = NULL; +qemu_spice_add_interface(spice_migrate.sin.base); +#endif qemu_spice_input_init(); qemu_spice_audio_init(); -- 1.7.6.4
[Qemu-devel] [PATCH v2 0/2] spice migration interface v2 (RHBZ 737921)
Same as the previous series with a small fix to allow compilation with Spice disabled. Yonit Spice client is required to connect to the migration target before/as migration starts. Previously, it connected upon migration completion, however, the ticket was set in the beginning, thus when migration time was > ticket_expiration_time, spice failed to connect to the target. Since the migration target is blocked after migration starts, we execute spice-client connection to the target before migration, upon client_migrate_info. We wait till the client is connected to the target, or till a timeout occurs. In order to not block the iothread, this patch turns client_migrate_info to asynchronous. In addition, we changed the spice api: (1) client_migrate_info needs to call spice_server_migrate_connect (2) spice_server_migrate_start/end need to be called upon migration start/end ** spice_server_start and the migrate_end_complete callback were added for future use, in case we implement a real seamless spice migration Yonit Halperin (2): spice: turn client_migrate_info to async spice: support the new migration interface (spice 0.8.3) hmp-commands.hx |3 +- monitor.c |6 +++- qmp-commands.hx |3 +- ui/qemu-spice.h | 14 +-- ui/spice-core.c | 66 +++--- 5 files changed, 81 insertions(+), 11 deletions(-) -- 1.7.6.4
Re: [Qemu-devel] [PATCH 1/1 V5] kernel/kvm: introduce KVM_SET_LINT1 and fix improper nmi emulation
On 10/16/2011 05:39 PM, Avi Kivity wrote: On 10/14/2011 11:03 AM, Lai Jiangshan wrote: Currently, NMI interrupt is blindly sent to all the vCPUs when NMI button event happens. This doesn't properly emulate real hardware on which NMI button event triggers LINT1. Because of this, NMI is sent to the processor even when LINT1 is masked in LVT. For example, this causes the problem that kdump initiated by NMI sometimes doesn't work on KVM, because kdump assumes NMI is masked on CPUs other than CPU0. With this patch, we introduce KVM_SET_LINT1, and we can use KVM_SET_LINT1 to correctly emulate NMI button without changing the old KVM_NMI behavior. @@ -759,6 +762,8 @@ struct kvm_clock_data { #define KVM_CREATE_SPAPR_TCE _IOW(KVMIO, 0xa8, struct kvm_create_spapr_tce) /* Available with KVM_CAP_RMA */ #define KVM_ALLOCATE_RMA _IOR(KVMIO, 0xa9, struct kvm_allocate_rma) +/* Available with KVM_CAP_SET_LINT1 for x86 */ +#define KVM_SET_LINT1 _IO(KVMIO, 0xaa) LINT1 may have been programmed as a level-triggered interrupt instead of edge triggered (NMI or interrupt). We can use the ioctl argument for the level (and pressing the NMI button needs to pulse the level to 1 and back to 0). Hi, Avi, How to handle level=0 in the kernel? Or just ignore it? Thanks, Lai
[Qemu-devel] [PATCH RFC v1 0/2] Initial support for Microsoft Hyper-V.
With the following series of patches we are starting to implement some basic Microsoft Hyper-V Enlightenment functionality. This series is mostly about adding support for relaxed timing, spinlock, and virtual apic. For more Hyper-V related information please see: Hypervisor Functional Specification v2.0: For Windows Server 2008 R2 at http://www.microsoft.com/download/en/details.aspx?displaylang=en&id=18673 Changelog: v0->v1 - move hyper-v parameters under cpu category, - move hyper-v stuff to target-i386 directory, - make CONFIG_HYPERV enabled by default for i386-softmmu and x86_64-softmmu configurations, - rearrange the patches from v0, - set HV_X64_MSR_HYPERCALL, HV_X64_MSR_GUEST_OS_ID, and HV_X64_MSR_APIC_ASSIST_PAGE to 0 on system reset. Vadim Rozenfeld (2): hyper-v: introduce Hyper-V support infrastructure. hyper-v: initialize Hyper-V CPUID leafs. Makefile.target|2 + default-configs/i386-softmmu.mak |1 + default-configs/x86_64-softmmu.mak |1 + target-i386/cpuid.c| 14 +++ target-i386/hyperv.c | 69 target-i386/hyperv.h | 30 +++ target-i386/kvm.c | 64 - 7 files changed, 179 insertions(+), 2 deletions(-) create mode 100644 target-i386/hyperv.c create mode 100644 target-i386/hyperv.h -- 1.7.4.4
[Qemu-devel] [PATCH RFC v1 2/2] hyper-v: initialize Hyper-V CPUID leafs.
--- target-i386/kvm.c | 64 +++- 1 files changed, 62 insertions(+), 2 deletions(-) diff --git a/target-i386/kvm.c b/target-i386/kvm.c index 3840255..30b3e85 100644 --- a/target-i386/kvm.c +++ b/target-i386/kvm.c @@ -29,6 +29,7 @@ #include hw/pc.h #include hw/apic.h #include ioport.h +#include hyperv.h //#define DEBUG_KVM @@ -379,11 +380,16 @@ int kvm_arch_init_vcpu(CPUState *env) cpuid_i = 0; /* Paravirtualization CPUIDs */ -memcpy(signature, KVMKVMKVM\0\0\0, 12); c = cpuid_data.entries[cpuid_i++]; memset(c, 0, sizeof(*c)); c-function = KVM_CPUID_SIGNATURE; -c-eax = 0; +if (!hyperv_enabled()) { +memcpy(signature, KVMKVMKVM\0\0\0, 12); +c-eax = 0; +} else { +memcpy(signature, Microsoft Hv, 12); +c-eax = HYPERV_CPUID_MIN; +} c-ebx = signature[0]; c-ecx = signature[1]; c-edx = signature[2]; @@ -394,6 +400,45 @@ int kvm_arch_init_vcpu(CPUState *env) c-eax = env-cpuid_kvm_features kvm_arch_get_supported_cpuid(s, KVM_CPUID_FEATURES, 0, R_EAX); +if (hyperv_enabled()) { +memcpy(signature, Hv#1\0\0\0\0\0\0\0\0, 12); +c-eax = signature[0]; + +c = cpuid_data.entries[cpuid_i++]; +memset(c, 0, sizeof(*c)); +c-function = HYPERV_CPUID_VERSION; +c-eax = 0x1bbc; +c-ebx = 0x00060001; + +c = cpuid_data.entries[cpuid_i++]; +memset(c, 0, sizeof(*c)); +c-function = HYPERV_CPUID_FEATURES; +if (hyperv_get_relaxed_timing()) { +c-eax |= HV_X64_MSR_HYPERCALL_AVAILABLE; +} +if (hyperv_get_vapic_recommended()) { +c-eax |= HV_X64_MSR_HYPERCALL_AVAILABLE; +c-eax |= HV_X64_MSR_APIC_ACCESS_AVAILABLE; +} + +c = cpuid_data.entries[cpuid_i++]; +memset(c, 0, sizeof(*c)); +c-function = HYPERV_CPUID_ENLIGHTMENT_INFO; +if (hyperv_get_relaxed_timing()) { +c-eax |= HV_X64_RELAXED_TIMING_RECOMMENDED; +} +if (hyperv_get_vapic_recommended()) { +c-eax |= HV_X64_APIC_ACCESS_RECOMMENDED; +} +c-ebx = hyperv_get_spinlock_retries(); + +c = cpuid_data.entries[cpuid_i++]; +memset(c, 0, sizeof(*c)); +c-function = HYPERV_CPUID_IMPLEMENT_LIMITS; +c-eax = 0x40; +c-ebx = 0x40; +} + has_msr_async_pf_en = c-eax (1 
KVM_FEATURE_ASYNC_PF); cpu_x86_cpuid(env, 0, 0, limit, unused, unused, unused); @@ -945,6 +990,13 @@ static int kvm_put_msrs(CPUState *env, int level) kvm_msr_entry_set(msrs[n++], MSR_KVM_ASYNC_PF_EN, env-async_pf_en_msr); } +if (hyperv_hypercall_available()) { +kvm_msr_entry_set(msrs[n++], HV_X64_MSR_GUEST_OS_ID, 0); +kvm_msr_entry_set(msrs[n++], HV_X64_MSR_HYPERCALL, 0); +} +if (hyperv_get_vapic_recommended()) { +kvm_msr_entry_set(msrs[n++], HV_X64_MSR_APIC_ASSIST_PAGE, 0); +} } if (env-mcg_cap) { int i; @@ -1179,6 +1231,14 @@ static int kvm_get_msrs(CPUState *env) msrs[n++].index = MSR_KVM_ASYNC_PF_EN; } +if (hyperv_hypercall_available()) { +msrs[n++].index = HV_X64_MSR_GUEST_OS_ID; +msrs[n++].index = HV_X64_MSR_HYPERCALL; +} +if (hyperv_get_vapic_recommended()) { +msrs[n++].index = HV_X64_MSR_APIC_ASSIST_PAGE; +} + if (env-mcg_cap) { msrs[n++].index = MSR_MCG_STATUS; msrs[n++].index = MSR_MCG_CTL; -- 1.7.4.4
[Qemu-devel] [PATCH RFC v1 1/2] hyper-v: introduce Hyper-V support infrastructure.
with the following series of patches we are starting to implement some basic Microsoft Hyper-V Enlightenment functionality, like relaxed timing, spinlock, and virtual apic support. For more Hyper-V related information please see: Hypervisor Functional Specification v2.0: For Windows Server 2008 R2 at http://www.microsoft.com/download/en/details.aspx?displaylang=enid=18673 --- Makefile.target|2 + default-configs/i386-softmmu.mak |1 + default-configs/x86_64-softmmu.mak |1 + target-i386/cpuid.c| 14 +++ target-i386/hyperv.c | 69 target-i386/hyperv.h | 30 +++ 6 files changed, 117 insertions(+), 0 deletions(-) create mode 100644 target-i386/hyperv.c create mode 100644 target-i386/hyperv.h diff --git a/Makefile.target b/Makefile.target index 40cc592..2c8e1b8 100644 --- a/Makefile.target +++ b/Makefile.target @@ -202,6 +202,8 @@ obj-$(CONFIG_NO_KVM) += kvm-stub.o obj-y += memory.o LIBS+=-lz +obj-$(CONFIG_HYPERV) += hyperv.o + QEMU_CFLAGS += $(VNC_TLS_CFLAGS) QEMU_CFLAGS += $(VNC_SASL_CFLAGS) QEMU_CFLAGS += $(VNC_JPEG_CFLAGS) diff --git a/default-configs/i386-softmmu.mak b/default-configs/i386-softmmu.mak index 55589fa..ee69a0a 100644 --- a/default-configs/i386-softmmu.mak +++ b/default-configs/i386-softmmu.mak @@ -21,3 +21,4 @@ CONFIG_PIIX_PCI=y CONFIG_SOUND=y CONFIG_HPET=y CONFIG_APPLESMC=y +CONFIG_HYPERV=y diff --git a/default-configs/x86_64-softmmu.mak b/default-configs/x86_64-softmmu.mak index 8895028..35b1c00 100644 --- a/default-configs/x86_64-softmmu.mak +++ b/default-configs/x86_64-softmmu.mak @@ -21,3 +21,4 @@ CONFIG_PIIX_PCI=y CONFIG_SOUND=y CONFIG_HPET=y CONFIG_APPLESMC=y +CONFIG_HYPERV=y diff --git a/target-i386/cpuid.c b/target-i386/cpuid.c index 1e8bcff..50b2d0e 100644 --- a/target-i386/cpuid.c +++ b/target-i386/cpuid.c @@ -27,6 +27,8 @@ #include qemu-option.h #include qemu-config.h +#include hyperv.h + /* feature flags taken from Intel Processor Identification and the CPUID * Instruction and AMD's CPUID Specification. 
In cases of disagreement * between feature naming conventions, aliases may be added. @@ -716,6 +718,14 @@ static int cpu_x86_find_by_name(x86_def_t *x86_cpu_def, const char *cpu_model) goto error; } x86_cpu_def-tsc_khz = tsc_freq / 1000; +} else if (!strcmp(featurestr, hv_spinlocks)) { + char* err; + numvalue = strtoul(val, err, 0); + if (!*val || *err) { +fprintf(stderr, bad numerical value %s\n, val); +goto error; +} +hyperv_set_spinlock_retries(numvalue); } else { fprintf(stderr, unrecognized feature %s\n, featurestr); goto error; @@ -724,6 +734,10 @@ static int cpu_x86_find_by_name(x86_def_t *x86_cpu_def, const char *cpu_model) check_cpuid = 1; } else if (!strcmp(featurestr, enforce)) { check_cpuid = enforce_cpuid = 1; +} else if (!strcmp(featurestr, hv_relaxed)) { +hyperv_set_relaxed_timing(1); +} else if (!strcmp(featurestr, hv_vapic)) { +hyperv_set_vapic_recommended(1); } else { fprintf(stderr, feature string `%s' not in format (+feature|-feature|feature=xyz)\n, featurestr); goto error; diff --git a/target-i386/hyperv.c b/target-i386/hyperv.c new file mode 100644 index 000..bed859e --- /dev/null +++ b/target-i386/hyperv.c @@ -0,0 +1,69 @@ +/* + * QEMU Hyper-V support + * + * Copyright Red Hat, Inc. 2011 + * + * Author: Vadim Rozenfeld vroze...@redhat.com + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. 
+ * + */ + +#include hyperv.h + +static int hyperv_vapic; +static int hyperv_relaxed_timing; +static int hyperv_spinlock_attempts = HYPERV_SPINLOCK_NEVER_RETRY; + +void hyperv_set_vapic_recommended(int val) +{ +hyperv_vapic = val; +} + +void hyperv_set_relaxed_timing(int val) +{ +hyperv_relaxed_timing = val; +} + +void hyperv_set_spinlock_retries(int val) +{ +hyperv_spinlock_attempts = val; +if (hyperv_spinlock_attempts 0xFFF) { +hyperv_spinlock_attempts = 0xFFF; +} +} + +int hyperv_enabled(void) +{ +return hyperv_hypercall_available() || hyperv_get_relaxed_timing(); +} + +int hyperv_hypercall_available(void) +{ +if (hyperv_vapic || +(hyperv_spinlock_attempts != HYPERV_SPINLOCK_NEVER_RETRY)) { + return 1; +} +return 0; +} + +int hyperv_get_vapic_recommended(void) +{ +#ifdef KVM_CAP_IRQCHIP +return hyperv_vapic; +#else +return 0; +#endif +} + +int hyperv_get_relaxed_timing(void) +{ +return hyperv_relaxed_timing; +} + +int hyperv_get_spinlock_retries(void) +{ +return hyperv_spinlock_attempts; +} + diff
Re: [Qemu-devel] [PATCHv3] ps2: migrate ledstate
static const VMStateDescription vmstate_ps2_common = { .name = PS2 Common State, -.version_id = 3, +.version_id = 4, .minimum_version_id = 2, .minimum_version_id_old = 2, .fields = (VMStateField []) { @@ -577,6 +585,7 @@ static const VMStateDescription vmstate_ps2_keyboard = { VMSTATE_INT32(scan_enabled, PS2KbdState), VMSTATE_INT32(translate, PS2KbdState), VMSTATE_INT32_V(scancode_set, PS2KbdState,3), +VMSTATE_INT32_V(ledstate, PS2KbdState, 4), VMSTATE_END_OF_LIST() } The version_id in vmstate_ps2_keyboard must be updated too. The version update in vmstate_ps2_common might not be needed; IIRC the versions for stuff referenced via VMSTATE_STRUCT() aren't used anyway. Juan? cheers, Gerd
Re: [Qemu-devel] [PATCH RFC v1 1/2] hyper-v: introduce Hyper-V support infrastructure.
Am 17.10.2011 11:17, schrieb Vadim Rozenfeld: with the following series of patches we are starting to implement some basic Microsoft Hyper-V Enlightenment functionality, like relaxed timing, spinlock, and virtual apic support. For more Hyper-V related information please see: Hypervisor Functional Specification v2.0: For Windows Server 2008 R2 at http://www.microsoft.com/download/en/details.aspx?displaylang=enid=18673 --- Makefile.target|2 + default-configs/i386-softmmu.mak |1 + default-configs/x86_64-softmmu.mak |1 + target-i386/cpuid.c| 14 +++ target-i386/hyperv.c | 69 target-i386/hyperv.h | 30 +++ 6 files changed, 117 insertions(+), 0 deletions(-) create mode 100644 target-i386/hyperv.c create mode 100644 target-i386/hyperv.h diff --git a/Makefile.target b/Makefile.target index 40cc592..2c8e1b8 100644 --- a/Makefile.target +++ b/Makefile.target @@ -202,6 +202,8 @@ obj-$(CONFIG_NO_KVM) += kvm-stub.o obj-y += memory.o LIBS+=-lz +obj-$(CONFIG_HYPERV) += hyperv.o + The patch doesn't look to me as if it could build successfully without CONFIG_HYPERV. An option with only one working value seems a bit pointless. Kevin
[Qemu-devel] [RFC][PATCH 15/45] qemu-kvm: Drop unused kvm_del_irq_route
kvm_add_irq_route only exists to create platform specific static routes. So there is no need for a corresponding delete. Signed-off-by: Jan Kiszka jan.kis...@siemens.com --- qemu-kvm.c | 16 qemu-kvm.h |8 2 files changed, 0 insertions(+), 24 deletions(-) diff --git a/qemu-kvm.c b/qemu-kvm.c index 70481de..e8dc537 100644 --- a/qemu-kvm.c +++ b/qemu-kvm.c @@ -410,22 +410,6 @@ int kvm_update_routing_entry(struct kvm_irq_routing_entry *entry, #endif } -int kvm_del_irq_route(int gsi, int irqchip, int pin) -{ -#ifdef KVM_CAP_IRQ_ROUTING -struct kvm_irq_routing_entry e; - -e.gsi = gsi; -e.type = KVM_IRQ_ROUTING_IRQCHIP; -e.flags = 0; -e.u.irqchip.irqchip = irqchip; -e.u.irqchip.pin = pin; -return kvm_del_routing_entry(e); -#else -return -ENOSYS; -#endif -} - int kvm_commit_irq_routes(void) { #ifdef KVM_CAP_IRQ_ROUTING diff --git a/qemu-kvm.h b/qemu-kvm.h index 8032388..68a921e 100644 --- a/qemu-kvm.h +++ b/qemu-kvm.h @@ -181,14 +181,6 @@ int kvm_deassign_pci_device(KVMState *s, */ int kvm_add_irq_route(int gsi, int irqchip, int pin); -/*! - * \brief Removes an irq route from the temporary irq routing table - * - * Adds an irq route to the temporary irq routing table. Nothing is - * committed to the running VM. - */ -int kvm_del_irq_route(int gsi, int irqchip, int pin); - struct kvm_irq_routing_entry; /*! * \brief Adds a routing entry to the temporary irq routing table -- 1.7.3.4
[Qemu-devel] [RFC][PATCH 10/45] msix: Factor out msix_message_from_vector
This helper will also be used by the upcoming config notifier. Signed-off-by: Jan Kiszka jan.kis...@siemens.com --- hw/msix.c | 19 +-- 1 files changed, 13 insertions(+), 6 deletions(-) diff --git a/hw/msix.c b/hw/msix.c index 04e08e5..50fa504 100644 --- a/hw/msix.c +++ b/hw/msix.c @@ -33,6 +33,15 @@ #define MSIX_PAGE_PENDING (MSIX_PAGE_SIZE / 2) #define MSIX_MAX_ENTRIES 32 +static void msix_message_from_vector(PCIDevice *dev, unsigned vector, + MSIMessage *msg) +{ +uint8_t *table_entry = dev-msix_table_page + vector * PCI_MSIX_ENTRY_SIZE; + +msg-address = pci_get_quad(table_entry + PCI_MSIX_ENTRY_LOWER_ADDR); +msg-data = pci_get_long(table_entry + PCI_MSIX_ENTRY_DATA); +} + /* KVM specific MSIX helpers */ static void kvm_msix_free(PCIDevice *dev) { @@ -453,9 +462,7 @@ uint32_t msix_bar_size(PCIDevice *dev) /* Send an MSI-X message */ void msix_notify(PCIDevice *dev, unsigned vector) { -uint8_t *table_entry = dev-msix_table_page + vector * PCI_MSIX_ENTRY_SIZE; -uint64_t address; -uint32_t data; +MSIMessage msg; if (vector = dev-msix_entries_nr || !dev-msix_entry_used[vector]) return; @@ -469,9 +476,9 @@ void msix_notify(PCIDevice *dev, unsigned vector) return; } -address = pci_get_quad(table_entry + PCI_MSIX_ENTRY_LOWER_ADDR); -data = pci_get_long(table_entry + PCI_MSIX_ENTRY_DATA); -stl_le_phys(address, data); +msix_message_from_vector(dev, vector, msg); + +stl_le_phys(msg.address, msg.data); } void msix_reset(PCIDevice *dev) -- 1.7.3.4
[Qemu-devel] [RFC][PATCH 07/45] msi: Generalize msix_supported to msi_supported
Rename msix_supported to msi_supported and control MSI and MSI-X activation this way. That was likely the original intention for this flag, but MSI support came after MSI-X. Signed-off-by: Jan Kiszka jan.kis...@siemens.com --- hw/msi.c |8 hw/msi.h |2 ++ hw/msix.c |8 +++- hw/msix.h |2 -- hw/pc.c |4 ++-- 5 files changed, 15 insertions(+), 9 deletions(-) diff --git a/hw/msi.c b/hw/msi.c index c924e38..2b7b6e3 100644 --- a/hw/msi.c +++ b/hw/msi.c @@ -37,6 +37,9 @@ #define PCI_MSI_VECTORS_MAX 32 +/* Flag for interrupt controller to declare MSI/MSI-X support */ +bool msi_supported; + /* If we get rid of cap allocator, we won't need this. */ static inline uint8_t msi_cap_sizeof(uint16_t flags) { @@ -205,6 +208,11 @@ int msi_init(struct PCIDevice *dev, uint8_t offset, uint16_t flags; uint8_t cap_size; int config_offset; + +if (!msi_supported) { +return -ENOTSUP; +} + MSI_DEV_PRINTF(dev, init offset: 0x%PRIx8 vector: %PRId8 64bit %d mask %d\n, diff --git a/hw/msi.h b/hw/msi.h index 6ff0607..e5e821f 100644 --- a/hw/msi.h +++ b/hw/msi.h @@ -24,6 +24,8 @@ #include qemu-common.h #include pci.h +extern bool msi_supported; + bool msi_enabled(const PCIDevice *dev); int msi_init(struct PCIDevice *dev, uint8_t offset, unsigned int nr_vectors, bool msi64bit, bool msi_per_vector_mask); diff --git a/hw/msix.c b/hw/msix.c index 33cb716..04e08e5 100644 --- a/hw/msix.c +++ b/hw/msix.c @@ -12,6 +12,7 @@ */ #include hw.h +#include msi.h #include msix.h #include pci.h #include range.h @@ -32,10 +33,6 @@ #define MSIX_PAGE_PENDING (MSIX_PAGE_SIZE / 2) #define MSIX_MAX_ENTRIES 32 - -/* Flag for interrupt controller to declare MSI-X support */ -int msix_supported; - /* KVM specific MSIX helpers */ static void kvm_msix_free(PCIDevice *dev) { @@ -327,8 +324,9 @@ int msix_init(struct PCIDevice *dev, unsigned short nentries, unsigned bar_nr, unsigned bar_size) { int ret; + /* Nothing to do if MSI is not supported by interrupt controller */ -if (!msix_supported || +if (!msi_supported || (kvm_enabled() 
kvm_irqchip_in_kernel() !kvm_has_gsi_routing())) { return -ENOTSUP; } diff --git a/hw/msix.h b/hw/msix.h index 189bb3f..a8661e1 100644 --- a/hw/msix.h +++ b/hw/msix.h @@ -29,8 +29,6 @@ void msix_notify(PCIDevice *dev, unsigned vector); void msix_reset(PCIDevice *dev); -extern int msix_supported; - int msix_set_mask_notifier(PCIDevice *dev, msix_mask_notifier_func); int msix_unset_mask_notifier(PCIDevice *dev); #endif diff --git a/hw/pc.c b/hw/pc.c index 70e0d08..768a20c 100644 --- a/hw/pc.c +++ b/hw/pc.c @@ -36,7 +36,7 @@ #include elf.h #include multiboot.h #include mc146818rtc.h -#include msix.h +#include msi.h #include sysbus.h #include sysemu.h #include kvm.h @@ -892,7 +892,7 @@ static DeviceState *apic_init(void *env, uint8_t apic_id) apic_mapped = 1; } -msix_supported = 1; +msi_supported = true; return dev; } -- 1.7.3.4
[Qemu-devel] [RFC][PATCH 01/45] msi: Guard msi/msix_write_config with msi_present
Terminate msi/msix_write_config early if support is not enabled. This allows checks at the call site to be removed if MSI is optional. Signed-off-by: Jan Kiszka jan.kis...@siemens.com --- hw/msi.c |3 ++- hw/msix.c |2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/hw/msi.c b/hw/msi.c index 56a4698..bbc9cd7 100644 --- a/hw/msi.c +++ b/hw/msi.c @@ -378,7 +378,8 @@ void msi_write_config(PCIDevice *dev, uint32_t addr, uint32_t val, int len) unsigned int vector; uint32_t pending; -if (!ranges_overlap(addr, len, dev-msi_cap, msi_cap_sizeof(flags))) { +if (!msi_present(dev) || +!ranges_overlap(addr, len, dev-msi_cap, msi_cap_sizeof(flags))) { return; } diff --git a/hw/msix.c b/hw/msix.c index 60d6d1e..ebd5aee 100644 --- a/hw/msix.c +++ b/hw/msix.c @@ -240,7 +240,7 @@ void msix_write_config(PCIDevice *dev, uint32_t addr, unsigned enable_pos = dev-msix_cap + MSIX_CONTROL_OFFSET; int vector; -if (!range_covers_byte(addr, len, enable_pos)) { +if (!msix_present(dev) || !range_covers_byte(addr, len, enable_pos)) { return; } -- 1.7.3.4
[Qemu-devel] [RFC][PATCH 41/45] msix: Drop unused msix_bar_size
No use for it, even more after the upcoming API changes. Signed-off-by: Jan Kiszka jan.kis...@siemens.com --- hw/msix.c |8 hw/msix.h |2 -- hw/pci.h |2 -- 3 files changed, 0 insertions(+), 12 deletions(-) diff --git a/hw/msix.c b/hw/msix.c index 5f0fa6a..bccd8b1 100644 --- a/hw/msix.c +++ b/hw/msix.c @@ -83,7 +83,6 @@ static int msix_add_config(struct PCIDevice *pdev, unsigned short nentries, new_size = bar_size * 2; } -pdev-msix_bar_size = new_size; config_offset = pci_add_capability(pdev, PCI_CAP_ID_MSIX, 0, MSIX_CAP_LENGTH); if (config_offset 0) { @@ -374,13 +373,6 @@ int msix_enabled(PCIDevice *dev) MSIX_ENABLE_MASK); } -/* Size of bar where MSI-X table resides, or 0 if MSI-X not supported. */ -uint32_t msix_bar_size(PCIDevice *dev) -{ -return (dev-cap_present QEMU_PCI_CAP_MSIX) ? -dev-msix_bar_size : 0; -} - /* Send an MSI-X message */ void msix_notify(PCIDevice *dev, unsigned vector) { diff --git a/hw/msix.h b/hw/msix.h index 9cd54cf..dfc6087 100644 --- a/hw/msix.h +++ b/hw/msix.h @@ -19,8 +19,6 @@ void msix_load(PCIDevice *dev, QEMUFile *f); int msix_enabled(PCIDevice *dev); int msix_present(PCIDevice *dev); -uint32_t msix_bar_size(PCIDevice *dev); - void msix_clear_vector(PCIDevice *dev, unsigned vector); void msix_clear_all_vectors(PCIDevice *dev); diff --git a/hw/pci.h b/hw/pci.h index 266fe34..e2be271 100644 --- a/hw/pci.h +++ b/hw/pci.h @@ -178,8 +178,6 @@ struct PCIDevice { uint8_t *msix_table_page; /* MMIO index used to map MSIX table and pending bit entries. */ MemoryRegion msix_mmio; -/* Region including the MSI-X table */ -uint32_t msix_bar_size; /* Version id needed for VMState */ int32_t version_id; -- 1.7.3.4
[Qemu-devel] [RFC][PATCH 37/45] qemu-kvm: Clean up irqrouting API
Drop unused functions, privatize those which are only used internally now. Signed-off-by: Jan Kiszka jan.kis...@siemens.com --- kvm-stub.c | 10 -- kvm.h |1 - qemu-kvm.c | 37 ++--- qemu-kvm.h | 39 --- 4 files changed, 6 insertions(+), 81 deletions(-) diff --git a/kvm-stub.c b/kvm-stub.c index acd1446..a4225e0 100644 --- a/kvm-stub.c +++ b/kvm-stub.c @@ -135,20 +135,10 @@ int kvm_has_gsi_routing(void) return 0; } -int kvm_get_irq_route_gsi(void) -{ -return -ENOSYS; -} - void kvm_msi_cache_invalidate(MSIRoutingCache *cache) { } -int kvm_commit_irq_routes(void) -{ -return -ENOSYS; -} - int kvm_set_irq(int irq, int level, int *status) { assert(0); diff --git a/kvm.h b/kvm.h index 61bcfec..9780e53 100644 --- a/kvm.h +++ b/kvm.h @@ -202,7 +202,6 @@ int kvm_set_ioeventfd_pio_word(int fd, uint16_t adr, uint16_t val, bool assign); int kvm_has_gsi_routing(void); int kvm_allows_irq0_override(void); -int kvm_get_irq_route_gsi(void); void kvm_msi_cache_invalidate(MSIRoutingCache *cache); diff --git a/qemu-kvm.c b/qemu-kvm.c index c9b348c..34aebe5 100644 --- a/qemu-kvm.c +++ b/qemu-kvm.c @@ -188,12 +188,6 @@ int kvm_assign_pci_device(KVMState *s, return kvm_vm_ioctl(s, KVM_ASSIGN_PCI_DEVICE, assigned_dev); } -static int kvm_old_assign_irq(KVMState *s, - struct kvm_assigned_irq *assigned_irq) -{ -return kvm_vm_ioctl(s, KVM_ASSIGN_IRQ, assigned_irq); -} - int kvm_device_intx_assign(KVMState *s, uint32_t dev_id, uint32_t host_irq_type, uint32_t guest_irq) { @@ -210,25 +204,6 @@ int kvm_device_intx_assign(KVMState *s, uint32_t dev_id, return kvm_vm_ioctl(s, KVM_ASSIGN_IRQ, assigned_irq); } } - -#ifdef KVM_CAP_ASSIGN_DEV_IRQ -int kvm_assign_irq(KVMState *s, struct kvm_assigned_irq *assigned_irq) -{ -int ret; - -ret = kvm_ioctl(s, KVM_CHECK_EXTENSION, KVM_CAP_ASSIGN_DEV_IRQ); -if (ret 0) { -return kvm_vm_ioctl(s, KVM_ASSIGN_DEV_IRQ, assigned_irq); -} - -return kvm_old_assign_irq(s, assigned_irq); -} -#else -int kvm_assign_irq(KVMState *s, struct kvm_assigned_irq *assigned_irq) -{ 
-return kvm_old_assign_irq(s, assigned_irq); -} -#endif #endif int kvm_device_irq_deassign(KVMState *s, uint32_t dev_id, uint32_t type) @@ -275,8 +250,8 @@ int kvm_has_gsi_routing(void) return r; } -int kvm_add_routing_entry(struct kvm_irq_routing_entry *entry, - MSIRoutingCache *msi_cache) +static int kvm_add_routing_entry(struct kvm_irq_routing_entry *entry, + MSIRoutingCache *msi_cache) { #ifdef KVM_CAP_IRQ_ROUTING KVMState *s = kvm_state; @@ -328,7 +303,7 @@ int kvm_add_irq_route(int gsi, int irqchip, int pin) #endif } -int kvm_del_routing_entry(struct kvm_irq_routing_entry *entry) +static int kvm_del_routing_entry(struct kvm_irq_routing_entry *entry) { #ifdef KVM_CAP_IRQ_ROUTING KVMState *s = kvm_state; @@ -398,8 +373,8 @@ int kvm_del_routing_entry(struct kvm_irq_routing_entry *entry) #endif } -int kvm_update_routing_entry(struct kvm_irq_routing_entry *entry, - struct kvm_irq_routing_entry *newentry) +static int kvm_update_routing_entry(struct kvm_irq_routing_entry *entry, +struct kvm_irq_routing_entry *newentry) { #ifdef KVM_CAP_IRQ_ROUTING KVMState *s = kvm_state; @@ -456,7 +431,7 @@ int kvm_commit_irq_routes(void) static void kvm_msi_cache_flush(KVMState *s); -int kvm_get_irq_route_gsi(void) +static int kvm_get_irq_route_gsi(void) { KVMState *s = kvm_state; int i, bit; diff --git a/qemu-kvm.h b/qemu-kvm.h index 552b668..6b73ce1 100644 --- a/qemu-kvm.h +++ b/qemu-kvm.h @@ -139,17 +139,6 @@ int kvm_enable_vapic(CPUState *env, uint64_t vapic); int kvm_assign_pci_device(KVMState *s, struct kvm_assigned_pci_dev *assigned_dev); -/*! - * \brief Assign IRQ for an assigned device - * - * Used for PCI device assignment, this function assigns IRQ numbers for - * an physical device and guest IRQ handling. 
- * - * \param kvm Pointer to the current kvm_context - * \param assigned_irq Parameters, like dev id, host irq, guest irq, etc - */ -int kvm_assign_irq(KVMState *s, struct kvm_assigned_irq *assigned_irq); - int kvm_device_intx_assign(KVMState *s, uint32_t dev_id, uint32_t host_irq_type, uint32_t guest_irq); int kvm_device_msi_assign(KVMState *s, uint32_t dev_id, MSIMessage *msg, @@ -182,34 +171,6 @@ int kvm_deassign_pci_device(KVMState *s, */ int kvm_add_irq_route(int gsi, int irqchip, int pin); -struct kvm_irq_routing_entry; -/*! - * \brief Adds a routing entry to the temporary irq routing table - * - * Adds a filled routing entry to the temporary irq routing table. Nothing is - * committed to the running VM. - */ -int kvm_add_routing_entry(struct
[Qemu-devel] [RFC][PATCH 44/45] pci-assign: Use generic MSI-X support
Switch MSI-X support of the device assignment core to the generic layer QEMU offers. As for legacy MSI, we use config notifiers to update IRQ assignment and routes on guest changes. Quite a bit code becomes obsolete in the device assignment core, e.g. the maintenance of the MSI-X vector masking MMIO page. Note that we have to reorder BAR mapping and capability initialization in order to pass the BAR container on msix_init. Also in this case we still do not support per-vector masking even after these changes. Signed-off-by: Jan Kiszka jan.kis...@siemens.com --- hw/device-assignment.c | 335 +--- hw/device-assignment.h | 14 +-- 2 files changed, 88 insertions(+), 261 deletions(-) diff --git a/hw/device-assignment.c b/hw/device-assignment.c index 10b30a3..df554b3 100644 --- a/hw/device-assignment.c +++ b/hw/device-assignment.c @@ -24,6 +24,7 @@ * Copyright (C) 2008, Qumranet, Amit Shah (amit.s...@qumranet.com) * Copyright (C) 2008, Red Hat, Amit Shah (amit.s...@redhat.com) * Copyright (C) 2008, IBM, Muli Ben-Yehuda (m...@il.ibm.com) + * Copyright (C) 2011, Siemens AG, Jan Kiszka (jan.kis...@siemens.com) */ #include stdio.h #include unistd.h @@ -41,6 +42,7 @@ #include range.h #include sysemu.h #include msi.h +#include msix.h #define MSIX_PAGE_SIZE 0x1000 @@ -64,8 +66,6 @@ static void assigned_dev_load_option_rom(AssignedDevice *dev); -static void assigned_dev_unregister_msix_mmio(AssignedDevice *dev); - static uint32_t assigned_dev_ioport_rw(AssignedDevRegion *dev_region, uint32_t addr, int len, uint32_t *val) { @@ -238,24 +238,11 @@ static void assigned_dev_iomem_setup(PCIDevice *pci_dev, int region_num, { AssignedDevice *r_dev = DO_UPCAST(AssignedDevice, dev, pci_dev); AssignedDevRegion *region = r_dev-v_addrs[region_num]; -PCIRegion *real_region = r_dev-real_device.regions[region_num]; if (e_size 0) { memory_region_init(region-container, assigned-dev-container, e_size); memory_region_add_subregion(region-container, 0, region-real_iomem); - -/* deal with MSI-X MMIO page 
*/ -if (real_region-base_addr = r_dev-msix_table_addr -real_region-base_addr + real_region-size -r_dev-msix_table_addr) { -int offset = r_dev-msix_table_addr - real_region-base_addr; - -memory_region_add_subregion_overlap(region-container, -offset, -r_dev-mmio, -1); -} } } @@ -648,21 +635,20 @@ again: static QLIST_HEAD(, AssignedDevice) devs = QLIST_HEAD_INITIALIZER(devs); -static void invalidate_msix_vectors(AssignedDevice *dev) -{ -int i; - -for (i = 0; i dev-irq_entries_nr; i++) { -kvm_msi_cache_invalidate(dev-dev.msix_cache[i]); -} -} - static void free_assigned_device(AssignedDevice *dev) { +uint32_t table_bar_nr, pba_bar_nr; +uint8_t *msix_cap; int i; -if (dev-cap.available ASSIGNED_DEVICE_CAP_MSIX) { -assigned_dev_unregister_msix_mmio(dev); +if (msix_present(dev-dev)) { +msix_cap = dev-dev.config + dev-dev.msix_cap; +table_bar_nr = pci_get_long(msix_cap + PCI_MSIX_TABLE) +PCI_MSIX_FLAGS_BIRMASK; +pba_bar_nr = pci_get_long(msix_cap + PCI_MSIX_PBA) +PCI_MSIX_FLAGS_BIRMASK; +msix_uninit(dev-dev, dev-v_addrs[table_bar_nr].container, +dev-v_addrs[pba_bar_nr].container); } for (i = 0; i dev-real_device.region_number; i++) { PCIRegion *pci_region = dev-real_device.regions[i]; @@ -698,9 +684,6 @@ static void free_assigned_device(AssignedDevice *dev) if (dev-real_device.config_fd = 0) { close(dev-real_device.config_fd); } - -invalidate_msix_vectors(dev); -g_free(dev-dev.msix_cache); } static uint32_t calc_assigned_dev_id(AssignedDevice *dev) @@ -916,11 +899,13 @@ void assigned_dev_update_irqs(void) } } +/* used for both MSI and MSI-X */ static void assigned_dev_update_msi(PCIDevice *pci_dev, bool enabled) { AssignedDevice *dev = DO_UPCAST(AssignedDevice, dev, pci_dev); if (!enabled) { +dev-msix_vectors_in_use = 0; assign_intx(dev); } } @@ -945,113 +930,66 @@ static int assigned_dev_update_msi_vector(PCIDevice *pci_dev, return 0; } -static int assigned_dev_set_msix_vectors(PCIDevice *pci_dev) +static int assigned_dev_update_msix_vector(PCIDevice *pci_dev, + unsigned 
int vector, + MSIMessage *msg, bool masked) { -AssignedDevice *adev = DO_UPCAST(AssignedDevice, dev, pci_dev); -uint16_t entries_nr = 0, entries_max_nr; -void *msix_page = adev-msix_table_page; +AssignedDevice *dev =
[Qemu-devel] [RFC][PATCH 32/45] pci-assign: Factor out deassign_irq
Will have more users soon. Signed-off-by: Jan Kiszka jan.kis...@siemens.com --- hw/device-assignment.c | 30 ++ 1 files changed, 18 insertions(+), 12 deletions(-) diff --git a/hw/device-assignment.c b/hw/device-assignment.c index e0b9cfe..e5ac54c 100644 --- a/hw/device-assignment.c +++ b/hw/device-assignment.c @@ -807,10 +807,25 @@ static int assign_device(AssignedDevice *dev) return r; } +static void deassign_irq(AssignedDevice *dev) +{ +int ret; + +if (dev-irq_requested_type) { +ret = kvm_device_irq_deassign(kvm_state, + calc_assigned_dev_id(dev), + dev-irq_requested_type); +if (ret) { +perror(assigned_dev: deassign irq); +} +dev-girq = -1; +dev-irq_requested_type = 0; +} +} + static int assign_intx(AssignedDevice *dev) { struct kvm_assigned_irq assigned_irq_data; -uint32_t dev_id; int irq, r; /* Interrupt PIN 0 means don't use INTx */ @@ -824,19 +839,10 @@ static int assign_intx(AssignedDevice *dev) return 0; } -dev_id = calc_assigned_dev_id(dev); - -if (dev-irq_requested_type) { -r = kvm_device_irq_deassign(kvm_state, dev_id, -dev-irq_requested_type); -if (r) { -perror(assign_intx: deassign); -} -dev-irq_requested_type = 0; -} +deassign_irq(dev); memset(assigned_irq_data, 0, sizeof(assigned_irq_data)); -assigned_irq_data.assigned_dev_id = dev_id; +assigned_irq_data.assigned_dev_id = calc_assigned_dev_id(dev); assigned_irq_data.guest_irq = irq; assigned_irq_data.flags = KVM_DEV_IRQ_GUEST_INTX; if (dev-features ASSIGNED_DEVICE_PREFER_MSI_MASK -- 1.7.3.4
[Qemu-devel] [RFC][PATCH 28/45] qemu-kvm: msix: Drop tracking of used vectors
This optimization was only required to keep KVM route usage low. Now that we solve that problem via lazy updates, we can drop the field. We still need interfaces to clear pending vectors, though (and we have to make use of them more broadly - but that's unrelated to this patch). Signed-off-by: Jan Kiszka jan.kis...@siemens.com --- hw/ivshmem.c| 16 ++--- hw/msix.c | 62 +++--- hw/msix.h |5 +-- hw/pci.h|2 - hw/virtio-pci.c | 20 +++-- 5 files changed, 26 insertions(+), 79 deletions(-) diff --git a/hw/ivshmem.c b/hw/ivshmem.c index 242fbea..a402c98 100644 --- a/hw/ivshmem.c +++ b/hw/ivshmem.c @@ -535,10 +535,8 @@ static uint64_t ivshmem_get_size(IVShmemState * s) { return value; } -static void ivshmem_setup_msi(IVShmemState * s) { - -int i; - +static void ivshmem_setup_msi(IVShmemState *s) +{ /* allocate the MSI-X vectors */ memory_region_init(s-msix_bar, ivshmem-msix, 4096); @@ -551,11 +549,6 @@ static void ivshmem_setup_msi(IVShmemState * s) { exit(1); } -/* 'activate' the vectors */ -for (i = 0; i s-vectors; i++) { -msix_vector_use(s-dev, i); -} - /* allocate Qemu char devices for receiving interrupts */ s-eventfd_table = g_malloc0(s-vectors * sizeof(EventfdEntry)); } @@ -581,7 +574,7 @@ static int ivshmem_load(QEMUFile* f, void *opaque, int version_id) IVSHMEM_DPRINTF(ivshmem_load\n); IVShmemState *proxy = opaque; -int ret, i; +int ret; if (version_id 0) { return -EINVAL; @@ -599,9 +592,6 @@ static int ivshmem_load(QEMUFile* f, void *opaque, int version_id) if (ivshmem_has_feature(proxy, IVSHMEM_MSI)) { msix_load(proxy-dev, f); -for (i = 0; i proxy-vectors; i++) { -msix_vector_use(proxy-dev, i); -} } else { proxy-intrstatus = qemu_get_be32(f); proxy-intrmask = qemu_get_be32(f); diff --git a/hw/msix.c b/hw/msix.c index ce3375a..f1b97b5 100644 --- a/hw/msix.c +++ b/hw/msix.c @@ -292,9 +292,6 @@ int msix_init(struct PCIDevice *dev, unsigned short nentries, if (nentries MSIX_MAX_ENTRIES) return -EINVAL; -dev-msix_entry_used = g_malloc0(MSIX_MAX_ENTRIES * -sizeof 
*dev-msix_entry_used); - dev-msix_table_page = g_malloc0(MSIX_PAGE_SIZE); msix_mask_all(dev, nentries); @@ -317,21 +314,9 @@ err_config: memory_region_destroy(dev-msix_mmio); g_free(dev-msix_table_page); dev-msix_table_page = NULL; -g_free(dev-msix_entry_used); -dev-msix_entry_used = NULL; return ret; } -static void msix_free_irq_entries(PCIDevice *dev) -{ -int vector; - -for (vector = 0; vector dev-msix_entries_nr; ++vector) { -dev-msix_entry_used[vector] = 0; -msix_clr_pending(dev, vector); -} -} - /* Clean up resources for the device. */ int msix_uninit(PCIDevice *dev, MemoryRegion *bar) { @@ -340,14 +325,11 @@ int msix_uninit(PCIDevice *dev, MemoryRegion *bar) } pci_del_capability(dev, PCI_CAP_ID_MSIX, MSIX_CAP_LENGTH); dev-msix_cap = 0; -msix_free_irq_entries(dev); dev-msix_entries_nr = 0; memory_region_del_subregion(bar, dev-msix_mmio); memory_region_destroy(dev-msix_mmio); g_free(dev-msix_table_page); dev-msix_table_page = NULL; -g_free(dev-msix_entry_used); -dev-msix_entry_used = NULL; kvm_msix_free(dev); g_free(dev-msix_cache); @@ -376,7 +358,6 @@ void msix_load(PCIDevice *dev, QEMUFile *f) return; } -msix_free_irq_entries(dev); qemu_get_buffer(f, dev-msix_table_page, n * PCI_MSIX_ENTRY_SIZE); qemu_get_buffer(f, dev-msix_table_page + MSIX_PAGE_PENDING, (n + 7) / 8); } @@ -407,7 +388,7 @@ void msix_notify(PCIDevice *dev, unsigned vector) { MSIMessage msg; -if (vector = dev-msix_entries_nr || !dev-msix_entry_used[vector]) +if (vector = dev-msix_entries_nr) return; if (msix_is_masked(dev, vector)) { msix_set_pending(dev, vector); @@ -424,48 +405,31 @@ void msix_reset(PCIDevice *dev) if (!msix_present(dev)) { return; } -msix_free_irq_entries(dev); +msix_clear_all_vectors(dev); dev-config[dev-msix_cap + MSIX_CONTROL_OFFSET] = ~dev-wmask[dev-msix_cap + MSIX_CONTROL_OFFSET]; memset(dev-msix_table_page, 0, MSIX_PAGE_SIZE); msix_mask_all(dev, dev-msix_entries_nr); } -/* PCI spec suggests that devices make it possible for software to configure - * less vectors than 
supported by the device, but does not specify a standard - * mechanism for devices to do so. - * - * We support this by asking devices to declare vectors software is going to - * actually use, and checking this on the notification path. Devices that - * don't want to follow the spec suggestion can declare all vectors as used. */ - -/* Mark vector as used. */ -int
[Qemu-devel] [RFC][PATCH 43/45] msix: Allow to customize capability on init
This enables fully configurable MSI-X initialization by taking config space offset, independent table and PBA BARs and the offset inside them on msix_init. Table and PBA are now realized as two memory subregions, either of the passed BAR regions or the single page container msix_init_simple creates and registers. Will be required for device assignment. Signed-off-by: Jan Kiszka jan.kis...@siemens.com --- hw/msix.c | 245 +--- hw/msix.h |7 ++- hw/pci.h | 12 ++- 3 files changed, 150 insertions(+), 114 deletions(-) diff --git a/hw/msix.c b/hw/msix.c index 258b9c1..548e712 100644 --- a/hw/msix.c +++ b/hw/msix.c @@ -25,18 +25,12 @@ #define MSIX_ENABLE_MASK (PCI_MSIX_FLAGS_ENABLE 8) #define MSIX_MASKALL_MASK (PCI_MSIX_FLAGS_MASKALL 8) -/* How much space does an MSIX table need. */ -/* The spec requires giving the table structure - * a 4K aligned region all by itself. */ #define MSIX_PAGE_SIZE 0x1000 -/* Reserve second half of the page for pending bits */ -#define MSIX_PAGE_PENDING (MSIX_PAGE_SIZE / 2) -#define MSIX_MAX_ENTRIES 32 static void msix_message_from_vector(PCIDevice *dev, unsigned vector, MSIMessage *msg) { -uint8_t *table_entry = dev-msix_table_page + vector * PCI_MSIX_ENTRY_SIZE; +uint8_t *table_entry = dev-msix_table + vector * PCI_MSIX_ENTRY_SIZE; msg-address = pci_get_quad(table_entry + PCI_MSIX_ENTRY_LOWER_ADDR); msg-data = pci_get_long(table_entry + PCI_MSIX_ENTRY_DATA); @@ -54,67 +48,6 @@ static void kvm_msix_free(PCIDevice *dev) } } -/* Add MSI-X capability to the config space for the device. */ -/* Given a bar and its size, add MSI-X table on top of it - * and fill MSI-X capability in the config space. - * Original bar size must be a power of 2 or 0. - * New bar size is returned. 
*/ -static int msix_add_config(struct PCIDevice *pdev, unsigned short nentries, - unsigned bar_nr, unsigned bar_size) -{ -int config_offset; -uint32_t new_size; -uint8_t *config; - -if (nentries 1 || nentries PCI_MSIX_FLAGS_QSIZE + 1) { -return -EINVAL; -} -if (bar_size 0x8000) { -return -ENOSPC; -} - -/* Add space for MSI-X structures */ -if (!bar_size) { -new_size = MSIX_PAGE_SIZE; -} else if (bar_size MSIX_PAGE_SIZE) { -bar_size = MSIX_PAGE_SIZE; -new_size = MSIX_PAGE_SIZE * 2; -} else { -new_size = bar_size * 2; -} - -config_offset = pci_add_capability(pdev, PCI_CAP_ID_MSIX, 0, - MSIX_CAP_LENGTH); -if (config_offset 0) { -return config_offset; -} -pdev-msix_cap = config_offset; - -config = pdev-config + config_offset; -pci_set_word(config + PCI_MSIX_FLAGS, nentries - 1); -/* Table on top of BAR */ -pci_set_long(config + PCI_MSIX_TABLE, bar_size | bar_nr); -/* Pending bits on top of that */ -pci_set_long(config + PCI_MSIX_PBA, - (bar_size + MSIX_PAGE_PENDING) | bar_nr); - -/* Make flags bit writable. 
*/ -pdev-wmask[config_offset + MSIX_CONTROL_OFFSET] |= -MSIX_ENABLE_MASK | MSIX_MASKALL_MASK; - -return 0; -} - -static uint64_t msix_mmio_read(void *opaque, target_phys_addr_t addr, - unsigned size) -{ -PCIDevice *dev = opaque; -unsigned int offset = addr (MSIX_PAGE_SIZE - 1) ~0x3; -void *page = dev-msix_table_page; - -return pci_get_long(page + offset); -} - static uint8_t msix_pending_mask(int vector) { return 1 (vector % 8); @@ -122,7 +55,7 @@ static uint8_t msix_pending_mask(int vector) static uint8_t *msix_pending_byte(PCIDevice *dev, int vector) { -return dev-msix_table_page + MSIX_PAGE_PENDING + vector / 8; +return dev-msix_pba + vector / 8; } static int msix_is_pending(PCIDevice *dev, int vector) @@ -150,7 +83,7 @@ static bool msix_is_masked(PCIDevice *dev, int vector) unsigned offset = vector * PCI_MSIX_ENTRY_SIZE + PCI_MSIX_ENTRY_VECTOR_CTRL; return msix_function_masked(dev) || - dev-msix_table_page[offset] PCI_MSIX_ENTRY_CTRL_MASKBIT; +dev-msix_table[offset] PCI_MSIX_ENTRY_CTRL_MASKBIT; } static void msix_fire_vector_config_notifier(PCIDevice *dev, @@ -213,18 +146,25 @@ void msix_write_config(PCIDevice *dev, uint32_t addr, } } -static void msix_mmio_write(void *opaque, target_phys_addr_t addr, -uint64_t val, unsigned size) +static uint64_t msix_table_read(void *opaque, target_phys_addr_t addr, +unsigned size) +{ +PCIDevice *dev = opaque; + +return pci_get_long(dev-msix_table + addr); +} + +static void msix_table_write(void *opaque, target_phys_addr_t addr, + uint64_t val, unsigned size) { PCIDevice *dev = opaque; -unsigned int offset = addr (MSIX_PAGE_SIZE - 1) ~0x3; -unsigned int
[Qemu-devel] [RFC][PATCH 38/45] msi: Implement config notifiers for legacy MSI
Realize support for MSI config notifiers analogously to MSI-X. The logic is slightly more complex for legacy MSI as per-vector masking is optional here. Device assignment will be the first user. Note that this change does not introduce per-vector masking support. This can be added at some later point, using the notifications the MSI layer provides now. Signed-off-by: Jan Kiszka jan.kis...@siemens.com --- hw/msi.c | 171 -- hw/msi.h |7 ++- hw/pci.c |2 +- hw/pci.h |3 + 4 files changed, 166 insertions(+), 17 deletions(-) diff --git a/hw/msi.c b/hw/msi.c index 23d79dd..2380ee3 100644 --- a/hw/msi.c +++ b/hw/msi.c @@ -241,15 +241,15 @@ void msi_uninit(struct PCIDevice *dev) void msi_reset(PCIDevice *dev) { -uint16_t flags; +uint16_t flags, old_flags; bool msi64bit; if (!msi_present(dev)) { return; } -flags = pci_get_word(dev-config + msi_flags_off(dev)); -flags = ~(PCI_MSI_FLAGS_QSIZE | PCI_MSI_FLAGS_ENABLE); +old_flags = pci_get_word(dev-config + msi_flags_off(dev)); +flags = old_flags ~(PCI_MSI_FLAGS_QSIZE | PCI_MSI_FLAGS_ENABLE); msi64bit = flags PCI_MSI_FLAGS_64BIT; pci_set_word(dev-config + msi_flags_off(dev), flags); @@ -262,6 +262,8 @@ void msi_reset(PCIDevice *dev) pci_set_long(dev-config + msi_mask_off(dev, msi64bit), 0); pci_set_long(dev-config + msi_pending_off(dev, msi64bit), 0); } +/* trigger notifier on potential changes */ +msi_write_config(dev, msi_flags_off(dev), old_flags, 2); MSI_DEV_PRINTF(dev, reset\n); } @@ -306,16 +308,20 @@ void msi_notify(PCIDevice *dev, unsigned int vector) } /* Normally called by pci_default_write_config(). 
*/ -void msi_write_config(PCIDevice *dev, uint32_t addr, uint32_t val, int len) +void msi_write_config(PCIDevice *dev, uint32_t addr, uint32_t old_val, int len) { uint16_t flags = pci_get_word(dev-config + msi_flags_off(dev)); bool msi64bit = flags PCI_MSI_FLAGS_64BIT; bool msi_per_vector_mask = flags PCI_MSI_FLAGS_MASKBIT; +bool fire_vector_notifier = false; unsigned int nr_vectors; uint8_t log_num_vecs; uint8_t log_max_vecs; unsigned int vector; uint32_t pending; +MSIMessage msg; +bool enabled; +int ret; if (!msi_present(dev) || !ranges_overlap(addr, len, dev-msi_cap, msi_cap_sizeof(flags))) { @@ -342,7 +348,35 @@ void msi_write_config(PCIDevice *dev, uint32_t addr, uint32_t val, int len) fprintf(stderr, \n); #endif -if (!(flags PCI_MSI_FLAGS_ENABLE)) { +enabled = flags PCI_MSI_FLAGS_ENABLE; +nr_vectors = msi_nr_vectors(flags); + +if (dev-msi_enable_notifier +range_covers_byte(addr, len, msi_flags_off(dev))) { +old_val = (msi_flags_off(dev) - addr) * 8; +if ((old_val PCI_MSI_FLAGS_ENABLE) != enabled) { +dev-msi_enable_notifier(dev, enabled); +if (enabled dev-msi_vector_config_notifier) { +fire_vector_notifier = true; +} +} +} +if (dev-msi_vector_config_notifier) { +if (ranges_overlap(addr, len, msi_address_lo_off(dev), + msi64bit ? 10 : 6)) { +fire_vector_notifier = true; +} +} +if (fire_vector_notifier) { +for (vector = 0; vector nr_vectors; ++vector) { +msi_message_from_vector(dev, flags, vector, msg); +ret = dev-msi_vector_config_notifier(dev, vector, msg, + msi_is_masked(dev, vector)); +assert(ret = 0); +} +} + +if (!enabled) { kvm_msi_free(dev); return; } @@ -375,13 +409,12 @@ void msi_write_config(PCIDevice *dev, uint32_t addr, uint32_t val, int len) pci_set_word(dev-config + msi_flags_off(dev), flags); } -if (!msi_per_vector_mask) { -/* if per vector masking isn't supported, - there is no pending interrupt. 
*/ +if (!msi_per_vector_mask || +!ranges_overlap(addr, len, msi_mask_off(dev, msi64bit), 4)) { return; } -nr_vectors = msi_nr_vectors(flags); +old_val = (msi_mask_off(dev, msi64bit) - addr) * 8; /* This will discard pending interrupts, if any. */ pending = pci_get_long(dev-config + msi_pending_off(dev, msi64bit)); @@ -390,13 +423,22 @@ void msi_write_config(PCIDevice *dev, uint32_t addr, uint32_t val, int len) /* deliver pending interrupts which are unmasked */ for (vector = 0; vector nr_vectors; ++vector) { -if (msi_is_masked(dev, vector) || !(pending (1U vector))) { -continue; +bool is_masked = msi_is_masked(dev, vector); +unsigned int vector_mask = 1U vector; + +if (!fire_vector_notifier dev-msi_vector_config_notifier +(bool)(old_val vector_mask) != is_masked) { +msi_message_from_vector(dev, flags, vector,
Re: [Qemu-devel] [PATCH RFC v1 1/2] hyper-v: introduce Hyper-V support infrastructure.
On 2011-10-17 11:17, Vadim Rozenfeld wrote: with the following series of patches we are starting to implement some basic Microsoft Hyper-V Enlightenment functionality, like relaxed timing, spinlock, and virtual apic support. For more Hyper-V related information please see: Hypervisor Functional Specification v2.0: For Windows Server 2008 R2 at http://www.microsoft.com/download/en/details.aspx?displaylang=enid=18673 --- Makefile.target|2 + default-configs/i386-softmmu.mak |1 + default-configs/x86_64-softmmu.mak |1 + target-i386/cpuid.c| 14 +++ target-i386/hyperv.c | 69 target-i386/hyperv.h | 30 +++ 6 files changed, 117 insertions(+), 0 deletions(-) create mode 100644 target-i386/hyperv.c create mode 100644 target-i386/hyperv.h diff --git a/Makefile.target b/Makefile.target index 40cc592..2c8e1b8 100644 --- a/Makefile.target +++ b/Makefile.target @@ -202,6 +202,8 @@ obj-$(CONFIG_NO_KVM) += kvm-stub.o obj-y += memory.o LIBS+=-lz +obj-$(CONFIG_HYPERV) += hyperv.o obj-i386-y + QEMU_CFLAGS += $(VNC_TLS_CFLAGS) QEMU_CFLAGS += $(VNC_SASL_CFLAGS) QEMU_CFLAGS += $(VNC_JPEG_CFLAGS) diff --git a/default-configs/i386-softmmu.mak b/default-configs/i386-softmmu.mak index 55589fa..ee69a0a 100644 --- a/default-configs/i386-softmmu.mak +++ b/default-configs/i386-softmmu.mak @@ -21,3 +21,4 @@ CONFIG_PIIX_PCI=y CONFIG_SOUND=y CONFIG_HPET=y CONFIG_APPLESMC=y +CONFIG_HYPERV=y diff --git a/default-configs/x86_64-softmmu.mak b/default-configs/x86_64-softmmu.mak index 8895028..35b1c00 100644 --- a/default-configs/x86_64-softmmu.mak +++ b/default-configs/x86_64-softmmu.mak @@ -21,3 +21,4 @@ CONFIG_PIIX_PCI=y CONFIG_SOUND=y CONFIG_HPET=y CONFIG_APPLESMC=y +CONFIG_HYPERV=y Useless config options (that do not work anyway as Kevin noted). 
diff --git a/target-i386/cpuid.c b/target-i386/cpuid.c index 1e8bcff..50b2d0e 100644 --- a/target-i386/cpuid.c +++ b/target-i386/cpuid.c @@ -27,6 +27,8 @@ #include qemu-option.h #include qemu-config.h +#include hyperv.h + /* feature flags taken from Intel Processor Identification and the CPUID * Instruction and AMD's CPUID Specification. In cases of disagreement * between feature naming conventions, aliases may be added. @@ -716,6 +718,14 @@ static int cpu_x86_find_by_name(x86_def_t *x86_cpu_def, const char *cpu_model) goto error; } x86_cpu_def-tsc_khz = tsc_freq / 1000; +} else if (!strcmp(featurestr, hv_spinlocks)) { + char* err; + numvalue = strtoul(val, err, 0); + if (!*val || *err) { +fprintf(stderr, bad numerical value %s\n, val); +goto error; +} +hyperv_set_spinlock_retries(numvalue); } else { fprintf(stderr, unrecognized feature %s\n, featurestr); goto error; @@ -724,6 +734,10 @@ static int cpu_x86_find_by_name(x86_def_t *x86_cpu_def, const char *cpu_model) check_cpuid = 1; } else if (!strcmp(featurestr, enforce)) { check_cpuid = enforce_cpuid = 1; +} else if (!strcmp(featurestr, hv_relaxed)) { +hyperv_set_relaxed_timing(1); +} else if (!strcmp(featurestr, hv_vapic)) { +hyperv_set_vapic_recommended(1); } else { fprintf(stderr, feature string `%s' not in format (+feature|-feature|feature=xyz)\n, featurestr); goto error; diff --git a/target-i386/hyperv.c b/target-i386/hyperv.c new file mode 100644 index 000..bed859e --- /dev/null +++ b/target-i386/hyperv.c @@ -0,0 +1,69 @@ +/* + * QEMU Hyper-V support + * + * Copyright Red Hat, Inc. 2011 + * + * Author: Vadim Rozenfeld vroze...@redhat.com + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. 
+ * + */ + +#include hyperv.h + +static int hyperv_vapic; +static int hyperv_relaxed_timing; +static int hyperv_spinlock_attempts = HYPERV_SPINLOCK_NEVER_RETRY; + +void hyperv_set_vapic_recommended(int val) +{ +hyperv_vapic = val; +} + +void hyperv_set_relaxed_timing(int val) +{ +hyperv_relaxed_timing = val; +} + +void hyperv_set_spinlock_retries(int val) +{ +hyperv_spinlock_attempts = val; +if (hyperv_spinlock_attempts 0xFFF) { +hyperv_spinlock_attempts = 0xFFF; +} +} hyperv_enabled_x(bool enable) would be nicer. + +int hyperv_enabled(void) +{ +return hyperv_hypercall_available() || hyperv_get_relaxed_timing(); +} + +int hyperv_hypercall_available(void) +{ +if (hyperv_vapic || +(hyperv_spinlock_attempts != HYPERV_SPINLOCK_NEVER_RETRY)) { + return 1; +
Re: [Qemu-devel] [PATCH 1/1 V5] kernel/kvm: introduce KVM_SET_LINT1 and fix improper nmi emulation
On 10/16/2011 05:39 PM, Avi Kivity wrote: On 10/14/2011 11:03 AM, Lai Jiangshan wrote: Currently, NMI interrupt is blindly sent to all the vCPUs when NMI button event happens. This doesn't properly emulate real hardware on which NMI button event triggers LINT1. Because of this, NMI is sent to the processor even when LINT1 is masked in LVT. For example, this causes the problem that kdump initiated by NMI sometimes doesn't work on KVM, because kdump assumes NMI is masked on CPUs other than CPU0. With this patch, we introduce KVM_SET_LINT1, and we can use KVM_SET_LINT1 to correctly emulate the NMI button without changing the old KVM_NMI behavior. @@ -759,6 +762,8 @@ struct kvm_clock_data { #define KVM_CREATE_SPAPR_TCE _IOW(KVMIO, 0xa8, struct kvm_create_spapr_tce) /* Available with KVM_CAP_RMA */ #define KVM_ALLOCATE_RMA _IOR(KVMIO, 0xa9, struct kvm_allocate_rma) +/* Available with KVM_CAP_SET_LINT1 for x86 */ +#define KVM_SET_LINT1 _IO(KVMIO, 0xaa) LINT1 may have been programmed as a level-triggered interrupt instead of edge triggered (NMI or interrupt). We can use the ioctl argument for the level (and pressing the NMI button needs to pulse the level to 1 and back to 0). Hi, Avi, Jan, Which approach do you prefer? I need to know the result before wasting too much time respinning the approach. 1) Fix KVM_NMI emulation approach (which is v3 patchset) - It directly fixes the problem and matches the real hardware more, but it changes KVM_NMI behavior. - Requires both kernel-side and userspace-side fix. 2) Get the LAPIC state from kernel irqchip, and inject NMI if it is allowed (which is v4 patchset) - Simple, doesn't change any kernel behavior. - Only needs the userspace-side fix 3) Add KVM_SET_LINT1 approach (which is v5 patchset) - doesn't change the kernel's KVM_NMI behavior. - much more complex - Requires both kernel-side and userspace-side fix. - userspace-side should also handle the !KVM_SET_LINT1 condition, it uses all of the 2) approach's code.
It means this approach equals the 2) approach + the KVM_SET_LINT1 ioctl. This is an urgent bug for us; we need to settle it soon. Thanks, Lai
Re: [Qemu-devel] [PATCH RFC v1 2/2] hyper-v: initialize Hyper-V CPUID leafs.
On 10/17/2011 11:17 AM, Vadim Rozenfeld wrote: @@ -379,11 +380,16 @@ int kvm_arch_init_vcpu(CPUState *env) cpuid_i = 0; /* Paravirtualization CPUIDs */ -memcpy(signature, "KVMKVMKVM\0\0\0", 12); c = &cpuid_data.entries[cpuid_i++]; memset(c, 0, sizeof(*c)); c->function = KVM_CPUID_SIGNATURE; -c->eax = 0; +if (!hyperv_enabled()) { +memcpy(signature, "KVMKVMKVM\0\0\0", 12); +c->eax = 0; +} else { +memcpy(signature, "Microsoft Hv", 12); +c->eax = HYPERV_CPUID_MIN; +} Even not counting that hyper-v support should IMHO not be in KVM-specific code, I still think this shouldn't remove KVM leaves completely but rather move them to 0x40000100. The KVM paravirtualization code then can similarly probe with 0x100 stride up to 0x40001000. This is what was done for Xen, and it allows enabling enlightenments independent of whether the guest is Linux or Windows. However, let's get a third opinion---Avi, what do you think? Paolo
[Qemu-devel] [RFC][PATCH 05/45] msi: Invoke msi/msix_write_config from PCI core
Also this functions is better invoked by the core than by each and every device. This allows to drop the config_write callbacks from ich and intel-hda. CC: Alexander Graf ag...@suse.de CC: Gerd Hoffmann kra...@redhat.com CC: Isaku Yamahata yamah...@valinux.co.jp Signed-off-by: Jan Kiszka jan.kis...@siemens.com --- hw/ide/ich.c|8 hw/intel-hda.c | 12 hw/ioh3420.c|1 - hw/msi.c|2 +- hw/pci.c|3 +++ hw/virtio-pci.c |2 -- hw/xio3130_downstream.c |1 - hw/xio3130_upstream.c |1 - 8 files changed, 4 insertions(+), 26 deletions(-) diff --git a/hw/ide/ich.c b/hw/ide/ich.c index 3f7510f..a470c01 100644 --- a/hw/ide/ich.c +++ b/hw/ide/ich.c @@ -139,13 +139,6 @@ static int pci_ich9_uninit(PCIDevice *dev) return 0; } -static void pci_ich9_write_config(PCIDevice *pci, uint32_t addr, - uint32_t val, int len) -{ -pci_default_write_config(pci, addr, val, len); -msi_write_config(pci, addr, val, len); -} - static PCIDeviceInfo ich_ahci_info[] = { { .qdev.name= ich9-ahci, @@ -154,7 +147,6 @@ static PCIDeviceInfo ich_ahci_info[] = { .qdev.vmsd= vmstate_ahci, .init = pci_ich9_ahci_init, .exit = pci_ich9_uninit, -.config_write = pci_ich9_write_config, .vendor_id= PCI_VENDOR_ID_INTEL, .device_id= PCI_DEVICE_ID_INTEL_82801IR, .revision = 0x02, diff --git a/hw/intel-hda.c b/hw/intel-hda.c index 4272204..0453039 100644 --- a/hw/intel-hda.c +++ b/hw/intel-hda.c @@ -1156,17 +1156,6 @@ static int intel_hda_exit(PCIDevice *pci) return 0; } -static void intel_hda_write_config(PCIDevice *pci, uint32_t addr, - uint32_t val, int len) -{ -IntelHDAState *d = DO_UPCAST(IntelHDAState, pci, pci); - -pci_default_write_config(pci, addr, val, len); -if (d-msi) { -msi_write_config(pci, addr, val, len); -} -} - static int intel_hda_post_load(void *opaque, int version) { IntelHDAState* d = opaque; @@ -1250,7 +1239,6 @@ static PCIDeviceInfo intel_hda_info = { .qdev.reset = intel_hda_reset, .init = intel_hda_init, .exit = intel_hda_exit, -.config_write = intel_hda_write_config, .vendor_id= PCI_VENDOR_ID_INTEL, 
.device_id= 0x2668, .revision = 1, diff --git a/hw/ioh3420.c b/hw/ioh3420.c index fc2fb3b..886ede8 100644 --- a/hw/ioh3420.c +++ b/hw/ioh3420.c @@ -71,7 +71,6 @@ static void ioh3420_write_config(PCIDevice *d, pci_get_long(d-config + d-exp.aer_cap + PCI_ERR_ROOT_COMMAND); pci_bridge_write_config(d, address, val, len); -msi_write_config(d, address, val, len); ioh3420_aer_vector_update(d); pcie_cap_slot_write_config(d, address, val, len); pcie_aer_write_config(d, address, val, len); diff --git a/hw/msi.c b/hw/msi.c index b117f69..c924e38 100644 --- a/hw/msi.c +++ b/hw/msi.c @@ -369,7 +369,7 @@ void msi_notify(PCIDevice *dev, unsigned int vector) stl_le_phys(address, data); } -/* call this function after updating configs by pci_default_write_config(). */ +/* Normally called by pci_default_write_config(). */ void msi_write_config(PCIDevice *dev, uint32_t addr, uint32_t val, int len) { uint16_t flags = pci_get_word(dev-config + msi_flags_off(dev)); diff --git a/hw/pci.c b/hw/pci.c index 933d49e..6673989 100644 --- a/hw/pci.c +++ b/hw/pci.c @@ -1154,6 +1154,9 @@ void pci_default_write_config(PCIDevice *d, uint32_t addr, uint32_t val, int l) if (range_covers_byte(addr, l, PCI_COMMAND)) pci_update_irq_disabled(d, was_irq_disabled); + +msi_write_config(d, addr, val, l); +msix_write_config(d, addr, val, l); } /***/ diff --git a/hw/virtio-pci.c b/hw/virtio-pci.c index 3fb250f..615295e 100644 --- a/hw/virtio-pci.c +++ b/hw/virtio-pci.c @@ -502,8 +502,6 @@ static void virtio_write_config(PCIDevice *pci_dev, uint32_t address, virtio_set_status(proxy-vdev, proxy-vdev-status ~VIRTIO_CONFIG_S_DRIVER_OK); } - -msix_write_config(pci_dev, address, val, len); } static unsigned virtio_pci_get_features(void *opaque) diff --git a/hw/xio3130_downstream.c b/hw/xio3130_downstream.c index 464eefa..8e9117d 100644 --- a/hw/xio3130_downstream.c +++ b/hw/xio3130_downstream.c @@ -41,7 +41,6 @@ static void xio3130_downstream_write_config(PCIDevice *d, uint32_t address, pci_bridge_write_config(d, 
address, val, len); pcie_cap_flr_write_config(d, address, val, len); pcie_cap_slot_write_config(d, address, val, len); -msi_write_config(d, address, val, len); pcie_aer_write_config(d, address, val, len); } diff --git a/hw/xio3130_upstream.c b/hw/xio3130_upstream.c index 0d8d254..707401e 100644 --- a/hw/xio3130_upstream.c +++ b/hw/xio3130_upstream.c @@
Re: [Qemu-devel] [PATCH 1/1 V5] kernel/kvm: introduce KVM_SET_LINT1 and fix improper nmi emulation
On 10/17/2011 11:40 AM, Lai Jiangshan wrote: LINT1 may have been programmed as a level-triggered interrupt instead of edge triggered (NMI or interrupt). We can use the ioctl argument for the level (and pressing the NMI button needs to pulse the level to 1 and back to 0). Hi, Avi, Jan, Which approach do you prefer? I need to know the result before wasting too much time respinning the approach. Yes, sorry about the slow and sometimes conflicting feedback. 1) Fix KVM_NMI emulation approach (which is v3 patchset) - It directly fixes the problem and matches the real hardware more, but it changes KVM_NMI behavior. - Requires both kernel-side and userspace-side fix. 2) Get the LAPIC state from kernel irqchip, and inject NMI if it is allowed (which is v4 patchset) - Simple, doesn't change any kernel behavior. - Only needs the userspace-side fix 3) Add KVM_SET_LINT1 approach (which is v5 patchset) - doesn't change the kernel's KVM_NMI behavior. - much more complex - Requires both kernel-side and userspace-side fix. - userspace-side should also handle the !KVM_SET_LINT1 condition, it uses all of the 2) approach's code. It means this approach equals the 2) approach + the KVM_SET_LINT1 ioctl. This is an urgent bug for us; we need to settle it soon. While (1) is simple, it overloads a single ioctl with two meanings, that's not so good. Whether we do (1) or (3), we need (2) as well, for older kernels. So I recommend first focusing on (2) and merging it, then doing (3). (note an additional issue with 3 is whether to make it a vm or vcpu ioctl - we've been assuming vcpu ioctl but it's not necessarily the best choice). -- error compiling committee.c: too many arguments to function
Re: [Qemu-devel] [PATCH 1/1 V5] kernel/kvm: introduce KVM_SET_LINT1 and fix improper nmi emulation
On 10/17/2011 11:17 AM, Lai Jiangshan wrote: On 10/16/2011 05:39 PM, Avi Kivity wrote: On 10/14/2011 11:03 AM, Lai Jiangshan wrote: Currently, NMI interrupt is blindly sent to all the vCPUs when NMI button event happens. This doesn't properly emulate real hardware on which NMI button event triggers LINT1. Because of this, NMI is sent to the processor even when LINT1 is masked in LVT. For example, this causes the problem that kdump initiated by NMI sometimes doesn't work on KVM, because kdump assumes NMI is masked on CPUs other than CPU0. With this patch, we introduce introduce KVM_SET_LINT1, and we can use KVM_SET_LINT1 to correctly emulate NMI button without change the old KVM_NMI behavior. @@ -759,6 +762,8 @@ struct kvm_clock_data { #define KVM_CREATE_SPAPR_TCE_IOW(KVMIO, 0xa8, struct kvm_create_spapr_tce) /* Available with KVM_CAP_RMA */ #define KVM_ALLOCATE_RMA_IOR(KVMIO, 0xa9, struct kvm_allocate_rma) +/* Available with KVM_CAP_SET_LINT1 for x86 */ +#define KVM_SET_LINT1 _IO(KVMIO, 0xaa) LINT1 may have been programmed as a level -triggered interrupt instead of edge triggered (NMI or interrupt). We can use the ioctl argument for the level (and pressing the NMI button needs to pulse the level to 1 and back to 0). Hi, Avi, How to handle level=0 in the kernel? Or just ignore it? It needs to be handled according to the delivery mode, polarity, and trigger mode bits in the LVT. For example, a Fixed delivery mode with polarity 1 and level trigger mode will post the interrupt as long as it is in level 0 and not masked by the ISR. __apic_accept_irq() should handle this. -- error compiling committee.c: too many arguments to function
Re: [Qemu-devel] [PATCH] fix memory leak in aio_write_f
Am 28.09.2011 08:57, schrieb a...@redhat.com: From: Alex Jia a...@redhat.com Haven't released memory of 'ctx' before return. Signed-off-by: Alex Jia a...@redhat.com Thanks, applied to the block branch. Kevin
Re: [Qemu-devel] [PATCH v2 0/2] spice migration interface v2 (RHBZ 737921)
On 10/17/11 10:03, Yonit Halperin wrote: Same as the previous series with a small fix to allow compilation with Spice disabled. Replaced patches. Thanks, Gerd
[Qemu-devel] [PATCH] change free() to g_free() to pair with g_malloc() series.
Signed-off-by: Ray Wang rayw...@linux.vnet.ibm.com --- block/dmg.c | 14 +++--- target-arm/helper.c |2 +- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/block/dmg.c b/block/dmg.c index 64c3cce..661f31b 100644 --- a/block/dmg.c +++ b/block/dmg.c @@ -284,14 +284,14 @@ static void dmg_close(BlockDriverState *bs) { BDRVDMGState *s = bs-opaque; if(s-n_chunks0) { - free(s-types); - free(s-offsets); - free(s-lengths); - free(s-sectors); - free(s-sectorcounts); + g_free(s-types); + g_free(s-offsets); + g_free(s-lengths); + g_free(s-sectors); + g_free(s-sectorcounts); } -free(s-compressed_chunk); -free(s-uncompressed_chunk); +g_free(s-compressed_chunk); +g_free(s-uncompressed_chunk); inflateEnd(s-zstream); } diff --git a/target-arm/helper.c b/target-arm/helper.c index e2428eb..2b17dc9 100644 --- a/target-arm/helper.c +++ b/target-arm/helper.c @@ -471,7 +471,7 @@ static uint32_t cpu_arm_find_by_name(const char *name) void cpu_arm_close(CPUARMState *env) { -free(env); +g_free(env); } uint32_t cpsr_read(CPUARMState *env) -- 1.7.4.1
[Qemu-devel] [RFC][PATCH 30/45] pci-assign: Rename assign_irq to assign_intx
The previous name may incorrectly suggest that this function assigns all types of IRQs though it's only dealing with legacy interrupts. Signed-off-by: Jan Kiszka jan.kis...@siemens.com --- hw/device-assignment.c | 14 +++--- 1 files changed, 7 insertions(+), 7 deletions(-) diff --git a/hw/device-assignment.c b/hw/device-assignment.c index 799b816..4e4349b 100644 --- a/hw/device-assignment.c +++ b/hw/device-assignment.c @@ -807,7 +807,7 @@ static int assign_device(AssignedDevice *dev) return r; } -static int assign_irq(AssignedDevice *dev) +static int assign_intx(AssignedDevice *dev) { struct kvm_assigned_irq assigned_irq_data; int irq, r = 0; @@ -829,7 +829,7 @@ static int assign_irq(AssignedDevice *dev) assigned_irq_data.flags = dev-irq_requested_type; r = kvm_deassign_irq(kvm_state, assigned_irq_data); if (r) { -perror(assign_irq: deassign); +perror(assign_intx: deassign); } dev-irq_requested_type = 0; } @@ -898,7 +898,7 @@ void assigned_dev_update_irqs(void) while (dev) { next = QLIST_NEXT(dev, next); if (dev-irq_requested_type KVM_DEV_IRQ_HOST_INTX) { -r = assign_irq(dev); +r = assign_intx(dev); if (r 0) { qdev_unplug(dev-dev.qdev); } @@ -967,7 +967,7 @@ static void assigned_dev_update_msi(PCIDevice *pci_dev) assigned_dev-girq = -1; assigned_dev-irq_requested_type = assigned_irq_data.flags; } else { -assign_irq(assigned_dev); +assign_intx(assigned_dev); } } @@ -1102,7 +1102,7 @@ static void assigned_dev_update_msix(PCIDevice *pci_dev) assigned_dev-girq = -1; assigned_dev-irq_requested_type = assigned_irq_data.flags; } else { -assign_irq(assigned_dev); +assign_intx(assigned_dev); } } @@ -1645,8 +1645,8 @@ static int assigned_initfn(struct PCIDevice *pci_dev) if (r 0) goto out; -/* assign irq for the device */ -r = assign_irq(dev); +/* assign legacy INTx to the device */ +r = assign_intx(dev); if (r 0) goto assigned_out; -- 1.7.3.4
Re: [Qemu-devel] [PATCHv3] ps2: migrate ledstate
On Mon, Oct 17, 2011 at 11:25:42AM +0200, Gerd Hoffmann wrote: static const VMStateDescription vmstate_ps2_common = { .name = PS2 Common State, -.version_id = 3, +.version_id = 4, .minimum_version_id = 2, .minimum_version_id_old = 2, .fields = (VMStateField []) { version_id in vmstate_ps2_keyboard must be updated too. Yeah, I somehow updated the field in the wrong struct, /me blushes and hides. I don't think this struct version needs to be updated. The version update in vmstate_ps2_common might not be needed, IIRC the versions for stuff referenced via VMSTATE_STRUCT() isn't used anyway, Juan? Ah, ok, I hoped it would help to handle migration between versions with and without this field, I guess I was too optimistic :) Thanks, Christophe pgpMro5SS9BOn.pgp Description: PGP signature
Re: [Qemu-devel] [PATCH v8 1/4] block: add the block queue support
Am 26.09.2011 10:01, schrieb Zhi Yong Wu: On Fri, Sep 23, 2011 at 11:32 PM, Kevin Wolf kw...@redhat.com wrote: Am 08.09.2011 12:11, schrieb Zhi Yong Wu: Signed-off-by: Zhi Yong Wu wu...@linux.vnet.ibm.com --- Makefile.objs |2 +- block/blk-queue.c | 201 + block/blk-queue.h | 59 block_int.h | 27 +++ 4 files changed, 288 insertions(+), 1 deletions(-) create mode 100644 block/blk-queue.c create mode 100644 block/blk-queue.h diff --git a/Makefile.objs b/Makefile.objs index 26b885b..5dcf456 100644 --- a/Makefile.objs +++ b/Makefile.objs @@ -33,7 +33,7 @@ block-nested-y += raw.o cow.o qcow.o vdi.o vmdk.o cloop.o dmg.o bochs.o vpc.o vv block-nested-y += qcow2.o qcow2-refcount.o qcow2-cluster.o qcow2-snapshot.o qcow2-cache.o block-nested-y += qed.o qed-gencb.o qed-l2-cache.o qed-table.o qed-cluster.o block-nested-y += qed-check.o -block-nested-y += parallels.o nbd.o blkdebug.o sheepdog.o blkverify.o +block-nested-y += parallels.o nbd.o blkdebug.o sheepdog.o blkverify.o blk-queue.o block-nested-$(CONFIG_WIN32) += raw-win32.o block-nested-$(CONFIG_POSIX) += raw-posix.o block-nested-$(CONFIG_CURL) += curl.o diff --git a/block/blk-queue.c b/block/blk-queue.c new file mode 100644 index 000..adef497 --- /dev/null +++ b/block/blk-queue.c @@ -0,0 +1,201 @@ +/* + * QEMU System Emulator queue definition for block layer + * + * Copyright (c) IBM, Corp. 
2011 + * + * Authors: + * Zhi Yong Wu wu...@linux.vnet.ibm.com + * Stefan Hajnoczi stefa...@linux.vnet.ibm.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the Software), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED AS IS, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include block_int.h +#include block/blk-queue.h +#include qemu-common.h + +/* The APIs for block request queue on qemu block layer. + */ + +struct BlockQueueAIOCB { +BlockDriverAIOCB common; +QTAILQ_ENTRY(BlockQueueAIOCB) entry; +BlockRequestHandler *handler; +BlockDriverAIOCB *real_acb; + +int64_t sector_num; +QEMUIOVector *qiov; +int nb_sectors; +}; The idea is that each request is first queued on the QTAILQ, and at some point it's removed from the queue and gets a real_acb. But it never has both at the same time. Correct? NO. if block I/O throttling is enabled and I/O rate at runtime exceed this limits, this request will be enqueued. It represents the whole lifecycle of one enqueued request. 
What are the conditions under which the request will still be enqueued, but has a real_acb at the same time? + +typedef struct BlockQueueAIOCB BlockQueueAIOCB; + +struct BlockQueue { +QTAILQ_HEAD(requests, BlockQueueAIOCB) requests; +bool req_failed; +bool flushing; +}; I find req_failed pretty confusing. Needs documentation at least, but most probably also a better name. OK. request_has_failed? No, that doesn't describe what it's really doing. You set req_failed = true by default and then on some obscure condition clear it or not. It's tracking something, but I'm not sure what meaning it has during the whole process. + +static void qemu_block_queue_dequeue(BlockQueue *queue, + BlockQueueAIOCB *request) +{ +BlockQueueAIOCB *req; + +assert(queue); +while (!QTAILQ_EMPTY(&queue->requests)) { +req = QTAILQ_FIRST(&queue->requests); +if (req == request) { +QTAILQ_REMOVE(&queue->requests, req, entry); +break; +} +} +} Is it just me or is this an endless loop if the request isn't the first element in the list? queue->requests is only used to store requests which exceed the limits. Why is the request not the first element? Why do you have a loop if it's always the first element? +void qemu_del_block_queue(BlockQueue *queue) +{ +BlockQueueAIOCB *request, *next; + +QTAILQ_FOREACH_SAFE(request, &queue->requests,
[Qemu-devel] [RFC][PATCH 06/45] msix: Prevent bogus mask updates on MMIO accesses
Only accesses to the MSI-X table must trigger a call to msix_handle_mask_update or a notifier invocation. Signed-off-by: Jan Kiszka jan.kis...@siemens.com --- hw/msix.c | 16 ++-- 1 files changed, 10 insertions(+), 6 deletions(-) diff --git a/hw/msix.c b/hw/msix.c index 2c4de21..33cb716 100644 --- a/hw/msix.c +++ b/hw/msix.c @@ -264,18 +264,22 @@ static void msix_mmio_write(void *opaque, target_phys_addr_t addr, { PCIDevice *dev = opaque; unsigned int offset = addr (MSIX_PAGE_SIZE - 1) ~0x3; -int vector = offset / PCI_MSIX_ENTRY_SIZE; +unsigned int vector = offset / PCI_MSIX_ENTRY_SIZE; int was_masked = msix_is_masked(dev, vector); pci_set_long(dev-msix_table_page + offset, val); if (kvm_enabled() kvm_irqchip_in_kernel()) { kvm_msix_update(dev, vector, was_masked, msix_is_masked(dev, vector)); } -if (was_masked != msix_is_masked(dev, vector) dev-msix_mask_notifier) { -int r = dev-msix_mask_notifier(dev, vector, - msix_is_masked(dev, vector)); -assert(r = 0); + +if (vector dev-msix_entries_nr) { +if (was_masked != msix_is_masked(dev, vector) +dev-msix_mask_notifier) { +int r = dev-msix_mask_notifier(dev, vector, +msix_is_masked(dev, vector)); +assert(r = 0); +} +msix_handle_mask_update(dev, vector); } -msix_handle_mask_update(dev, vector); } static const MemoryRegionOps msix_mmio_ops = { -- 1.7.3.4
Re: [Qemu-devel] [PATCH v8 2/4] block: add the command line support
Am 26.09.2011 08:15, schrieb Zhi Yong Wu: On Fri, Sep 23, 2011 at 11:54 PM, Kevin Wolf kw...@redhat.com wrote: +} + +static void bdrv_block_timer(void *opaque) +{ +BlockDriverState *bs = opaque; +BlockQueue *queue= bs-block_queue; + +qemu_block_queue_flush(queue); Hm, didn't really notice it while reading patch 1, but qemu_block_queue_flush() is misleading. It's really something like Why do you say this is misleading? qemu_block_queue_submit(). Right. It will resubmit all enqueued I/O requests. For me, flush sounds as if it waits for completion of all requests. Kevin
[Qemu-devel] [RFC][PATCH 29/45] pci-assign: Drop kvm_assigned_irq::host_irq initialization
real_device.irq is never set explicitly, thus remains 0. So we can simply drop this line as assigned_irq_data is zero-initialized anyway. Signed-off-by: Jan Kiszka jan.kis...@siemens.com --- hw/device-assignment.c |1 - 1 files changed, 0 insertions(+), 1 deletions(-) diff --git a/hw/device-assignment.c b/hw/device-assignment.c index 07e9f5a..799b816 100644 --- a/hw/device-assignment.c +++ b/hw/device-assignment.c @@ -825,7 +825,6 @@ static int assign_irq(AssignedDevice *dev) memset(assigned_irq_data, 0, sizeof(assigned_irq_data)); assigned_irq_data.assigned_dev_id = calc_assigned_dev_id(dev); assigned_irq_data.guest_irq = irq; -assigned_irq_data.host_irq = dev-real_device.irq; if (dev-irq_requested_type) { assigned_irq_data.flags = dev-irq_requested_type; r = kvm_deassign_irq(kvm_state, assigned_irq_data); -- 1.7.3.4
[Qemu-devel] [RFC][PATCH 42/45] msix: Introduce msix_init_simple
Devices models are usually not interested in specifying MSI-X configuration details beyond the number of vectors to provide and the BAR number to use. Layout of an exclusively used BAR and its registration can also be handled centrally. This is the purpose of msix_init_simple. It provides handy services to the existing users. Future users like device assignment may require more detailed setup specification. For them we will (re-)introduce msix_init with the full list of configuration option (in contrast to the current code). Signed-off-by: Jan Kiszka jan.kis...@siemens.com --- hw/ivshmem.c|6 +- hw/msix.c | 35 ++- hw/msix.h |7 +++ hw/virtio-pci.c | 15 +-- hw/virtio-pci.h |1 - 5 files changed, 23 insertions(+), 41 deletions(-) diff --git a/hw/ivshmem.c b/hw/ivshmem.c index a402c98..d9dbd18 100644 --- a/hw/ivshmem.c +++ b/hw/ivshmem.c @@ -65,7 +65,6 @@ typedef struct IVShmemState { */ MemoryRegion bar; MemoryRegion ivshmem; -MemoryRegion msix_bar; uint64_t ivshmem_size; /* size of shared memory region */ int shm_fd; /* shared memory file descriptor */ @@ -539,10 +538,7 @@ static void ivshmem_setup_msi(IVShmemState *s) { /* allocate the MSI-X vectors */ -memory_region_init(s-msix_bar, ivshmem-msix, 4096); -if (!msix_init(s-dev, s-vectors, s-msix_bar, 1, 0)) { -pci_register_bar(s-dev, 1, PCI_BASE_ADDRESS_SPACE_MEMORY, - s-msix_bar); +if (!msix_init_simple(s-dev, s-vectors, 1)) { IVSHMEM_DPRINTF(msix initialized (%d vectors)\n, s-vectors); } else { IVSHMEM_DPRINTF(msix initialization failed\n); diff --git a/hw/msix.c b/hw/msix.c index bccd8b1..258b9c1 100644 --- a/hw/msix.c +++ b/hw/msix.c @@ -244,17 +244,6 @@ static const MemoryRegionOps msix_mmio_ops = { }, }; -static void msix_mmio_setup(PCIDevice *d, MemoryRegion *bar) -{ -uint8_t *config = d-config + d-msix_cap; -uint32_t table = pci_get_long(config + PCI_MSIX_TABLE); -uint32_t offset = table ~(MSIX_PAGE_SIZE - 1); -/* TODO: for assigned devices, we'll want to make it possible to map - * pending bits separately in 
case they are in a separate bar. */ - -memory_region_add_subregion(bar, offset, d-msix_mmio); -} - static void msix_mask_all(struct PCIDevice *dev, unsigned nentries) { int vector; @@ -272,11 +261,9 @@ static void msix_mask_all(struct PCIDevice *dev, unsigned nentries) } } -/* Initialize the MSI-X structures. Note: if MSI-X is supported, BAR size is - * modified, it should be retrieved with msix_bar_size. */ -int msix_init(struct PCIDevice *dev, unsigned short nentries, - MemoryRegion *bar, - unsigned bar_nr, unsigned bar_size) +/* Initialize the MSI-X structures in a single dedicated BAR + * and register it. */ +int msix_init_simple(PCIDevice *dev, unsigned short nentries, unsigned bar_nr) { int ret; @@ -296,14 +283,16 @@ int msix_init(struct PCIDevice *dev, unsigned short nentries, msix, MSIX_PAGE_SIZE); dev-msix_entries_nr = nentries; -ret = msix_add_config(dev, nentries, bar_nr, bar_size); +ret = msix_add_config(dev, nentries, bar_nr, 0); if (ret) goto err_config; dev-msix_cache = g_malloc0(nentries * sizeof *dev-msix_cache); dev-cap_present |= QEMU_PCI_CAP_MSIX; -msix_mmio_setup(dev, bar); + +pci_register_bar(dev, bar_nr, PCI_BASE_ADDRESS_SPACE_MEMORY, + dev-msix_mmio); return 0; err_config: @@ -315,10 +304,10 @@ err_config: } /* Clean up resources for the device. 
*/ -int msix_uninit(PCIDevice *dev, MemoryRegion *bar) +void msix_uninit(PCIDevice *dev, MemoryRegion *bar) { if (!msix_present(dev)) { -return 0; +return; } pci_del_capability(dev, PCI_CAP_ID_MSIX, MSIX_CAP_LENGTH); dev-msix_cap = 0; @@ -332,7 +321,11 @@ int msix_uninit(PCIDevice *dev, MemoryRegion *bar) g_free(dev-msix_cache); dev-cap_present = ~QEMU_PCI_CAP_MSIX; -return 0; +} + +void msix_uninit_simple(PCIDevice *dev) +{ +msix_uninit(dev, dev-msix_mmio); } void msix_save(PCIDevice *dev, QEMUFile *f) diff --git a/hw/msix.h b/hw/msix.h index dfc6087..56e7ba5 100644 --- a/hw/msix.h +++ b/hw/msix.h @@ -4,14 +4,13 @@ #include qemu-common.h #include pci.h -int msix_init(PCIDevice *pdev, unsigned short nentries, - MemoryRegion *bar, - unsigned bar_nr, unsigned bar_size); +int msix_init_simple(PCIDevice *dev, unsigned short nentries, unsigned bar_nr); void msix_write_config(PCIDevice *pci_dev, uint32_t address, uint32_t old_val, int len); -int msix_uninit(PCIDevice *d, MemoryRegion *bar); +void msix_uninit(PCIDevice *d, MemoryRegion *bar); +void msix_uninit_simple(PCIDevice *d); void msix_save(PCIDevice *dev, QEMUFile *f); void msix_load(PCIDevice *dev, QEMUFile *f); diff --git
Re: [Qemu-devel] [PATCH 1/1 V5] kernel/kvm: introduce KVM_SET_LINT1 and fix improper nmi emulation
On 2011-10-17 11:54, Avi Kivity wrote: On 10/17/2011 11:17 AM, Lai Jiangshan wrote: On 10/16/2011 05:39 PM, Avi Kivity wrote: On 10/14/2011 11:03 AM, Lai Jiangshan wrote: Currently, NMI interrupt is blindly sent to all the vCPUs when NMI button event happens. This doesn't properly emulate real hardware on which NMI button event triggers LINT1. Because of this, NMI is sent to the processor even when LINT1 is masked in LVT. For example, this causes the problem that kdump initiated by NMI sometimes doesn't work on KVM, because kdump assumes NMI is masked on CPUs other than CPU0. With this patch, we introduce KVM_SET_LINT1, and we can use KVM_SET_LINT1 to correctly emulate NMI button without changing the old KVM_NMI behavior. @@ -759,6 +762,8 @@ struct kvm_clock_data { #define KVM_CREATE_SPAPR_TCE_IOW(KVMIO, 0xa8, struct kvm_create_spapr_tce) /* Available with KVM_CAP_RMA */ #define KVM_ALLOCATE_RMA_IOR(KVMIO, 0xa9, struct kvm_allocate_rma) +/* Available with KVM_CAP_SET_LINT1 for x86 */ +#define KVM_SET_LINT1 _IO(KVMIO, 0xaa) LINT1 may have been programmed as a level-triggered interrupt instead of edge-triggered (NMI or interrupt). We can use the ioctl argument for the level (and pressing the NMI button needs to pulse the level to 1 and back to 0). Hi, Avi, How to handle level=0 in the kernel? Or just ignore it? It needs to be handled according to the delivery mode, polarity, and trigger mode bits in the LVT. For example, a Fixed delivery mode with polarity 1 and level trigger mode will post the interrupt as long as it is in level 0 and not masked by the ISR. __apic_accept_irq() should handle this. But I think it's not yet fully prepared for this (level is only considered for APIC_DM_INIT e.g.). Jan -- Siemens AG, Corporate Technology, CT T DE IT 1 Corporate Competence Center Embedded Linux
[Qemu-devel] [RFC][PATCH 26/45] qemu-kvm: Use g_realloc for irq_routes extension
This allows dropping the out-of-memory check. Signed-off-by: Jan Kiszka jan.kis...@siemens.com --- qemu-kvm.c |7 +-- 1 files changed, 1 insertions(+), 6 deletions(-) diff --git a/qemu-kvm.c b/qemu-kvm.c index 6bdd7b5..eb8f176 100644 --- a/qemu-kvm.c +++ b/qemu-kvm.c @@ -258,7 +258,6 @@ int kvm_add_routing_entry(struct kvm_irq_routing_entry *entry, { #ifdef KVM_CAP_IRQ_ROUTING KVMState *s = kvm_state; -struct kvm_irq_routing *z; struct kvm_irq_routing_entry *new; int n, size; @@ -269,12 +268,8 @@ int kvm_add_routing_entry(struct kvm_irq_routing_entry *entry, } size = sizeof(struct kvm_irq_routing); size += n * sizeof(*new); -z = realloc(s-irq_routes, size); -if (!z) { -return -ENOMEM; -} +s-irq_routes = g_realloc(s-irq_routes, size); s-nr_allocated_irq_routes = n; -s-irq_routes = z; s-msi_cache = g_realloc(s-msi_cache, sizeof(*s-msi_cache) * n); } -- 1.7.3.4
Re: [Qemu-devel] [PATCH v8 3/4] block: add block timer and throttling algorithm
Am 26.09.2011 09:24, schrieb Zhi Yong Wu: On Sat, Sep 24, 2011 at 12:19 AM, Kevin Wolf kw...@redhat.com wrote: Am 08.09.2011 12:11, schrieb Zhi Yong Wu: Note: 1.) When bps/iops limits are specified to a small value such as 511 bytes/s, this VM will hang up. We are considering how to handle this scenario. 2.) When dd command is issued in guest, if its option bs is set to a large value such as bs=1024K, the result speed will be slightly bigger than the limits. For these problems, if you have any good ideas, please let us know.:) Signed-off-by: Zhi Yong Wu wu...@linux.vnet.ibm.com --- block.c | 259 --- block.h |1 - 2 files changed, 248 insertions(+), 12 deletions(-) One general comment: What about synchronous and/or coroutine I/O operations? Do you think they are just not important enough to consider here or were they forgotten? For sync ops, we assume that they will be converted into async mode at some point in the future, right? For coroutine I/O, it is introduced in image driver layer, and behind bdrv_aio_readv/writev. I think that we need not consider them, right? Meanwhile the block layer has been changed to handle all requests in terms of coroutines. So you would best move your intercepting code into the coroutine functions. Also, do I understand correctly that you're always submitting the whole Right, when the block timer fires, it will flush the whole request queue. queue at once? Does this effectively enforce the limit all the time or will it lead to some peaks and then no requests at all for a while until In fact, it only tries to submit those enqueued requests one by one. If it fails to pass the limit, this request will be enqueued again. Right, I missed this. Makes sense. the average is right again? Yeah, it is possible. Do you have a better idea? Maybe some documentation on how it all works from a high level perspective would be helpful. 
+/* throttling disk read I/O */ +if (bs-io_limits_enabled) { +if (bdrv_exceed_io_limits(bs, nb_sectors, false, wait_time)) { +ret = qemu_block_queue_enqueue(bs-block_queue, bs, bdrv_aio_readv, + sector_num, qiov, nb_sectors, cb, opaque); +printf(wait_time=%ld\n, wait_time); +if (wait_time != -1) { +printf(reset block timer\n); +qemu_mod_timer(bs-block_timer, + wait_time + qemu_get_clock_ns(vm_clock)); +} + +if (ret) { +printf(ori ret is not null\n); +} else { +printf(ori ret is null\n); +} + +return ret; +} +} -return drv-bdrv_aio_readv(bs, sector_num, qiov, nb_sectors, +ret = drv-bdrv_aio_readv(bs, sector_num, qiov, nb_sectors, cb, opaque); +if (ret) { +if (bs-io_limits_enabled) { +bs-io_disps.bytes[BLOCK_IO_LIMIT_READ] += + (unsigned) nb_sectors * BDRV_SECTOR_SIZE; +bs-io_disps.ios[BLOCK_IO_LIMIT_READ]++; +} I wonder if you can't reuse bs-nr_bytes/nr_ops instead of introducing a second counting mechanism. Would have the advantage that numbers are NO, our counting variables will be reset to ZERO if current slice time(0.1ms) is used up. Instead of setting the counter to zero you could remember the base value and calculate the difference when you need it. The advantage is that we can share infrastructure instead of introducing several subtly different ways of I/O accounting. actually consistent (your metric counts slightly differently than the existing info blockstats one). Yeah, i notice this, and don't think there's wrong with it. and you? It's not really user friendly if a number that is called the same means this in one place and in another place that. Kevin
[Qemu-devel] [RFC][PATCH 14/45] qemu-kvm: Drop useless kvm_clear_gsi_routes
There are no routes to clear at this point, we are just creating the VM. Signed-off-by: Jan Kiszka jan.kis...@siemens.com --- qemu-kvm-x86.c |1 - qemu-kvm.c | 10 -- qemu-kvm.h |9 - 3 files changed, 0 insertions(+), 20 deletions(-) diff --git a/qemu-kvm-x86.c b/qemu-kvm-x86.c index a7981b1..bab4307 100644 --- a/qemu-kvm-x86.c +++ b/qemu-kvm-x86.c @@ -167,7 +167,6 @@ int kvm_arch_init_irq_routing(void) int i, r; if (kvm_has_gsi_routing()) { -kvm_clear_gsi_routes(); for (i = 0; i 8; ++i) { if (i == 2) { continue; diff --git a/qemu-kvm.c b/qemu-kvm.c index f5b129a..70481de 100644 --- a/qemu-kvm.c +++ b/qemu-kvm.c @@ -252,16 +252,6 @@ int kvm_has_gsi_routing(void) return r; } -int kvm_clear_gsi_routes(void) -{ -#ifdef KVM_CAP_IRQ_ROUTING -kvm_state-irq_routes-nr = 0; -return 0; -#else -return -EINVAL; -#endif -} - int kvm_add_routing_entry(struct kvm_irq_routing_entry *entry) { #ifdef KVM_CAP_IRQ_ROUTING diff --git a/qemu-kvm.h b/qemu-kvm.h index 2bd5602..8032388 100644 --- a/qemu-kvm.h +++ b/qemu-kvm.h @@ -174,15 +174,6 @@ int kvm_deassign_pci_device(KVMState *s, struct kvm_assigned_pci_dev *assigned_dev); /*! - * \brief Clears the temporary irq routing table - * - * Clears the temporary irq routing table. Nothing is committed to the - * running VM. - * - */ -int kvm_clear_gsi_routes(void); - -/*! * \brief Adds an irq route to the temporary irq routing table * * Adds an irq route to the temporary irq routing table. Nothing is -- 1.7.3.4
[Qemu-devel] [PATCH] qxl: create slots on post_load in any state (fix RHBZ 740547)
If we migrate when the device is not in a native state the guest still believes the slots are created, and will cause operations that reference the slots, causing a panic: virtual address out of range on the first of them. Easy to see by migrating in vga mode (with a driver loaded, for instance windows cmd window in full screen mode) and then exiting vga mode back to native mode will cause said panic. Fixed by doing the slot recreation unconditionally at post_load Signed-off-by: Alon Levy al...@redhat.com --- hw/qxl.c | 14 -- 1 files changed, 8 insertions(+), 6 deletions(-) diff --git a/hw/qxl.c b/hw/qxl.c index 03848ed..4e9f39f 100644 --- a/hw/qxl.c +++ b/hw/qxl.c @@ -1684,6 +1684,14 @@ static int qxl_post_load(void *opaque, int version) qxl_mode_to_string(d-mode)); newmode = d-mode; d-mode = QXL_MODE_UNDEFINED; +for (i = 0; i NUM_MEMSLOTS; i++) { +if (!d-guest_slots[i].active) { +continue; +} +dprint(d, 1, %s: restoring guest slot %d delta %PRIu64\n, + __func__, i, d-guest_slots[i].delta); +qxl_add_memslot(d, i, d-guest_slots[i].delta, QXL_SYNC); +} switch (newmode) { case QXL_MODE_UNDEFINED: break; @@ -1691,12 +1699,6 @@ static int qxl_post_load(void *opaque, int version) qxl_enter_vga_mode(d); break; case QXL_MODE_NATIVE: -for (i = 0; i NUM_MEMSLOTS; i++) { -if (!d-guest_slots[i].active) { -continue; -} -qxl_add_memslot(d, i, 0, QXL_SYNC); -} qxl_create_guest_primary(d, 1, QXL_SYNC); /* replay surface-create and cursor-set commands */ -- 1.7.6.4
[Qemu-devel] [RFC][PATCH 31/45] qemu-kvm: Refactor kvm_deassign_irq to kvm_device_irq_deassign
Don't pass kvm_assigned_irq struct, rather use the actually required fields in the interface. Signed-off-by: Jan Kiszka jan.kis...@siemens.com --- hw/device-assignment.c | 42 -- qemu-kvm.c | 15 ++- qemu-kvm.h | 11 +-- 3 files changed, 35 insertions(+), 33 deletions(-) diff --git a/hw/device-assignment.c b/hw/device-assignment.c index 4e4349b..e0b9cfe 100644 --- a/hw/device-assignment.c +++ b/hw/device-assignment.c @@ -810,7 +810,8 @@ static int assign_device(AssignedDevice *dev) static int assign_intx(AssignedDevice *dev) { struct kvm_assigned_irq assigned_irq_data; -int irq, r = 0; +uint32_t dev_id; +int irq, r; /* Interrupt PIN 0 means don't use INTx */ if (assigned_dev_pci_read_byte(dev-dev, PCI_INTERRUPT_PIN) == 0) @@ -819,21 +820,24 @@ static int assign_intx(AssignedDevice *dev) irq = pci_map_irq(dev-dev, dev-intpin); irq = piix_get_irq(irq); -if (dev-girq == irq) -return r; +if (dev-girq == irq) { +return 0; +} + +dev_id = calc_assigned_dev_id(dev); -memset(assigned_irq_data, 0, sizeof(assigned_irq_data)); -assigned_irq_data.assigned_dev_id = calc_assigned_dev_id(dev); -assigned_irq_data.guest_irq = irq; if (dev-irq_requested_type) { -assigned_irq_data.flags = dev-irq_requested_type; -r = kvm_deassign_irq(kvm_state, assigned_irq_data); +r = kvm_device_irq_deassign(kvm_state, dev_id, +dev-irq_requested_type); if (r) { perror(assign_intx: deassign); } dev-irq_requested_type = 0; } +memset(assigned_irq_data, 0, sizeof(assigned_irq_data)); +assigned_irq_data.assigned_dev_id = dev_id; +assigned_irq_data.guest_irq = irq; assigned_irq_data.flags = KVM_DEV_IRQ_GUEST_INTX; if (dev-features ASSIGNED_DEVICE_PREFER_MSI_MASK dev-cap.available ASSIGNED_DEVICE_CAP_MSI) @@ -913,20 +917,19 @@ static void assigned_dev_update_msi(PCIDevice *pci_dev) AssignedDevice *assigned_dev = DO_UPCAST(AssignedDevice, dev, pci_dev); uint8_t ctrl_byte = pci_get_byte(pci_dev-config + pci_dev-msi_cap + PCI_MSI_FLAGS); +uint32_t dev_id; int r; -memset(assigned_irq_data, 0, sizeof 
assigned_irq_data); -assigned_irq_data.assigned_dev_id = calc_assigned_dev_id(assigned_dev); +dev_id = calc_assigned_dev_id(assigned_dev); /* Some guests gratuitously disable MSI even if they're not using it, * try to catch this by only deassigning irqs if the guest is using * MSI or intends to start. */ if ((assigned_dev-irq_requested_type KVM_DEV_IRQ_GUEST_MSI) || (ctrl_byte PCI_MSI_FLAGS_ENABLE)) { - -assigned_irq_data.flags = assigned_dev-irq_requested_type; free_dev_irq_entries(assigned_dev); -r = kvm_deassign_irq(kvm_state, assigned_irq_data); +r = kvm_device_irq_deassign(kvm_state, dev_id, +assigned_dev-irq_requested_type); /* -ENXIO means no assigned irq */ if (r r != -ENXIO) perror(assigned_dev_update_msi: deassign irq); @@ -958,6 +961,8 @@ static void assigned_dev_update_msi(PCIDevice *pci_dev) } assigned_dev-irq_entries_nr = 1; +memset(assigned_irq_data, 0, sizeof assigned_irq_data); +assigned_irq_data.assigned_dev_id = dev_id; assigned_irq_data.guest_irq = assigned_dev-entry-gsi; assigned_irq_data.flags = KVM_DEV_IRQ_HOST_MSI | KVM_DEV_IRQ_GUEST_MSI; if (kvm_assign_irq(kvm_state, assigned_irq_data) 0) { @@ -1066,20 +1071,19 @@ static void assigned_dev_update_msix(PCIDevice *pci_dev) AssignedDevice *assigned_dev = DO_UPCAST(AssignedDevice, dev, pci_dev); uint16_t ctrl_word = pci_get_word(pci_dev-config + pci_dev-msix_cap + PCI_MSIX_FLAGS); +uint32_t dev_id; int r; -memset(assigned_irq_data, 0, sizeof assigned_irq_data); -assigned_irq_data.assigned_dev_id = calc_assigned_dev_id(assigned_dev); +dev_id = calc_assigned_dev_id(assigned_dev); /* Some guests gratuitously disable MSIX even if they're not using it, * try to catch this by only deassigning irqs if the guest is using * MSIX or intends to start. 
*/ if ((assigned_dev-irq_requested_type KVM_DEV_IRQ_GUEST_MSIX) || (ctrl_word PCI_MSIX_FLAGS_ENABLE)) { - -assigned_irq_data.flags = assigned_dev-irq_requested_type; free_dev_irq_entries(assigned_dev); -r = kvm_deassign_irq(kvm_state, assigned_irq_data); +r = kvm_device_irq_deassign(kvm_state, dev_id, +assigned_dev-irq_requested_type); /* -ENXIO means no assigned irq */ if (r r != -ENXIO) perror(assigned_dev_update_msix: deassign irq); @@ -1088,6 +1092,8 @@
[Qemu-devel] [RFC][PATCH 13/45] hpet: Use msi_deliver
Avoid the slow-path MSI delivery via stl_phys by switching to msi_deliver. This also allows to prepare these rarely changing messages in advance. Signed-off-by: Jan Kiszka jan.kis...@siemens.com --- hw/hpet.c |7 ++- 1 files changed, 6 insertions(+), 1 deletions(-) diff --git a/hw/hpet.c b/hw/hpet.c index d8e6b8e..c6d6e35 100644 --- a/hw/hpet.c +++ b/hw/hpet.c @@ -31,6 +31,7 @@ #include hpet_emul.h #include sysbus.h #include mc146818rtc.h +#include msi.h //#define HPET_DEBUG #ifdef HPET_DEBUG @@ -55,6 +56,8 @@ typedef struct HPETTimer { /* timers */ uint8_t wrap_flag; /* timer pop will indicate wrap for one-shot 32-bit * mode. Next pop will be actual timer expiration. */ +MSIMessage msi_msg; +MSIRoutingCache msi_cache; } HPETTimer; typedef struct HPETState { @@ -192,7 +195,7 @@ static void update_irq(struct HPETTimer *timer, int set) qemu_irq_lower(s-irqs[route]); } } else if (timer_fsb_route(timer)) { -stl_le_phys(timer-fsb 32, timer-fsb 0x); +msi_deliver(timer-msi_msg, timer-msi_cache); } else if (timer-config HPET_TN_TYPE_LEVEL) { s-isr |= mask; qemu_irq_raise(s-irqs[route]); @@ -533,9 +536,11 @@ static void hpet_ram_writel(void *opaque, target_phys_addr_t addr, break; case HPET_TN_ROUTE: timer-fsb = (timer-fsb 0xULL) | new_val; +timer-msi_msg.data = new_val; break; case HPET_TN_ROUTE + 4: timer-fsb = (new_val 32) | (timer-fsb 0x); +timer-msi_msg.address = new_val; break; default: DPRINTF(qemu: invalid hpet_ram_writel\n); -- 1.7.3.4
[Qemu-devel] [RFC][PATCH 27/45] qemu-kvm: Lazily update MSI caches
Instead of registering every possible MSI message that is prepared in some device's config space, this commit only registers those messages that are actually sent. Every message that runs through the delivery hook is first checked against its cached data. If there is a mismatch, then the registration is created or updated, if it matches, delivery is performed directly. To avoid exhausting limited KVM IRQ routes, devices are expected to flush their MSI caches whenever the content is no longer used or valid. If we run out of routes nevertheless, we flush all caches that were created dynamically, ie. via the MSI delivery hook. However, we keep all those cached routes intact that are static because they are associated with external sources (irqfds). Signed-off-by: Jan Kiszka jan.kis...@siemens.com --- hw/apic.c |4 +-- hw/msi.c | 93 ++-- hw/msi.h |2 +- hw/msix.c | 91 -- hw/pci.c |1 - hw/pci.h |3 -- kvm-stub.c | 13 + kvm.h |6 ++-- qemu-kvm.c | 69 ++- 9 files changed, 75 insertions(+), 207 deletions(-) diff --git a/hw/apic.c b/hw/apic.c index cb6662c..2cafc49 100644 --- a/hw/apic.c +++ b/hw/apic.c @@ -807,9 +807,7 @@ static uint32_t apic_mem_readl(void *opaque, target_phys_addr_t addr) void apic_deliver_msi(MSIMessage *msg, MSIRoutingCache *cache) { if (kvm_enabled() kvm_irqchip_in_kernel()) { -if (kvm_set_irq(cache-kvm_gsi, 1, NULL) 0) { -abort(); -} +kvm_msi_deliver(msg, cache); } else { uint8_t dest = (msg-address MSI_ADDR_DEST_ID_MASK) MSI_ADDR_DEST_ID_SHIFT; diff --git a/hw/msi.c b/hw/msi.c index 1328903..23d79dd 100644 --- a/hw/msi.c +++ b/hw/msi.c @@ -140,71 +140,18 @@ static void msi_message_from_vector(PCIDevice *dev, uint16_t msi_flags, } } -static void kvm_msi_update(PCIDevice *dev) -{ -uint16_t flags = pci_get_word(dev-config + msi_flags_off(dev)); -unsigned int max_vectors = 1 -((flags PCI_MSI_FLAGS_QMASK) (ffs(PCI_MSI_FLAGS_QMASK) - 1)); -unsigned int nr_vectors = msi_nr_vectors(flags); -MSIRoutingCache *cache; -bool changed = false; -unsigned int vector; 
-MSIMessage msg; -int r; - -for (vector = 0; vector max_vectors; vector++) { -cache = dev-msi_cache[vector]; - -if (vector = nr_vectors) { -if (vector dev-msi_entries_nr) { -kvm_msi_message_del(cache); -changed = true; -} -} else if (vector = dev-msi_entries_nr) { -msi_message_from_vector(dev, flags, vector, msg); -r = kvm_msi_message_add(msg, cache); -if (r) { -fprintf(stderr, %s: kvm_msi_add failed: %s\n, __func__, -strerror(-r)); -exit(1); -} -changed = true; -} else { -msi_message_from_vector(dev, flags, vector, msg); -r = kvm_msi_message_update(msg, cache); -if (r 0) { -fprintf(stderr, %s: kvm_update_msi failed: %s\n, -__func__, strerror(-r)); -exit(1); -} -if (r 0) { -changed = true; -} -} -} -dev-msi_entries_nr = nr_vectors; -if (changed) { -r = kvm_commit_irq_routes(); -if (r) { -fprintf(stderr, %s: kvm_commit_irq_routes failed: %s\n, __func__, -strerror(-r)); -exit(1); -} -} -} - -/* KVM specific MSI helpers */ static void kvm_msi_free(PCIDevice *dev) { -unsigned int vector; +unsigned int vector, nr_vectors; -for (vector = 0; vector dev-msi_entries_nr; ++vector) { -kvm_msi_message_del(dev-msi_cache[vector]); +if (!kvm_enabled() || !kvm_irqchip_in_kernel()) { +return; } -if (dev-msi_entries_nr 0) { -kvm_commit_irq_routes(); +nr_vectors = +msi_nr_vectors(pci_get_word(dev-config + msi_flags_off(dev))); +for (vector = 0; vector nr_vectors; ++vector) { +kvm_msi_cache_invalidate(dev-msi_cache[vector]); } -dev-msi_entries_nr = 0; } int msi_init(struct PCIDevice *dev, uint8_t offset, @@ -283,10 +230,7 @@ void msi_uninit(struct PCIDevice *dev) flags = pci_get_word(dev-config + msi_flags_off(dev)); cap_size = msi_cap_sizeof(flags); -if (kvm_enabled() kvm_irqchip_in_kernel()) { -kvm_msi_free(dev); -} - +kvm_msi_free(dev); g_free(dev-msi_cache); pci_del_capability(dev, PCI_CAP_ID_MSI, cap_size); @@ -303,9 +247,6 @@ void msi_reset(PCIDevice *dev) if (!msi_present(dev)) { return; } -if (kvm_enabled() kvm_irqchip_in_kernel()) { -kvm_msi_free(dev); -} flags = 
pci_get_word(dev-config +
[Qemu-devel] [RFC][PATCH 09/45] msi: Factor out msi_message_from_vector
This helper will also be used by the upcoming config notifier. Signed-off-by: Jan Kiszka jan.kis...@siemens.com --- hw/msi.c | 43 +-- 1 files changed, 25 insertions(+), 18 deletions(-) diff --git a/hw/msi.c b/hw/msi.c index 2b7b6e3..3c7ebc3 100644 --- a/hw/msi.c +++ b/hw/msi.c @@ -113,6 +113,25 @@ bool msi_enabled(const PCIDevice *dev) PCI_MSI_FLAGS_ENABLE); } +static void msi_message_from_vector(PCIDevice *dev, uint16_t msi_flags, +unsigned vector, MSIMessage *msg) +{ +bool msi64bit = msi_flags PCI_MSI_FLAGS_64BIT; +unsigned int nr_vectors = msi_nr_vectors(msi_flags); + +msg-address = pci_get_long(dev-config + msi_address_lo_off(dev)); +if (msi64bit) { +msg-address |= (uint64_t)pci_get_long(dev-config + + msi_address_hi_off(dev)) 32; +} + +msg-data = pci_get_word(dev-config + msi_data_off(dev, msi64bit)); +if (nr_vectors 1) { +msg-data = ~(nr_vectors - 1); +msg-data |= vector; +} +} + static void kvm_msi_message_from_vector(PCIDevice *dev, unsigned vector, KVMMsiMessage *kmm) { @@ -339,11 +358,10 @@ void msi_notify(PCIDevice *dev, unsigned int vector) { uint16_t flags = pci_get_word(dev-config + msi_flags_off(dev)); bool msi64bit = flags PCI_MSI_FLAGS_64BIT; -unsigned int nr_vectors = msi_nr_vectors(flags); -uint64_t address; -uint32_t data; +MSIMessage msg; + +assert(vector msi_nr_vectors(flags)); -assert(vector nr_vectors); if (msi_is_masked(dev, vector)) { assert(flags PCI_MSI_FLAGS_MASKBIT); pci_long_test_and_set_mask( @@ -357,24 +375,13 @@ void msi_notify(PCIDevice *dev, unsigned int vector) return; } -if (msi64bit) { -address = pci_get_quad(dev-config + msi_address_lo_off(dev)); -} else { -address = pci_get_long(dev-config + msi_address_lo_off(dev)); -} - -/* upper bit 31:16 is zero */ -data = pci_get_word(dev-config + msi_data_off(dev, msi64bit)); -if (nr_vectors 1) { -data = ~(nr_vectors - 1); -data |= vector; -} +msi_message_from_vector(dev, flags, vector, msg); MSI_DEV_PRINTF(dev, notify vector 0x%x address: 0x%PRIx64 data: 0x%PRIx32\n, - vector, 
address, data); -stl_le_phys(address, data); + vector, msg.address, msg.data); +stl_le_phys(msg.address, msg.data); } /* Normally called by pci_default_write_config(). */ -- 1.7.3.4
[Qemu-devel] [PATCH v2 0/3] coroutinization of flush and discard (split out of NBD series)
This series, applying on top of block branch, enables drivers to use coroutines for flush and discard. I kept aio_discard after discussing with Kevin since it should be useful not only for raw-posix-aio, but also for the userspace iSCSI backend (and in general for backends relying on an external library that is designed around aio). BTW, with this patch we get for free the invariant that bdrv_aio_* never returns a NULL acb (Stefan's patches already got to that point for read/write, of course). v1->v2: add bdrv_co_flush and bdrv_co_discard entry points Paolo Bonzini (2): block: unify flush implementations block: add bdrv_co_discard and bdrv_aio_discard support Stefan Hajnoczi (1): block: drop redundant bdrv_flush implementation block.c | 258 + block.h |5 + block/blkdebug.c |6 -- block/blkverify.c |9 -- block/qcow.c |6 -- block/qcow2.c | 19 block/qed.c |6 -- block/raw-posix.c | 18 block/raw.c | 23 ++--- block_int.h | 10 ++- trace-events |1 + 11 files changed, 184 insertions(+), 177 deletions(-) -- 1.7.6
[Qemu-devel] [PATCH 2/3] block: drop redundant bdrv_flush implementation
From: Stefan Hajnoczi stefa...@linux.vnet.ibm.com Block drivers now only need to provide either of .bdrv_co_flush, .bdrv_aio_flush() or for legacy drivers .bdrv_flush(). Remove the redundant .bdrv_flush() implementations. [Paolo Bonzini: change raw driver to bdrv_co_flush] Signed-off-by: Stefan Hajnoczi stefa...@linux.vnet.ibm.com Signed-off-by: Paolo Bonzini pbonz...@redhat.com --- block/blkdebug.c |6 -- block/blkverify.c |9 - block/qcow.c |6 -- block/qcow2.c | 19 --- block/qed.c |6 -- block/raw-posix.c | 18 -- block/raw.c | 13 +++-- 7 files changed, 3 insertions(+), 74 deletions(-) diff --git a/block/blkdebug.c b/block/blkdebug.c index b3c5d42..9b88535 100644 --- a/block/blkdebug.c +++ b/block/blkdebug.c @@ -397,11 +397,6 @@ static void blkdebug_close(BlockDriverState *bs) } } -static int blkdebug_flush(BlockDriverState *bs) -{ -return bdrv_flush(bs-file); -} - static BlockDriverAIOCB *blkdebug_aio_flush(BlockDriverState *bs, BlockDriverCompletionFunc *cb, void *opaque) { @@ -454,7 +449,6 @@ static BlockDriver bdrv_blkdebug = { .bdrv_file_open = blkdebug_open, .bdrv_close = blkdebug_close, -.bdrv_flush = blkdebug_flush, .bdrv_aio_readv = blkdebug_aio_readv, .bdrv_aio_writev= blkdebug_aio_writev, diff --git a/block/blkverify.c b/block/blkverify.c index c7522b4..483f3b3 100644 --- a/block/blkverify.c +++ b/block/blkverify.c @@ -116,14 +116,6 @@ static void blkverify_close(BlockDriverState *bs) s-test_file = NULL; } -static int blkverify_flush(BlockDriverState *bs) -{ -BDRVBlkverifyState *s = bs-opaque; - -/* Only flush test file, the raw file is not important */ -return bdrv_flush(s-test_file); -} - static int64_t blkverify_getlength(BlockDriverState *bs) { BDRVBlkverifyState *s = bs-opaque; @@ -368,7 +360,6 @@ static BlockDriver bdrv_blkverify = { .bdrv_file_open = blkverify_open, .bdrv_close = blkverify_close, -.bdrv_flush = blkverify_flush, .bdrv_aio_readv = blkverify_aio_readv, .bdrv_aio_writev= blkverify_aio_writev, diff --git a/block/qcow.c b/block/qcow.c 
index c8bfecc..9b71116 100644 --- a/block/qcow.c +++ b/block/qcow.c @@ -781,11 +781,6 @@ static int qcow_write_compressed(BlockDriverState *bs, int64_t sector_num, return 0; } -static int qcow_flush(BlockDriverState *bs) -{ -return bdrv_flush(bs-file); -} - static BlockDriverAIOCB *qcow_aio_flush(BlockDriverState *bs, BlockDriverCompletionFunc *cb, void *opaque) { @@ -826,7 +821,6 @@ static BlockDriver bdrv_qcow = { .bdrv_open = qcow_open, .bdrv_close= qcow_close, .bdrv_create = qcow_create, -.bdrv_flush= qcow_flush, .bdrv_is_allocated = qcow_is_allocated, .bdrv_set_key = qcow_set_key, .bdrv_make_empty = qcow_make_empty, diff --git a/block/qcow2.c b/block/qcow2.c index 510ff68..4dc980c 100644 --- a/block/qcow2.c +++ b/block/qcow2.c @@ -1092,24 +1092,6 @@ static int qcow2_write_compressed(BlockDriverState *bs, int64_t sector_num, return 0; } -static int qcow2_flush(BlockDriverState *bs) -{ -BDRVQcowState *s = bs-opaque; -int ret; - -ret = qcow2_cache_flush(bs, s-l2_table_cache); -if (ret 0) { -return ret; -} - -ret = qcow2_cache_flush(bs, s-refcount_block_cache); -if (ret 0) { -return ret; -} - -return bdrv_flush(bs-file); -} - static BlockDriverAIOCB *qcow2_aio_flush(BlockDriverState *bs, BlockDriverCompletionFunc *cb, void *opaque) @@ -1242,7 +1224,6 @@ static BlockDriver bdrv_qcow2 = { .bdrv_open = qcow2_open, .bdrv_close = qcow2_close, .bdrv_create= qcow2_create, -.bdrv_flush = qcow2_flush, .bdrv_is_allocated = qcow2_is_allocated, .bdrv_set_key = qcow2_set_key, .bdrv_make_empty= qcow2_make_empty, diff --git a/block/qed.c b/block/qed.c index e87dc4d..2e06992 100644 --- a/block/qed.c +++ b/block/qed.c @@ -533,11 +533,6 @@ static void bdrv_qed_close(BlockDriverState *bs) qemu_vfree(s-l1_table); } -static int bdrv_qed_flush(BlockDriverState *bs) -{ -return bdrv_flush(bs-file); -} - static int qed_create(const char *filename, uint32_t cluster_size, uint64_t image_size, uint32_t table_size, const char *backing_file, const char *backing_fmt) @@ -1479,7 +1474,6 @@ 
static BlockDriver bdrv_qed = { .bdrv_open= bdrv_qed_open, .bdrv_close = bdrv_qed_close, .bdrv_create = bdrv_qed_create, -.bdrv_flush = bdrv_qed_flush, .bdrv_is_allocated= bdrv_qed_is_allocated, .bdrv_make_empty = bdrv_qed_make_empty,
[Qemu-devel] [PATCH 1/3] block: unify flush implementations
Add coroutine support for flush and apply the same emulation that we already do for read/write. bdrv_aio_flush is simplified to always go through a coroutine. Signed-off-by: Paolo Bonzini pbonz...@redhat.com --- block.c | 164 ++ block.h |1 + block_int.h |1 + 3 files changed, 76 insertions(+), 90 deletions(-) diff --git a/block.c b/block.c index 7184a0f..7b8b14d 100644 --- a/block.c +++ b/block.c @@ -53,17 +53,12 @@ static BlockDriverAIOCB *bdrv_aio_readv_em(BlockDriverState *bs, static BlockDriverAIOCB *bdrv_aio_writev_em(BlockDriverState *bs, int64_t sector_num, QEMUIOVector *qiov, int nb_sectors, BlockDriverCompletionFunc *cb, void *opaque); -static BlockDriverAIOCB *bdrv_aio_flush_em(BlockDriverState *bs, -BlockDriverCompletionFunc *cb, void *opaque); -static BlockDriverAIOCB *bdrv_aio_noop_em(BlockDriverState *bs, -BlockDriverCompletionFunc *cb, void *opaque); static int coroutine_fn bdrv_co_readv_em(BlockDriverState *bs, int64_t sector_num, int nb_sectors, QEMUIOVector *iov); static int coroutine_fn bdrv_co_writev_em(BlockDriverState *bs, int64_t sector_num, int nb_sectors, QEMUIOVector *iov); -static int coroutine_fn bdrv_co_flush_em(BlockDriverState *bs); static int coroutine_fn bdrv_co_do_readv(BlockDriverState *bs, int64_t sector_num, int nb_sectors, QEMUIOVector *qiov); static int coroutine_fn bdrv_co_do_writev(BlockDriverState *bs, @@ -203,9 +198,6 @@ void bdrv_register(BlockDriver *bdrv) } } -if (!bdrv-bdrv_aio_flush) -bdrv-bdrv_aio_flush = bdrv_aio_flush_em; - QLIST_INSERT_HEAD(bdrv_drivers, bdrv, list); } @@ -1027,11 +1019,6 @@ static int bdrv_check_request(BlockDriverState *bs, int64_t sector_num, nb_sectors * BDRV_SECTOR_SIZE); } -static inline bool bdrv_has_async_flush(BlockDriver *drv) -{ -return drv-bdrv_aio_flush != bdrv_aio_flush_em; -} - typedef struct RwCo { BlockDriverState *bs; int64_t sector_num; @@ -1759,33 +1746,6 @@ const char *bdrv_get_device_name(BlockDriverState *bs) return bs-device_name; } -int bdrv_flush(BlockDriverState *bs) -{ 
-if (bs-open_flags BDRV_O_NO_FLUSH) { -return 0; -} - -if (bs-drv bdrv_has_async_flush(bs-drv) qemu_in_coroutine()) { -return bdrv_co_flush_em(bs); -} - -if (bs-drv bs-drv-bdrv_flush) { -return bs-drv-bdrv_flush(bs); -} - -/* - * Some block drivers always operate in either writethrough or unsafe mode - * and don't support bdrv_flush therefore. Usually qemu doesn't know how - * the server works (because the behaviour is hardcoded or depends on - * server-side configuration), so we can't ensure that everything is safe - * on disk. Returning an error doesn't work because that would break guests - * even if the server operates in writethrough mode. - * - * Let's hope the user knows what he's doing. - */ -return 0; -} - void bdrv_flush_all(void) { BlockDriverState *bs; @@ -2610,22 +2570,6 @@ fail: return -1; } -BlockDriverAIOCB *bdrv_aio_flush(BlockDriverState *bs, -BlockDriverCompletionFunc *cb, void *opaque) -{ -BlockDriver *drv = bs-drv; - -trace_bdrv_aio_flush(bs, opaque); - -if (bs-open_flags BDRV_O_NO_FLUSH) { -return bdrv_aio_noop_em(bs, cb, opaque); -} - -if (!drv) -return NULL; -return drv-bdrv_aio_flush(bs, cb, opaque); -} - void bdrv_aio_cancel(BlockDriverAIOCB *acb) { acb-pool-cancel(acb); @@ -2785,41 +2729,28 @@ static BlockDriverAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs, return acb-common; } -static BlockDriverAIOCB *bdrv_aio_flush_em(BlockDriverState *bs, -BlockDriverCompletionFunc *cb, void *opaque) +static void coroutine_fn bdrv_aio_flush_co_entry(void *opaque) { -BlockDriverAIOCBSync *acb; - -acb = qemu_aio_get(bdrv_em_aio_pool, bs, cb, opaque); -acb-is_write = 1; /* don't bounce in the completion hadler */ -acb-qiov = NULL; -acb-bounce = NULL; -acb-ret = 0; - -if (!acb-bh) -acb-bh = qemu_bh_new(bdrv_aio_bh_cb, acb); +BlockDriverAIOCBCoroutine *acb = opaque; +BlockDriverState *bs = acb-common.bs; -bdrv_flush(bs); +acb-req.error = bdrv_co_flush(bs); +acb-bh = qemu_bh_new(bdrv_co_em_bh, acb); qemu_bh_schedule(acb-bh); -return acb-common; } 
-static BlockDriverAIOCB *bdrv_aio_noop_em(BlockDriverState *bs, +BlockDriverAIOCB *bdrv_aio_flush(BlockDriverState *bs, BlockDriverCompletionFunc *cb, void *opaque) { -BlockDriverAIOCBSync *acb; +trace_bdrv_aio_flush(bs, opaque); -acb = qemu_aio_get(bdrv_em_aio_pool, bs, cb, opaque); -acb-is_write = 1; /* don't bounce in the
[Qemu-devel] [PATCH 3/3] block: add bdrv_co_discard and bdrv_aio_discard support
This similarly adds support for coroutine and asynchronous discard. Signed-off-by: Paolo Bonzini pbonz...@redhat.com --- block.c | 102 +++-- block.h |4 ++ block/raw.c | 10 +++-- block_int.h |9 - trace-events |1 + 5 files changed, 109 insertions(+), 17 deletions(-) diff --git a/block.c b/block.c index 7b8b14d..28508f2 100644 --- a/block.c +++ b/block.c @@ -1768,17 +1768,6 @@ int bdrv_has_zero_init(BlockDriverState *bs) return 1; } -int bdrv_discard(BlockDriverState *bs, int64_t sector_num, int nb_sectors) -{ -if (!bs-drv) { -return -ENOMEDIUM; -} -if (!bs-drv-bdrv_discard) { -return 0; -} -return bs-drv-bdrv_discard(bs, sector_num, nb_sectors); -} - /* * Returns true iff the specified sector is present in the disk image. Drivers * not implementing the functionality are assumed to not support backing files, @@ -2754,6 +2743,34 @@ BlockDriverAIOCB *bdrv_aio_flush(BlockDriverState *bs, return acb-common; } +static void coroutine_fn bdrv_aio_discard_co_entry(void *opaque) +{ +BlockDriverAIOCBCoroutine *acb = opaque; +BlockDriverState *bs = acb-common.bs; + +acb-req.error = bdrv_co_discard(bs, acb-req.sector, acb-req.nb_sectors); +acb-bh = qemu_bh_new(bdrv_co_em_bh, acb); +qemu_bh_schedule(acb-bh); +} + +BlockDriverAIOCB *bdrv_aio_discard(BlockDriverState *bs, +int64_t sector_num, int nb_sectors, +BlockDriverCompletionFunc *cb, void *opaque) +{ +Coroutine *co; +BlockDriverAIOCBCoroutine *acb; + +trace_bdrv_aio_discard(bs, sector_num, nb_sectors, opaque); + +acb = qemu_aio_get(bdrv_em_co_aio_pool, bs, cb, opaque); +acb-req.sector = sector_num; +acb-req.nb_sectors = nb_sectors; +co = qemu_coroutine_create(bdrv_aio_discard_co_entry); +qemu_coroutine_enter(co, acb); + +return acb-common; +} + void bdrv_init(void) { module_call_init(MODULE_INIT_BLOCK); @@ -2915,6 +2932,69 @@ int bdrv_flush(BlockDriverState *bs) return rwco.ret; } +static void coroutine_fn bdrv_discard_co_entry(void *opaque) +{ +RwCo *rwco = opaque; + +rwco-ret = bdrv_co_discard(rwco-bs, rwco-sector_num, 
rwco-nb_sectors); +} + +int coroutine_fn bdrv_co_discard(BlockDriverState *bs, int64_t sector_num, + int nb_sectors) +{ +if (!bs-drv) { +return -ENOMEDIUM; +} else if (bdrv_check_request(bs, sector_num, nb_sectors)) { +return -EIO; +} else if (bs-read_only) { +return -EROFS; +} else if (bs-drv-bdrv_co_discard) { +return bs-drv-bdrv_co_discard(bs, sector_num, nb_sectors); +} else if (bs-drv-bdrv_aio_discard) { +BlockDriverAIOCB *acb; +CoroutineIOCompletion co = { +.coroutine = qemu_coroutine_self(), +}; + +acb = bs-drv-bdrv_aio_discard(bs, sector_num, nb_sectors, +bdrv_co_io_em_complete, co); +if (acb == NULL) { +return -EIO; +} else { +qemu_coroutine_yield(); +return co.ret; +} +} else if (bs-drv-bdrv_discard) { +return bs-drv-bdrv_discard(bs, sector_num, nb_sectors); +} else { +return 0; +} +} + +int bdrv_discard(BlockDriverState *bs, int64_t sector_num, int nb_sectors) +{ +Coroutine *co; +RwCo rwco = { +.bs = bs, +.sector_num = sector_num, +.nb_sectors = nb_sectors, +.ret = NOT_DONE, +}; + +if (qemu_in_coroutine()) { +/* Fast-path if already in coroutine context */ +bdrv_discard_co_entry(rwco); +} else { +co = qemu_coroutine_create(bdrv_discard_co_entry); +qemu_coroutine_enter(co, rwco); +while (rwco.ret == NOT_DONE) { +qemu_aio_wait(); +} +} + +return rwco.ret; +} + /**/ /* removable device support */ diff --git a/block.h b/block.h index 65c5166..5a042c9 100644 --- a/block.h +++ b/block.h @@ -166,6 +166,9 @@ BlockDriverAIOCB *bdrv_aio_writev(BlockDriverState *bs, int64_t sector_num, BlockDriverCompletionFunc *cb, void *opaque); BlockDriverAIOCB *bdrv_aio_flush(BlockDriverState *bs, BlockDriverCompletionFunc *cb, void *opaque); +BlockDriverAIOCB *bdrv_aio_discard(BlockDriverState *bs, + int64_t sector_num, int nb_sectors, + BlockDriverCompletionFunc *cb, void *opaque); void bdrv_aio_cancel(BlockDriverAIOCB *acb); typedef struct BlockRequest { @@ -196,6 +199,7 @@ void bdrv_flush_all(void); void bdrv_close_all(void); int bdrv_discard(BlockDriverState *bs, int64_t 
sector_num, int nb_sectors); +int bdrv_co_discard(BlockDriverState *bs, int64_t sector_num, int nb_sectors); int bdrv_has_zero_init(BlockDriverState *bs); int
Re: [Qemu-devel] [PATCH] Memory API bugfix - abolish addrrange_end()
On 10/17/2011 07:31 AM, David Gibson wrote: In terms of how the code looks, it's seriously more ugly (see the patches I sent out). Conceptually it's cleaner, since we're not dodging the issue that we need to deal with a full 64-bit domain. We don't have to dodge that issue. I know how to remove the requirement for intermediate negative values, I just haven't made up a patch yet. With that we can change to uint64 and cover the full 64 bit range. In fact I think I can make it so that size==0 represents size=2^64 and even handle the full 64-bit, inclusive range properly. That means you can't do a real size == 0. But my main concern is maintainability. The 64-bit blanket is too short; if we keep pulling it in various directions we'll just expose ourselves in new ways. Nonsense, dealing with full X-bit range calculations in X-bit types is a fairly standard problem. The kernel does it in VMA handling for one. It just requires thinking about overflow cases. We discovered three bugs already (you found two, and I had one during development). Even if it can probably be done with extreme care, is it worth spending all that development time on? I'm not sure there is a parallel with vmas, since we're offsetting in both the positive and negative directions. -- error compiling committee.c: too many arguments to function
[Qemu-devel] [RFC][PATCH 12/45] msi: Introduce MSIRoutingCache
This cache will help us implementing KVM in-kernel irqchip support without spreading hooks all over the place. KVM requires us to register it first and then deliver it by raising a pseudo IRQ line returned on registration. While this could be changed for QEMU-originated MSI messages by adding direct MSI injection, we will still need this translation for irqfd-originated messages. The MSIRoutingCache will allow to track those registrations and update them lazily before the actual delivery. This avoid having to track MSI vectors at device level (like qemu-kvm currently does). Signed-off-by: Jan Kiszka jan.kis...@siemens.com --- hw/apic.c |5 +++-- hw/apic.h |2 +- hw/msi.c | 10 +++--- hw/msi.h | 14 +- hw/msix.c |7 ++- hw/pc.c |4 ++-- hw/pci.h |4 qemu-common.h |1 + 8 files changed, 37 insertions(+), 10 deletions(-) diff --git a/hw/apic.c b/hw/apic.c index c1d557d..6811ae1 100644 --- a/hw/apic.c +++ b/hw/apic.c @@ -804,7 +804,7 @@ static uint32_t apic_mem_readl(void *opaque, target_phys_addr_t addr) return val; } -void apic_deliver_msi(MSIMessage *msg) +void apic_deliver_msi(MSIMessage *msg, MSIRoutingCache *cache) { uint8_t dest = (msg-address MSI_ADDR_DEST_ID_MASK) MSI_ADDR_DEST_ID_SHIFT; @@ -829,8 +829,9 @@ static void apic_mem_writel(void *opaque, target_phys_addr_t addr, uint32_t val) * Mapping them on the global bus happens to work because * MSI registers are reserved in APIC MMIO and vice versa. 
*/ MSIMessage msg = { .address = addr, .data = val }; +static MSIRoutingCache cache; -msi_deliver(msg); +msi_deliver(msg, cache); return; } diff --git a/hw/apic.h b/hw/apic.h index fa848fd..353ea3a 100644 --- a/hw/apic.h +++ b/hw/apic.h @@ -18,7 +18,7 @@ void cpu_set_apic_tpr(DeviceState *s, uint8_t val); uint8_t cpu_get_apic_tpr(DeviceState *s); void apic_init_reset(DeviceState *s); void apic_sipi(DeviceState *s); -void apic_deliver_msi(MSIMessage *msg); +void apic_deliver_msi(MSIMessage *msg, MSIRoutingCache *cache); /* pc.c */ int cpu_is_bsp(CPUState *env); diff --git a/hw/msi.c b/hw/msi.c index 9055155..c8ccb17 100644 --- a/hw/msi.c +++ b/hw/msi.c @@ -40,13 +40,13 @@ /* Flag for interrupt controller to declare MSI/MSI-X support */ bool msi_supported; -static void msi_unsupported(MSIMessage *msg) +static void msi_unsupported(MSIMessage *msg, MSIRoutingCache *cache) { /* If we get here, the board failed to register a delivery handler. */ abort(); } -void (*msi_deliver)(MSIMessage *msg) = msi_unsupported; +void (*msi_deliver)(MSIMessage *msg, MSIRoutingCache *cache) = msi_unsupported; /* If we get rid of cap allocator, we won't need this. */ static inline uint8_t msi_cap_sizeof(uint16_t flags) @@ -288,6 +288,8 @@ int msi_init(struct PCIDevice *dev, uint8_t offset, 0x (PCI_MSI_VECTORS_MAX - nr_vectors)); } +dev-msi_cache = g_malloc0(nr_vectors * sizeof(*dev-msi_cache)); + if (kvm_enabled() kvm_irqchip_in_kernel()) { dev-msi_irq_entries = g_malloc(nr_vectors * sizeof(*dev-msix_irq_entries)); @@ -312,6 +314,8 @@ void msi_uninit(struct PCIDevice *dev) g_free(dev-msi_irq_entries); } +g_free(dev-msi_cache); + pci_del_capability(dev, PCI_CAP_ID_MSI, cap_size); dev-cap_present = ~QEMU_PCI_CAP_MSI; @@ -389,7 +393,7 @@ void msi_notify(PCIDevice *dev, unsigned int vector) notify vector 0x%x address: 0x%PRIx64 data: 0x%PRIx32\n, vector, msg.address, msg.data); -msi_deliver(msg); +msi_deliver(msg, dev-msi_cache[vector]); } /* Normally called by pci_default_write_config(). 
*/ diff --git a/hw/msi.h b/hw/msi.h index f3152f3..20ae215 100644 --- a/hw/msi.h +++ b/hw/msi.h @@ -29,6 +29,18 @@ struct MSIMessage { uint32_t data; }; +typedef enum { +MSI_ROUTE_NONE = 0, +MSI_ROUTE_STATIC, +} MSIRouteType; + +struct MSIRoutingCache { +MSIMessage msg; +MSIRouteType type; +int kvm_gsi; +int kvm_irqfd; +}; + extern bool msi_supported; bool msi_enabled(const PCIDevice *dev); @@ -46,6 +58,6 @@ static inline bool msi_present(const PCIDevice *dev) return dev-cap_present QEMU_PCI_CAP_MSI; } -extern void (*msi_deliver)(MSIMessage *msg); +extern void (*msi_deliver)(MSIMessage *msg, MSIRoutingCache *cache); #endif /* QEMU_MSI_H */ diff --git a/hw/msix.c b/hw/msix.c index 08cc526..e824aef 100644 --- a/hw/msix.c +++ b/hw/msix.c @@ -358,6 +358,8 @@ int msix_init(struct PCIDevice *dev, unsigned short nentries, if (ret) goto err_config; +dev-msix_cache = g_malloc0(nentries * sizeof *dev-msix_cache); + if (kvm_enabled() kvm_irqchip_in_kernel()) { dev-msix_irq_entries = g_malloc(nentries * sizeof
Re: [Qemu-devel] [PATCH] ioapic: Convert to memory API
On 10/16/2011 07:21 PM, Jan Kiszka wrote: From: Jan Kiszka jan.kis...@siemens.com Dispatching byte and word accesses like dwords looks strange, but let's just convert mechanically. -static CPUReadMemoryFunc * const ioapic_mem_read[3] = { -ioapic_mem_readl, -ioapic_mem_readl, -ioapic_mem_readl, -}; - -static CPUWriteMemoryFunc * const ioapic_mem_write[3] = { -ioapic_mem_writel, -ioapic_mem_writel, -ioapic_mem_writel, +static const MemoryRegionOps ioapic_io_ops = { +.old_mmio = { +.read = { ioapic_mem_readl, ioapic_mem_readl, ioapic_mem_readl, }, +.write = { ioapic_mem_writel, ioapic_mem_writel, ioapic_mem_writel, }, +}, +.endianness = DEVICE_NATIVE_ENDIAN, }; Why use old_mmio? Use the ordinary .read and .write, and ignore the size parameter. -- error compiling committee.c: too many arguments to function
[Qemu-devel] [RFC][PATCH 39/45] pci-assign: Use generic MSI support
Implement MSI support of a assigned devices via the generic MSI layer of QEMU. Use config notifiers to update the vector route or switch back to INTx when MSI gets disabled again. Using the generic layer not only saves a bit code, it also fixes reset while legacy MSI is in use and adds 64 bit support. Signed-off-by: Jan Kiszka jan.kis...@siemens.com --- hw/device-assignment.c | 77 +++ 1 files changed, 31 insertions(+), 46 deletions(-) diff --git a/hw/device-assignment.c b/hw/device-assignment.c index 2484afd..10b30a3 100644 --- a/hw/device-assignment.c +++ b/hw/device-assignment.c @@ -699,10 +699,6 @@ static void free_assigned_device(AssignedDevice *dev) close(dev-real_device.config_fd); } -if (dev-dev.msi_cache) { -kvm_msi_cache_invalidate(dev-dev.msi_cache[0]); -g_free(dev-dev.msi_cache); -} invalidate_msix_vectors(dev); g_free(dev-dev.msix_cache); } @@ -847,7 +843,7 @@ static int assign_intx(AssignedDevice *dev) irq_type = KVM_DEV_IRQ_GUEST_INTX; if (dev-features ASSIGNED_DEVICE_PREFER_MSI_MASK -dev-cap.available ASSIGNED_DEVICE_CAP_MSI) { +msi_present(dev-dev)) { irq_type |= KVM_DEV_IRQ_HOST_MSI; } else { irq_type |= KVM_DEV_IRQ_HOST_INTX; @@ -920,31 +916,33 @@ void assigned_dev_update_irqs(void) } } -static void assigned_dev_update_msi(PCIDevice *pci_dev) +static void assigned_dev_update_msi(PCIDevice *pci_dev, bool enabled) { AssignedDevice *dev = DO_UPCAST(AssignedDevice, dev, pci_dev); -uint8_t ctrl_byte = pci_get_byte(pci_dev-config + pci_dev-msi_cap + - PCI_MSI_FLAGS); - -if (ctrl_byte PCI_MSI_FLAGS_ENABLE) { -uint8_t *pos = pci_dev-config + pci_dev-msi_cap; -MSIMessage msg; -deassign_irq(dev); +if (!enabled) { +assign_intx(dev); +} +} -msg.address = pci_get_long(pos + PCI_MSI_ADDRESS_LO); -msg.data = pci_get_word(pos + PCI_MSI_DATA_32); +static int assigned_dev_update_msi_vector(PCIDevice *pci_dev, + unsigned int vector, + MSIMessage *msg, bool masked) +{ +AssignedDevice *dev = DO_UPCAST(AssignedDevice, dev, pci_dev); +int ret; -if 
(kvm_device_msi_assign(kvm_state, calc_assigned_dev_id(dev), msg, - dev-dev.msi_cache[0]) 0) { -perror(assigned_dev_update_msi: assign msi); -return; +if (!masked) { +deassign_irq(dev); +ret = kvm_device_msi_assign(kvm_state, calc_assigned_dev_id(dev), msg, +dev-dev.msi_cache[0]); +if (ret 0) { +perror(assigned_dev_update_msi_vector: assign msi); +return ret; } dev-irq_requested_type = KVM_DEV_IRQ_HOST_MSI | KVM_DEV_IRQ_GUEST_MSI; -} else { -kvm_msi_cache_invalidate(dev-dev.msi_cache[0]); -assign_intx(dev); } +return 0; } static int assigned_dev_set_msix_vectors(PCIDevice *pci_dev) @@ -1085,12 +1083,6 @@ static void assigned_dev_pci_write_config(PCIDevice *pci_dev, uint32_t address, pci_default_write_config(pci_dev, address, val, len); -if (assigned_dev-cap.available ASSIGNED_DEVICE_CAP_MSI) { -if (range_covers_byte(address, len, - pci_dev-msi_cap + PCI_MSI_FLAGS)) { -assigned_dev_update_msi(pci_dev); -} -} if (assigned_dev-cap.available ASSIGNED_DEVICE_CAP_MSIX) { if (range_covers_byte(address, len, pci_dev-msix_cap + PCI_MSIX_FLAGS + 1)) { @@ -1136,26 +1128,19 @@ static int assigned_device_pci_cap_init(PCIDevice *pci_dev) * MSI capability is the 1st capability in capability config */ pos = pci_find_cap_offset(pci_dev, PCI_CAP_ID_MSI, 0); if (pos != 0 kvm_check_extension(kvm_state, KVM_CAP_ASSIGN_DEV_IRQ)) { -dev-cap.available |= ASSIGNED_DEVICE_CAP_MSI; -/* Only 32-bit/no-mask currently supported */ -if ((ret = pci_add_capability(pci_dev, PCI_CAP_ID_MSI, pos, 10)) 0) { +uint16_t flags = pci_get_word(pci_dev-config + pos + PCI_MSI_FLAGS); + +/* Note: KVM does not support multiple messages */ +ret = msi_init(pci_dev, pos, 1, flags PCI_MSI_FLAGS_64BIT, + flags PCI_MSI_FLAGS_MASKBIT); +if (ret 0) { +return ret; +} +ret = msi_set_config_notifiers(pci_dev, assigned_dev_update_msi, + assigned_dev_update_msi_vector); +if (ret 0) { return ret; } -pci_dev-msi_cap = pos; - -pci_set_word(pci_dev-config + pos + PCI_MSI_FLAGS, - pci_get_word(pci_dev-config + pos + 
PCI_MSI_FLAGS) - PCI_MSI_FLAGS_QMASK); -pci_set_long(pci_dev-config + pos +
[Qemu-devel] [RFC][PATCH 23/45] qemu-kvm: Rework MSI-X mask notifier to generic MSI config notifiers
MSI config notifiers are supposed to be triggered on every relevant configuration change of MSI vectors or if MSI is enabled/disabled. Two notifiers are established, one for vector changes and one for general enabling. The former notifier additionally passes the currently active MSI message. This will allow to update potential in-kernel IRQ routes on changes. The latter notifier is optional and will only be used by a subset of clients. These notifiers are currently only available for MSI-X but will be extended to legacy MSI as well. Signed-off-by: Jan Kiszka jan.kis...@siemens.com --- hw/msix.c | 119 +- hw/msix.h |6 ++- hw/pci.h|8 ++- hw/virtio-pci.c | 24 ++-- 4 files changed, 102 insertions(+), 55 deletions(-) diff --git a/hw/msix.c b/hw/msix.c index 247b255..176bc76 100644 --- a/hw/msix.c +++ b/hw/msix.c @@ -219,16 +219,24 @@ static bool msix_is_masked(PCIDevice *dev, int vector) dev-msix_table_page[offset] PCI_MSIX_ENTRY_CTRL_MASKBIT; } -static void msix_handle_mask_update(PCIDevice *dev, int vector) +static void msix_fire_vector_config_notifier(PCIDevice *dev, + unsigned int vector, bool masked) { -bool masked = msix_is_masked(dev, vector); +MSIMessage msg; int ret; -if (dev-msix_mask_notifier) { -ret = dev-msix_mask_notifier(dev, vector, - msix_is_masked(dev, vector)); +if (dev-msix_vector_config_notifier) { +msix_message_from_vector(dev, vector, msg); +ret = dev-msix_vector_config_notifier(dev, vector, msg, masked); assert(ret = 0); } +} + +static void msix_handle_mask_update(PCIDevice *dev, int vector) +{ +bool masked = msix_is_masked(dev, vector); + +msix_fire_vector_config_notifier(dev, vector, masked); if (!masked msix_is_pending(dev, vector)) { msix_clr_pending(dev, vector); msix_notify(dev, vector); @@ -240,20 +248,27 @@ void msix_write_config(PCIDevice *dev, uint32_t addr, uint32_t old_val, int len) { unsigned enable_pos = dev-msix_cap + MSIX_CONTROL_OFFSET; -bool was_masked; +bool was_masked, was_enabled, is_enabled; int vector; if (!msix_present(dev) 
|| !range_covers_byte(addr, len, enable_pos)) { return; } -if (!msix_enabled(dev)) { +old_val = (enable_pos - addr) * 8; + +was_enabled = old_val MSIX_ENABLE_MASK; +is_enabled = msix_enabled(dev); +if (was_enabled != is_enabled dev-msix_enable_notifier) { +dev-msix_enable_notifier(dev, is_enabled); +} + +if (!is_enabled) { return; } pci_device_deassert_intx(dev); -old_val = (enable_pos - addr) * 8; was_masked = (old_val (MSIX_MASKALL_MASK | MSIX_ENABLE_MASK)) != MSIX_ENABLE_MASK; if (was_masked != msix_function_masked(dev)) { @@ -270,15 +285,20 @@ static void msix_mmio_write(void *opaque, target_phys_addr_t addr, unsigned int offset = addr (MSIX_PAGE_SIZE - 1) ~0x3; unsigned int vector = offset / PCI_MSIX_ENTRY_SIZE; bool was_masked = msix_is_masked(dev, vector); +bool is_masked; pci_set_long(dev-msix_table_page + offset, val); if (kvm_enabled() kvm_irqchip_in_kernel()) { kvm_msix_update(dev, vector, was_masked, msix_is_masked(dev, vector)); } -if (vector dev-msix_entries_nr -was_masked != msix_is_masked(dev, vector)) { -msix_handle_mask_update(dev, vector); +if (vector dev-msix_entries_nr) { +is_masked = msix_is_masked(dev, vector); +if (was_masked != is_masked) { +msix_handle_mask_update(dev, vector); +} else { +msix_fire_vector_config_notifier(dev, vector, is_masked); +} } } @@ -305,17 +325,17 @@ static void msix_mmio_setup(PCIDevice *d, MemoryRegion *bar) static void msix_mask_all(struct PCIDevice *dev, unsigned nentries) { -int vector, r; +int vector; + for (vector = 0; vector nentries; ++vector) { unsigned offset = vector * PCI_MSIX_ENTRY_SIZE + PCI_MSIX_ENTRY_VECTOR_CTRL; bool was_masked = msix_is_masked(dev, vector); + dev-msix_table_page[offset] |= PCI_MSIX_ENTRY_CTRL_MASKBIT; -if (was_masked != msix_is_masked(dev, vector) -dev-msix_mask_notifier) { -r = dev-msix_mask_notifier(dev, vector, -msix_is_masked(dev, vector)); -assert(r = 0); + +if (!was_masked) { +msix_handle_mask_update(dev, vector); } } } @@ -337,7 +357,6 @@ int msix_init(struct PCIDevice 
*dev, unsigned short nentries, if (nentries MSIX_MAX_ENTRIES) return -EINVAL; -dev-msix_mask_notifier = NULL; dev-msix_entry_used = g_malloc0(MSIX_MAX_ENTRIES * sizeof
[Qemu-devel] [RFC][PATCH 19/45] qemu-kvm: Factor out kvm_msi_irqfd_set
This makes the KVM core layer aware of the irqfd associated with some MSI cache. kvm_msi_irqfd_set is defined for this purpose, which avoids that virtio needs to peek into the cache for extracting the GSI. Signed-off-by: Jan Kiszka jan.kis...@siemens.com --- hw/virtio-pci.c |6 +++--- kvm.h |2 ++ qemu-kvm.c | 14 +- 3 files changed, 18 insertions(+), 4 deletions(-) diff --git a/hw/virtio-pci.c b/hw/virtio-pci.c index 23880e0..ad6a002 100644 --- a/hw/virtio-pci.c +++ b/hw/virtio-pci.c @@ -524,9 +524,9 @@ static int virtio_pci_mask_vq(PCIDevice *dev, unsigned vector, VirtQueue *vq, int masked) { EventNotifier *notifier = virtio_queue_get_guest_notifier(vq); -int r = kvm_set_irqfd(dev-msix_cache[vector].kvm_gsi, - event_notifier_get_fd(notifier), - !masked); +int r = kvm_msi_irqfd_set(dev-msix_cache[vector], + event_notifier_get_fd(notifier), + !masked); if (r 0) { return (r == -ENOSYS) ? 0 : r; } diff --git a/kvm.h b/kvm.h index 3706fc6..fe2eec5 100644 --- a/kvm.h +++ b/kvm.h @@ -208,6 +208,8 @@ int kvm_msi_message_add(MSIMessage *msg, MSIRoutingCache *cache); int kvm_msi_message_del(MSIRoutingCache *cache); int kvm_msi_message_update(MSIMessage *msg, MSIRoutingCache *cache); +int kvm_msi_irqfd_set(MSIRoutingCache *cache, int fd, bool assigned); + int kvm_commit_irq_routes(void); int kvm_irqchip_in_kernel(void); diff --git a/qemu-kvm.c b/qemu-kvm.c index 13d4f90..ab7703b 100644 --- a/qemu-kvm.c +++ b/qemu-kvm.c @@ -352,8 +352,11 @@ int kvm_del_routing_entry(struct kvm_irq_routing_entry *entry) *e = *p; cache = s-msi_cache[i]; -if (cache) { +if (cache cache-type != MSI_ROUTE_NONE) { cache-type = MSI_ROUTE_NONE; +if (cache-kvm_irqfd = 0) { +kvm_set_irqfd(cache-kvm_gsi, cache-kvm_irqfd, false); +} } s-msi_cache[i] = s-msi_cache[s-irq_routes-nr]; @@ -521,6 +524,15 @@ int kvm_msi_message_update(MSIMessage *msg, MSIRoutingCache *cache) } +int kvm_msi_irqfd_set(MSIRoutingCache *cache, int fd, bool assigned) +{ +if (cache-type == MSI_ROUTE_NONE) { +return assigned ? 
-EINVAL : 0; +} +cache-kvm_irqfd = assigned ? fd : -1; +return kvm_set_irqfd(cache-kvm_gsi, fd, assigned); +} + #ifdef KVM_CAP_DEVICE_MSIX int kvm_assign_set_msix_nr(KVMState *s, struct kvm_assigned_msix_nr *msix_nr) { -- 1.7.3.4
Re: [Qemu-devel] [RFC128 3/2] Adjust system and pci address spaces to full 64-bit
On 10/17/2011 07:33 AM, David Gibson wrote: On Sun, Oct 16, 2011 at 05:29:07PM +0200, Avi Kivity wrote: Now that the memory API supports full 64-bit buses, adjust the relevant callers to take advantage of it. Note that this, strictly speaking, doesn't give you full 64-bit coverage, since the range covered is 2^64-1 bytes rather than 2^64 bytes. Cases where that will matter would be very rare, of course. An undocumented and indeed unmentioned feature of patch 2 is that UINT64_MAX sizes are expanded to UINT64_MAX+1. I did that to avoid introducing memory_region_init_128() (or perhaps memory_region_init_2_64() that doesn't take a size argument). That removes the ability to create container regions that span exactly UINT64_MAX bytes. It is strange in a patchset that tries to make things more regular, I admit. -- error compiling committee.c: too many arguments to function
Re: [Qemu-devel] [PATCH v2 0/3] coroutinization of flush and discard (split out of NBD series)
Am 17.10.2011 12:32, schrieb Paolo Bonzini: This series, applying on top of block branch, enables drivers to use coroutines for flush and discard. I kept aio_discard after discussing with Kevin since it should be useful not only for raw-posix-aio, but also for the userspace iSCSI backend (and in general for backends relying on an external library that is designed around aio). BTW, with this patch we get for free the invariant that bdrv_aio_* never returns a NULL acb (Stefan's patches already got to that point for read/write, of course). Cool, I wasn't aware of that. That's a very nice side effect! Maybe we should write this down in a comment and remove the now unnecessary error handling from callers. Kevin
[Qemu-devel] [RFC][PATCH 22/45] qemu-kvm: msix: Fire mask notifier on global mask changes
Also invoke the mask notifier if the global MSI-X mask is modified. For this purpose, we push the notifier call from the per-vector mask update to the central msix_handle_mask_update. Signed-off-by: Jan Kiszka jan.kis...@siemens.com --- hw/msix.c | 16 +--- 1 files changed, 9 insertions(+), 7 deletions(-) diff --git a/hw/msix.c b/hw/msix.c index 739b56f..247b255 100644 --- a/hw/msix.c +++ b/hw/msix.c @@ -221,7 +221,15 @@ static bool msix_is_masked(PCIDevice *dev, int vector) static void msix_handle_mask_update(PCIDevice *dev, int vector) { -if (!msix_is_masked(dev, vector) msix_is_pending(dev, vector)) { +bool masked = msix_is_masked(dev, vector); +int ret; + +if (dev-msix_mask_notifier) { +ret = dev-msix_mask_notifier(dev, vector, + msix_is_masked(dev, vector)); +assert(ret = 0); +} +if (!masked msix_is_pending(dev, vector)) { msix_clr_pending(dev, vector); msix_notify(dev, vector); } @@ -262,7 +270,6 @@ static void msix_mmio_write(void *opaque, target_phys_addr_t addr, unsigned int offset = addr (MSIX_PAGE_SIZE - 1) ~0x3; unsigned int vector = offset / PCI_MSIX_ENTRY_SIZE; bool was_masked = msix_is_masked(dev, vector); -int r; pci_set_long(dev-msix_table_page + offset, val); if (kvm_enabled() kvm_irqchip_in_kernel()) { @@ -271,11 +278,6 @@ static void msix_mmio_write(void *opaque, target_phys_addr_t addr, if (vector dev-msix_entries_nr was_masked != msix_is_masked(dev, vector)) { -if (dev-msix_mask_notifier) { -r = dev-msix_mask_notifier(dev, vector, -msix_is_masked(dev, vector)); -assert(r = 0); -} msix_handle_mask_update(dev, vector); } } -- 1.7.3.4
Re: [Qemu-devel] [PATCH RFC v1 2/2] hyper-v: initialize Hyper-V CPUID leafs.
On 10/17/2011 11:40 AM, Paolo Bonzini wrote: On 10/17/2011 11:17 AM, Vadim Rozenfeld wrote: @@ -379,11 +380,16 @@ int kvm_arch_init_vcpu(CPUState *env) cpuid_i = 0; /* Paravirtualization CPUIDs */ -memcpy(signature, KVMKVMKVM\0\0\0, 12); c =cpuid_data.entries[cpuid_i++]; memset(c, 0, sizeof(*c)); c-function = KVM_CPUID_SIGNATURE; -c-eax = 0; +if (!hyperv_enabled()) { +memcpy(signature, KVMKVMKVM\0\0\0, 12); +c-eax = 0; +} else { +memcpy(signature, Microsoft Hv, 12); +c-eax = HYPERV_CPUID_MIN; +} Even not counting that hyper-v support should IMHO not be in KVM-specific code, I still think this shouldn't remove KVM leaves completely but rather move them to 0x4100. The KVM paravirtualization code then can similarly probe with 0x100 stride up to 0x40001000. This is what was done for Xen, and it allows to enable enlightenments independent of whether the guest is Linux or Windows. However, let's get a third opinion---Avi, what do you think? I agree with you, especially as this already works for Xen. Note it doesn't completely solve the issue (so we have two interfaces, which is the preferred one?), but it's better than nothing. -- error compiling committee.c: too many arguments to function
[Qemu-devel] [RFC][PATCH 18/45] qemu-kvm: Hook into MSI delivery at APIC level
Move the two hooks for MSI delivery to in-kernel irqchips from the MSI layer to a single place: the APIC. Signed-off-by: Jan Kiszka jan.kis...@siemens.com --- hw/apic.c | 24 +++- hw/msi.c |5 - hw/msix.c |5 - 3 files changed, 15 insertions(+), 19 deletions(-) diff --git a/hw/apic.c b/hw/apic.c index 6811ae1..cb6662c 100644 --- a/hw/apic.c +++ b/hw/apic.c @@ -806,15 +806,21 @@ static uint32_t apic_mem_readl(void *opaque, target_phys_addr_t addr) void apic_deliver_msi(MSIMessage *msg, MSIRoutingCache *cache) { -uint8_t dest = -(msg-address MSI_ADDR_DEST_ID_MASK) MSI_ADDR_DEST_ID_SHIFT; -uint8_t vector = -(msg-data MSI_DATA_VECTOR_MASK) MSI_DATA_VECTOR_SHIFT; -uint8_t dest_mode = (msg-address MSI_ADDR_DEST_MODE_SHIFT) 0x1; -uint8_t trigger_mode = (msg-data MSI_DATA_TRIGGER_SHIFT) 0x1; -uint8_t delivery = (msg-data MSI_DATA_DELIVERY_MODE_SHIFT) 0x7; -/* XXX: Ignore redirection hint. */ -apic_deliver_irq(dest, dest_mode, delivery, vector, trigger_mode); +if (kvm_enabled() kvm_irqchip_in_kernel()) { +if (kvm_set_irq(cache-kvm_gsi, 1, NULL) 0) { +abort(); +} +} else { +uint8_t dest = +(msg-address MSI_ADDR_DEST_ID_MASK) MSI_ADDR_DEST_ID_SHIFT; +uint8_t vector = +(msg-data MSI_DATA_VECTOR_MASK) MSI_DATA_VECTOR_SHIFT; +uint8_t dest_mode = (msg-address MSI_ADDR_DEST_MODE_SHIFT) 0x1; +uint8_t trigger_mode = (msg-data MSI_DATA_TRIGGER_SHIFT) 0x1; +uint8_t delivery = (msg-data MSI_DATA_DELIVERY_MODE_SHIFT) 0x7; +/* XXX: Ignore redirection hint. 
*/ +apic_deliver_irq(dest, dest_mode, delivery, vector, trigger_mode); +} } static void apic_mem_writel(void *opaque, target_phys_addr_t addr, uint32_t val) diff --git a/hw/msi.c b/hw/msi.c index b947104..1328903 100644 --- a/hw/msi.c +++ b/hw/msi.c @@ -355,11 +355,6 @@ void msi_notify(PCIDevice *dev, unsigned int vector) return; } -if (kvm_enabled() kvm_irqchip_in_kernel()) { -kvm_set_irq(dev-msi_cache[vector].kvm_gsi, 1, NULL); -return; -} - msi_message_from_vector(dev, flags, vector, msg); MSI_DEV_PRINTF(dev, diff --git a/hw/msix.c b/hw/msix.c index 0be022e..6886255 100644 --- a/hw/msix.c +++ b/hw/msix.c @@ -457,11 +457,6 @@ void msix_notify(PCIDevice *dev, unsigned vector) return; } -if (kvm_enabled() kvm_irqchip_in_kernel()) { -kvm_set_irq(dev-msix_cache[vector].kvm_gsi, 1, NULL); -return; -} - msix_message_from_vector(dev, vector, msg); msi_deliver(msg, dev-msix_cache[vector]); -- 1.7.3.4
Re: [Qemu-devel] [PATCH] qxl: create slots on post_load in any state (fix RHBZ 740547)
ACK On 10/17/2011 12:24 PM, Alon Levy wrote: If we migrate when the device is not in a native state the guest still believes the slots are created, and will cause operations that reference the slots, causing a panic: virtual address out of range on the first of them. Easy to see by migrating in vga mode (with a driver loaded, for instance windows cmd window in full screen mode) and then exiting vga mode back to native mode will cause said panic. Fixed by doing the slot recreation unconditionally at post_load Signed-off-by: Alon Levyal...@redhat.com --- hw/qxl.c | 14 -- 1 files changed, 8 insertions(+), 6 deletions(-) diff --git a/hw/qxl.c b/hw/qxl.c index 03848ed..4e9f39f 100644 --- a/hw/qxl.c +++ b/hw/qxl.c @@ -1684,6 +1684,14 @@ static int qxl_post_load(void *opaque, int version) qxl_mode_to_string(d-mode)); newmode = d-mode; d-mode = QXL_MODE_UNDEFINED; +for (i = 0; i NUM_MEMSLOTS; i++) { +if (!d-guest_slots[i].active) { +continue; +} +dprint(d, 1, %s: restoring guest slot %d delta %PRIu64\n, + __func__, i, d-guest_slots[i].delta); +qxl_add_memslot(d, i, d-guest_slots[i].delta, QXL_SYNC); +} switch (newmode) { case QXL_MODE_UNDEFINED: break; @@ -1691,12 +1699,6 @@ static int qxl_post_load(void *opaque, int version) qxl_enter_vga_mode(d); break; case QXL_MODE_NATIVE: -for (i = 0; i NUM_MEMSLOTS; i++) { -if (!d-guest_slots[i].active) { -continue; -} -qxl_add_memslot(d, i, 0, QXL_SYNC); -} qxl_create_guest_primary(d, 1, QXL_SYNC); /* replay surface-create and cursor-set commands */
Re: [Qemu-devel] [PATCH RFC v1 2/2] hyper-v: initialize Hyper-V CPUID leafs.
On 10/17/2011 12:41 PM, Avi Kivity wrote: Even not counting that hyper-v support should IMHO not be in KVM-specific code, I still think this shouldn't remove KVM leaves completely but rather move them to 0x40000100. The KVM paravirtualization code then can similarly probe with 0x100 stride up to 0x40001000. This is what was done for Xen, and it allows to enable enlightenments independent of whether the guest is Linux or Windows. However, let's get a third opinion---Avi, what do you think? I agree with you, especially as this already works for Xen. Note it doesn't completely solve the issue (so we have two interfaces, which is the preferred one?), but it's better than nothing. Windows doesn't look beyond 0x40000000, so Hyper-V stays there and KVM has to shift. So MS solved that part for us. :) Paolo
Re: [Qemu-devel] [PATCH v2 0/3] coroutinization of flush and discard (split out of NBD series)
On 10/17/2011 12:43 PM, Kevin Wolf wrote: Cool, I wasn't aware of that. That's a very nice side effect! Maybe we should write this down in a comment and remove the now unnecessary error handling from callers. Looks like I finally have an excuse to play with Coccinelle! Paolo
[Qemu-devel] [RFC][PATCH 33/45] qemu-kvm: Factor out kvm_device_intx_assign
Avoid passing kvm_assigned_irq on INTx assignment and separate this function from (to-be-refactored) MSI/MSI-X assignment. Signed-off-by: Jan Kiszka jan.kis...@siemens.com --- hw/device-assignment.c | 21 ++--- qemu-kvm.c | 17 + qemu-kvm.h |2 ++ 3 files changed, 29 insertions(+), 11 deletions(-) diff --git a/hw/device-assignment.c b/hw/device-assignment.c index e5ac54c..f145a84 100644 --- a/hw/device-assignment.c +++ b/hw/device-assignment.c @@ -825,7 +825,7 @@ static void deassign_irq(AssignedDevice *dev) static int assign_intx(AssignedDevice *dev) { -struct kvm_assigned_irq assigned_irq_data; +uint32_t irq_type = 0; int irq, r; /* Interrupt PIN 0 means don't use INTx */ @@ -841,17 +841,16 @@ static int assign_intx(AssignedDevice *dev) deassign_irq(dev); -memset(assigned_irq_data, 0, sizeof(assigned_irq_data)); -assigned_irq_data.assigned_dev_id = calc_assigned_dev_id(dev); -assigned_irq_data.guest_irq = irq; -assigned_irq_data.flags = KVM_DEV_IRQ_GUEST_INTX; +irq_type = KVM_DEV_IRQ_GUEST_INTX; if (dev-features ASSIGNED_DEVICE_PREFER_MSI_MASK -dev-cap.available ASSIGNED_DEVICE_CAP_MSI) -assigned_irq_data.flags |= KVM_DEV_IRQ_HOST_MSI; -else -assigned_irq_data.flags |= KVM_DEV_IRQ_HOST_INTX; +dev-cap.available ASSIGNED_DEVICE_CAP_MSI) { +irq_type |= KVM_DEV_IRQ_HOST_MSI; +} else { +irq_type |= KVM_DEV_IRQ_HOST_INTX; +} -r = kvm_assign_irq(kvm_state, assigned_irq_data); +r = kvm_device_intx_assign(kvm_state, calc_assigned_dev_id(dev), irq_type, + irq); if (r 0) { fprintf(stderr, Failed to assign irq for \%s\: %s\n, dev-dev.qdev.id, strerror(-r)); @@ -861,7 +860,7 @@ static int assign_intx(AssignedDevice *dev) } dev-girq = irq; -dev-irq_requested_type = assigned_irq_data.flags; +dev-irq_requested_type = irq_type; return r; } diff --git a/qemu-kvm.c b/qemu-kvm.c index c24e93c..0086514 100644 --- a/qemu-kvm.c +++ b/qemu-kvm.c @@ -194,6 +194,23 @@ static int kvm_old_assign_irq(KVMState *s, return kvm_vm_ioctl(s, KVM_ASSIGN_IRQ, assigned_irq); } +int 
kvm_device_intx_assign(KVMState *s, uint32_t dev_id, + uint32_t host_irq_type, uint32_t guest_irq) +{ +struct kvm_assigned_irq assigned_irq; + +assigned_irq.assigned_dev_id = dev_id; +assigned_irq.guest_irq = guest_irq; +assigned_irq.flags = KVM_DEV_IRQ_GUEST_INTX | +(host_irq_type (KVM_DEV_IRQ_HOST_INTX | KVM_DEV_IRQ_HOST_MSI)); +if (kvm_check_extension(s, KVM_CAP_ASSIGN_DEV_IRQ)) { +return kvm_vm_ioctl(s, KVM_ASSIGN_DEV_IRQ, assigned_irq); +} else { +assigned_irq.host_irq = 0; +return kvm_vm_ioctl(s, KVM_ASSIGN_IRQ, assigned_irq); +} +} + #ifdef KVM_CAP_ASSIGN_DEV_IRQ int kvm_assign_irq(KVMState *s, struct kvm_assigned_irq *assigned_irq) { diff --git a/qemu-kvm.h b/qemu-kvm.h index 7cdb5a8..783df7f 100644 --- a/qemu-kvm.h +++ b/qemu-kvm.h @@ -150,6 +150,8 @@ int kvm_assign_pci_device(KVMState *s, */ int kvm_assign_irq(KVMState *s, struct kvm_assigned_irq *assigned_irq); +int kvm_device_intx_assign(KVMState *s, uint32_t dev_id, + uint32_t host_irq_type, uint32_t guest_irq); int kvm_device_irq_deassign(KVMState *s, uint32_t dev_id, uint32_t type); /*! -- 1.7.3.4
Re: [Qemu-devel] GPLv3 troubles (was: [PATCH 6/7] target-xtensa: add fsf core)
Am 15.10.2011 11:02, schrieb Blue Swirl: On Mon, Oct 10, 2011 at 2:26 AM, Max Filippov jcmvb...@gmail.com wrote: diff --git a/target-xtensa/core-fsf/gdb-config.c b/target-xtensa/core-fsf/gdb-config.c new file mode 100644 index 000..6705d9c --- /dev/null +++ b/target-xtensa/core-fsf/gdb-config.c @@ -0,0 +1,152 @@ +/* Configuration for the Xtensa architecture for GDB, the GNU debugger. + + Copyright (C) 2003, 2005, 2006, 2007, 2008 Free Software Foundation, Inc. + + This file is part of GDB. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or Nack. GPLv3 is by design incompatible with GPLv2only (but not with GPLv2+ or IIRC BSD-like) licenses. Please only use code from GDB before v3 switch. As a side note, a quick grep shows that GPLv2only is a small minority in QEMU. In theory it should be possible to agree to switch from GPLv2only to some GPLv3 compatible license for all of QEMU code, or in a theory with alternative universes, even get FSF to relicense GDB under GPLv2only compatible way. Or, with the aid of infinite number of monkeys of Internet waiting to waste their time, rewrite incompatible but interesting parts of GDB or QEMU under The One True License of the day. Could we please draft some policy on this? This is not a GDB issue, it's very general. Whether we like it or not, there is GPLv3-licensed code and there will probably be a GPLv4 one day. IMO having old GPLv2-only code is one thing. But there's a lot of new GPLv2-only code cooking and occasionally pouring in, especially from qemu-kvm. Device assignment is a current example I encountered. 
If we could make checkpatch.pl detect new GPLv2-only code, then I would hope, given the dynamic QEMU development of the last few years, that the GPLv2-only portions become so small (both in relation and absolute) that they can either be replaced or the authors' permission be obtained to change the license to GPLv2-or-later. Andreas -- SUSE LINUX Products GmbH, Maxfeldstr. 5, 90409 Nürnberg, Germany GF: Jeff Hawn, Jennifer Guild, Felix Imendörffer; HRB 16746, AG Nürnberg
Re: [Qemu-devel] GPLv3 troubles
On 10/17/2011 12:45 PM, Andreas Färber wrote: Could we please draft some policy on this? This is not a GDB issue, it's very general. Whether we like it or not, there is GPLv3-licensed code and there will probably be a GPLv4 one day. IMO having old GPLv2-only code is one thing. But there's a lot of new GPLv2-only code cooking and occasionally pouring in, especially from qemu-kvm. Device assignment is a current example I encountered. If we could make checkpatch.pl detect new GPLv2-only code, then I would hope, given the dynamic QEMU development of the last few years, that the GPLv2-only portions become so small (both in relation and absolute) that they can either be replaced or the authors' permission be obtained to change the license to GPLv2-or-later. That is close to impossible, you usually ask permission from all the authors in the history to avoid bigger problems. Paolo
[Qemu-devel] [PATCH] arm gic saving/loading fix
irq_target field saving/loading is in the wrong loop Signed-off-by: Dmitry Koshelev karagio...@gmail.com --- hw/arm_gic.c | 12 ++-- 1 files changed, 6 insertions(+), 6 deletions(-) diff --git a/hw/arm_gic.c b/hw/arm_gic.c index 8286a28..ba05131 100644 --- a/hw/arm_gic.c +++ b/hw/arm_gic.c @@ -662,9 +662,6 @@ static void gic_save(QEMUFile *f, void *opaque) qemu_put_be32(f, s-enabled); for (i = 0; i NUM_CPU(s); i++) { qemu_put_be32(f, s-cpu_enabled[i]); -#ifndef NVIC -qemu_put_be32(f, s-irq_target[i]); -#endif for (j = 0; j 32; j++) qemu_put_be32(f, s-priority1[j][i]); for (j = 0; j GIC_NIRQ; j++) @@ -678,6 +675,9 @@ static void gic_save(QEMUFile *f, void *opaque) qemu_put_be32(f, s-priority2[i]); } for (i = 0; i GIC_NIRQ; i++) { +#ifndef NVIC +qemu_put_be32(f, s-irq_target[i]); +#endif qemu_put_byte(f, s-irq_state[i].enabled); qemu_put_byte(f, s-irq_state[i].pending); qemu_put_byte(f, s-irq_state[i].active); @@ -699,9 +699,6 @@ static int gic_load(QEMUFile *f, void *opaque, int version_id) s-enabled = qemu_get_be32(f); for (i = 0; i NUM_CPU(s); i++) { s-cpu_enabled[i] = qemu_get_be32(f); -#ifndef NVIC -s-irq_target[i] = qemu_get_be32(f); -#endif for (j = 0; j 32; j++) s-priority1[j][i] = qemu_get_be32(f); for (j = 0; j GIC_NIRQ; j++) @@ -715,6 +712,9 @@ static int gic_load(QEMUFile *f, void *opaque, int version_id) s-priority2[i] = qemu_get_be32(f); } for (i = 0; i GIC_NIRQ; i++) { +#ifndef NVIC +s-irq_target[i] = qemu_get_be32(f); +#endif s-irq_state[i].enabled = qemu_get_byte(f); s-irq_state[i].pending = qemu_get_byte(f); s-irq_state[i].active = qemu_get_byte(f);
[Qemu-devel] [RFC][PATCH 20/45] qemu-kvm: msix: Only invoke msix_handle_mask_update on changes
Reorganize msix_mmio_writel so that msix_handle_mask_update is only called on mask changes. Pass previous config space value to msix_write_config so that it can check if a mask change took place. Signed-off-by: Jan Kiszka jan.kis...@siemens.com --- hw/msix.c | 36 hw/msix.h |2 +- hw/pci.c |3 ++- 3 files changed, 23 insertions(+), 18 deletions(-) diff --git a/hw/msix.c b/hw/msix.c index 6886255..57d0aac 100644 --- a/hw/msix.c +++ b/hw/msix.c @@ -206,12 +206,12 @@ static void msix_clr_pending(PCIDevice *dev, int vector) *msix_pending_byte(dev, vector) = ~msix_pending_mask(vector); } -static int msix_function_masked(PCIDevice *dev) +static bool msix_function_masked(PCIDevice *dev) { return dev-config[dev-msix_cap + MSIX_CONTROL_OFFSET] MSIX_MASKALL_MASK; } -static int msix_is_masked(PCIDevice *dev, int vector) +static bool msix_is_masked(PCIDevice *dev, int vector) { unsigned offset = vector * PCI_MSIX_ENTRY_SIZE + PCI_MSIX_ENTRY_VECTOR_CTRL; @@ -229,9 +229,10 @@ static void msix_handle_mask_update(PCIDevice *dev, int vector) /* Handle MSI-X capability config write. 
*/ void msix_write_config(PCIDevice *dev, uint32_t addr, - uint32_t val, int len) + uint32_t old_val, int len) { unsigned enable_pos = dev-msix_cap + MSIX_CONTROL_OFFSET; +bool was_masked; int vector; if (!msix_present(dev) || !range_covers_byte(addr, len, enable_pos)) { @@ -244,12 +245,13 @@ void msix_write_config(PCIDevice *dev, uint32_t addr, pci_device_deassert_intx(dev); -if (msix_function_masked(dev)) { -return; -} - -for (vector = 0; vector dev-msix_entries_nr; ++vector) { -msix_handle_mask_update(dev, vector); +old_val = (enable_pos - addr) * 8; +was_masked = +(old_val (MSIX_MASKALL_MASK | MSIX_ENABLE_MASK)) != MSIX_ENABLE_MASK; +if (was_masked != msix_function_masked(dev)) { +for (vector = 0; vector dev-msix_entries_nr; ++vector) { +msix_handle_mask_update(dev, vector); +} } } @@ -259,17 +261,19 @@ static void msix_mmio_write(void *opaque, target_phys_addr_t addr, PCIDevice *dev = opaque; unsigned int offset = addr (MSIX_PAGE_SIZE - 1) ~0x3; unsigned int vector = offset / PCI_MSIX_ENTRY_SIZE; -int was_masked = msix_is_masked(dev, vector); +bool was_masked = msix_is_masked(dev, vector); +int r; + pci_set_long(dev-msix_table_page + offset, val); if (kvm_enabled() kvm_irqchip_in_kernel()) { kvm_msix_update(dev, vector, was_masked, msix_is_masked(dev, vector)); } -if (vector dev-msix_entries_nr) { -if (was_masked != msix_is_masked(dev, vector) -dev-msix_mask_notifier) { -int r = dev-msix_mask_notifier(dev, vector, -msix_is_masked(dev, vector)); +if (vector dev-msix_entries_nr +was_masked != msix_is_masked(dev, vector)) { +if (dev-msix_mask_notifier) { +r = dev-msix_mask_notifier(dev, vector, +msix_is_masked(dev, vector)); assert(r = 0); } msix_handle_mask_update(dev, vector); @@ -303,7 +307,7 @@ static void msix_mask_all(struct PCIDevice *dev, unsigned nentries) for (vector = 0; vector nentries; ++vector) { unsigned offset = vector * PCI_MSIX_ENTRY_SIZE + PCI_MSIX_ENTRY_VECTOR_CTRL; -int was_masked = msix_is_masked(dev, vector); +bool was_masked = 
msix_is_masked(dev, vector); dev-msix_table_page[offset] |= PCI_MSIX_ENTRY_CTRL_MASKBIT; if (was_masked != msix_is_masked(dev, vector) dev-msix_mask_notifier) { diff --git a/hw/msix.h b/hw/msix.h index a8661e1..685dbe2 100644 --- a/hw/msix.h +++ b/hw/msix.h @@ -9,7 +9,7 @@ int msix_init(PCIDevice *pdev, unsigned short nentries, unsigned bar_nr, unsigned bar_size); void msix_write_config(PCIDevice *pci_dev, uint32_t address, - uint32_t val, int len); + uint32_t old_val, int len); int msix_uninit(PCIDevice *d, MemoryRegion *bar); diff --git a/hw/pci.c b/hw/pci.c index 6673989..39b2173 100644 --- a/hw/pci.c +++ b/hw/pci.c @@ -1129,6 +1129,7 @@ uint32_t pci_default_read_config(PCIDevice *d, void pci_default_write_config(PCIDevice *d, uint32_t addr, uint32_t val, int l) { +uint32_t old_val = pci_default_read_config(d, addr, l); int i, was_irq_disabled = pci_irq_disabled(d); for (i = 0; i l; val = 8, ++i) { @@ -1156,7 +1157,7 @@ void pci_default_write_config(PCIDevice *d, uint32_t addr, uint32_t val, int l) pci_update_irq_disabled(d, was_irq_disabled); msi_write_config(d, addr, val, l); -msix_write_config(d, addr, val, l); +msix_write_config(d, addr, old_val, l); } /***/
[Qemu-devel] [PATCH] arm cpu state loading fix
Floating registers loading fix. Signed-off-by: Dmitry Koshelev karaghio...@gmail.com --- target-arm/machine.c |2 +- 1 files changed, 1 insertions(+), 1 deletions(-) diff --git a/target-arm/machine.c b/target-arm/machine.c index 3925d3a..73d82c9 100644 --- a/target-arm/machine.c +++ b/target-arm/machine.c @@ -175,7 +175,7 @@ int cpu_load(QEMUFile *f, void *opaque, int version_id) env-vfp.vec_stride = qemu_get_be32(f); if (arm_feature(env, ARM_FEATURE_VFP3)) { -for (i = 0; i 16; i++) { +for (i = 16; i 32; i++) { CPU_DoubleU u; u.l.upper = qemu_get_be32(f); u.l.lower = qemu_get_be32(f);
Re: [Qemu-devel] [PATCH] arm gic saving/loading fix
Am 17.10.2011 12:48, schrieb Dmitry Koshelev: irq_target field saving/loading is in the wrong loop Signed-off-by: Dmitry Koshelev karagio...@gmail.com Reviewed-by: Andreas Färber afaer...@suse.de Andreas --- hw/arm_gic.c | 12 ++-- 1 files changed, 6 insertions(+), 6 deletions(-) diff --git a/hw/arm_gic.c b/hw/arm_gic.c index 8286a28..ba05131 100644 --- a/hw/arm_gic.c +++ b/hw/arm_gic.c @@ -662,9 +662,6 @@ static void gic_save(QEMUFile *f, void *opaque) qemu_put_be32(f, s-enabled); for (i = 0; i NUM_CPU(s); i++) { qemu_put_be32(f, s-cpu_enabled[i]); -#ifndef NVIC -qemu_put_be32(f, s-irq_target[i]); -#endif for (j = 0; j 32; j++) qemu_put_be32(f, s-priority1[j][i]); for (j = 0; j GIC_NIRQ; j++) @@ -678,6 +675,9 @@ static void gic_save(QEMUFile *f, void *opaque) qemu_put_be32(f, s-priority2[i]); } for (i = 0; i GIC_NIRQ; i++) { +#ifndef NVIC +qemu_put_be32(f, s-irq_target[i]); +#endif qemu_put_byte(f, s-irq_state[i].enabled); qemu_put_byte(f, s-irq_state[i].pending); qemu_put_byte(f, s-irq_state[i].active); @@ -699,9 +699,6 @@ static int gic_load(QEMUFile *f, void *opaque, int version_id) s-enabled = qemu_get_be32(f); for (i = 0; i NUM_CPU(s); i++) { s-cpu_enabled[i] = qemu_get_be32(f); -#ifndef NVIC -s-irq_target[i] = qemu_get_be32(f); -#endif for (j = 0; j 32; j++) s-priority1[j][i] = qemu_get_be32(f); for (j = 0; j GIC_NIRQ; j++) @@ -715,6 +712,9 @@ static int gic_load(QEMUFile *f, void *opaque, int version_id) s-priority2[i] = qemu_get_be32(f); } for (i = 0; i GIC_NIRQ; i++) { +#ifndef NVIC +s-irq_target[i] = qemu_get_be32(f); +#endif s-irq_state[i].enabled = qemu_get_byte(f); s-irq_state[i].pending = qemu_get_byte(f); s-irq_state[i].active = qemu_get_byte(f); -- SUSE LINUX Products GmbH, Maxfeldstr. 5, 90409 Nürnberg, Germany GF: Jeff Hawn, Jennifer Guild, Felix Imendörffer; HRB 16746, AG Nürnberg
Re: [Qemu-devel] [PATCH v2 0/3] coroutinization of flush and discard (split out of NBD series)
Am 17.10.2011 12:32, schrieb Paolo Bonzini: This series, applying on top of block branch, enables drivers to use coroutines for flush and discard. I kept aio_discard after discussing with Kevin since it should be useful not only for raw-posix-aio, but also for the userspace iSCSI backend (and in general for backends relying on an external library that is designed around aio). BTW, with this patch we get for free the invariant that bdrv_aio_* never returns a NULL acb (Stefan's patches already got to that point for read/write, of course). v1-v2: add bdrv_co_flush and bdrv_co_discard entry points Paolo Bonzini (2): block: unify flush implementations block: add bdrv_co_discard and bdrv_aio_discard support Stefan Hajnoczi (1): block: drop redundant bdrv_flush implementation block.c | 258 + block.h |5 + block/blkdebug.c |6 -- block/blkverify.c |9 -- block/qcow.c |6 -- block/qcow2.c | 19 block/qed.c |6 -- block/raw-posix.c | 18 block/raw.c | 23 ++--- block_int.h | 10 ++- trace-events |1 + 11 files changed, 184 insertions(+), 177 deletions(-) Thanks, applied all to the block branch. Kevin
[Qemu-devel] [RFC][PATCH 36/45] qemu-kvm: Factor out kvm_device_msix_* services
Create kvm_device_msix_{supported,init_vectors,set_vector,assign}, replacing the old kvm_assign_set_msix_{nr,entry} services. The new API no longer requires direct fiddling with the KVM API data structures and just takes the required parameters. kvm_device_msix_set_vector also combines MSI route creation/update with registering the vector with the device assignment kernel part. The routing information is now stored in the msix_cache of the backing QEMU PCI device, maintained by the device assigment code until we switch to generic MSI-X support. Signed-off-by: Jan Kiszka jan.kis...@siemens.com --- hw/device-assignment.c | 103 +++ hw/device-assignment.h |1 - qemu-kvm.c | 42 +-- qemu-kvm.h | 11 +++-- 4 files changed, 76 insertions(+), 81 deletions(-) diff --git a/hw/device-assignment.c b/hw/device-assignment.c index 83951a3..2484afd 100644 --- a/hw/device-assignment.c +++ b/hw/device-assignment.c @@ -648,15 +648,13 @@ again: static QLIST_HEAD(, AssignedDevice) devs = QLIST_HEAD_INITIALIZER(devs); -static void free_dev_irq_entries(AssignedDevice *dev) +static void invalidate_msix_vectors(AssignedDevice *dev) { int i; -for (i = 0; i dev-irq_entries_nr; i++) -kvm_del_routing_entry(dev-entry[i]); -g_free(dev-entry); -dev-entry = NULL; -dev-irq_entries_nr = 0; +for (i = 0; i dev-irq_entries_nr; i++) { +kvm_msi_cache_invalidate(dev-dev.msix_cache[i]); +} } static void free_assigned_device(AssignedDevice *dev) @@ -701,12 +699,12 @@ static void free_assigned_device(AssignedDevice *dev) close(dev-real_device.config_fd); } -free_dev_irq_entries(dev); - if (dev-dev.msi_cache) { kvm_msi_cache_invalidate(dev-dev.msi_cache[0]); g_free(dev-dev.msi_cache); } +invalidate_msix_vectors(dev); +g_free(dev-dev.msix_cache); } static uint32_t calc_assigned_dev_id(AssignedDevice *dev) @@ -953,11 +951,12 @@ static int assigned_dev_set_msix_vectors(PCIDevice *pci_dev) { AssignedDevice *adev = DO_UPCAST(AssignedDevice, dev, pci_dev); uint16_t entries_nr = 0, entries_max_nr; -int pos = 0, i, r = 
0; -uint32_t msg_addr, msg_upper_addr, msg_data; -struct kvm_assigned_msix_nr msix_nr; -struct kvm_assigned_msix_entry msix_entry; void *msix_page = adev-msix_table_page; +uint32_t dev_id; +MSIMessage msg; +int pos, i, r; + +assert(adev-irq_entries_nr == 0); pos = pci_find_capability(pci_dev, PCI_CAP_ID_MSIX); @@ -980,72 +979,40 @@ static int assigned_dev_set_msix_vectors(PCIDevice *pci_dev) return -EINVAL; } -msix_nr.assigned_dev_id = calc_assigned_dev_id(adev); -msix_nr.entry_nr = entries_nr; -r = kvm_assign_set_msix_nr(kvm_state, msix_nr); -if (r != 0) { -fprintf(stderr, fail to set MSI-X entry number for MSIX! %s\n, -strerror(-r)); +dev_id = calc_assigned_dev_id(adev); + +r = kvm_device_msix_init_vectors(kvm_state, dev_id, entries_nr); +if (r 0) { return r; } - -free_dev_irq_entries(adev); +pci_dev-msix_cache = g_malloc0(entries_nr * sizeof(MSIRoutingCache)); adev-irq_entries_nr = entries_nr; -adev-entry = g_malloc0(entries_nr * sizeof(*(adev-entry))); -msix_entry.assigned_dev_id = msix_nr.assigned_dev_id; -entries_nr = 0; for (i = 0; i entries_max_nr; i++) { -if (entries_nr = msix_nr.entry_nr) { +if (entries_nr == 0) { break; } -msg_data = pci_get_long(msix_page + i * PCI_MSIX_ENTRY_SIZE + +msg.data = pci_get_long(msix_page + i * PCI_MSIX_ENTRY_SIZE + PCI_MSIX_ENTRY_DATA); -if (msg_data == 0) { +if (msg.data == 0) { continue; } -msg_addr = pci_get_long(msix_page + i * PCI_MSIX_ENTRY_SIZE + -PCI_MSIX_ENTRY_LOWER_ADDR); -msg_upper_addr = pci_get_long(msix_page + i * PCI_MSIX_ENTRY_SIZE + - PCI_MSIX_ENTRY_UPPER_ADDR); +msg.address = pci_get_quad(msix_page + i * PCI_MSIX_ENTRY_SIZE + + PCI_MSIX_ENTRY_LOWER_ADDR); -r = kvm_get_irq_route_gsi(); +r = kvm_device_msix_set_vector(kvm_state, dev_id, i, msg, + pci_dev-msix_cache[i]); if (r 0) { return r; } - -adev-entry[entries_nr].gsi = r; -adev-entry[entries_nr].type = KVM_IRQ_ROUTING_MSI; -adev-entry[entries_nr].flags = 0; -adev-entry[entries_nr].u.msi.address_lo = msg_addr; -adev-entry[entries_nr].u.msi.address_hi = 
msg_upper_addr; -adev-entry[entries_nr].u.msi.data = msg_data; -DEBUG(MSI-X data 0x%x, MSI-X addr_lo 0x%x\n!, msg_data, msg_addr); -kvm_add_routing_entry(adev-entry[entries_nr], NULL); - -
[Qemu-devel] [RFC][PATCH 25/45] qemu-kvm: Update MSI cache on kvm_msi_irqfd_set
Updating the MSI message registration on kvm_msi_irqfd_set will allow us to switch to a lazy mode and remove the need to track message changes in the device config space. Signed-off-by: Jan Kiszka jan.kis...@siemens.com --- hw/virtio-pci.c | 10 ++ kvm.h |3 ++- qemu-kvm.c | 17 ++--- 3 files changed, 22 insertions(+), 8 deletions(-) diff --git a/hw/virtio-pci.c b/hw/virtio-pci.c index 6718945..85d6771 100644 --- a/hw/virtio-pci.c +++ b/hw/virtio-pci.c @@ -521,10 +521,10 @@ static void virtio_pci_guest_notifier_read(void *opaque) } static int virtio_pci_mask_vq(PCIDevice *dev, unsigned int vector, - VirtQueue *vq, bool masked) + MSIMessage *msg, VirtQueue *vq, bool masked) { EventNotifier *notifier = virtio_queue_get_guest_notifier(vq); -int r = kvm_msi_irqfd_set(dev-msix_cache[vector], +int r = kvm_msi_irqfd_set(msg, dev-msix_cache[vector], event_notifier_get_fd(notifier), !masked); if (r 0) { @@ -554,7 +554,8 @@ static int virtio_pci_msi_vector_config(PCIDevice *dev, unsigned int vector, if (virtio_queue_vector(vdev, n) != vector) { continue; } -r = virtio_pci_mask_vq(dev, vector, virtio_get_queue(vdev, n), masked); +r = virtio_pci_mask_vq(dev, vector, msg, virtio_get_queue(vdev, n), + masked); if (r 0) { goto undo; } @@ -565,7 +566,8 @@ undo: if (virtio_queue_vector(vdev, n) != vector) { continue; } -virtio_pci_mask_vq(dev, vector, virtio_get_queue(vdev, n), !masked); +virtio_pci_mask_vq(dev, vector, msg, virtio_get_queue(vdev, n), + !masked); } return r; } diff --git a/kvm.h b/kvm.h index fe2eec5..8647647 100644 --- a/kvm.h +++ b/kvm.h @@ -208,7 +208,8 @@ int kvm_msi_message_add(MSIMessage *msg, MSIRoutingCache *cache); int kvm_msi_message_del(MSIRoutingCache *cache); int kvm_msi_message_update(MSIMessage *msg, MSIRoutingCache *cache); -int kvm_msi_irqfd_set(MSIRoutingCache *cache, int fd, bool assigned); +int kvm_msi_irqfd_set(MSIMessage *msg, MSIRoutingCache *cache, int fd, + bool assigned); int kvm_commit_irq_routes(void); diff --git a/qemu-kvm.c b/qemu-kvm.c 
index ab7703b..6bdd7b5 100644 --- a/qemu-kvm.c +++ b/qemu-kvm.c @@ -524,10 +524,21 @@ int kvm_msi_message_update(MSIMessage *msg, MSIRoutingCache *cache) } -int kvm_msi_irqfd_set(MSIRoutingCache *cache, int fd, bool assigned) +int kvm_msi_irqfd_set(MSIMessage *msg, MSIRoutingCache *cache, int fd, + bool assigned) { -if (cache-type == MSI_ROUTE_NONE) { -return assigned ? -EINVAL : 0; +int ret; + +if (assigned) { +if (cache-type == MSI_ROUTE_NONE) { +return -EINVAL; +} +ret = kvm_msi_message_update(msg, cache); +if (ret 0) { +return ret; +} +} else if (cache-type == MSI_ROUTE_NONE) { +return 0; } cache-kvm_irqfd = assigned ? fd : -1; return kvm_set_irqfd(cache-kvm_gsi, fd, assigned); -- 1.7.3.4
Re: [Qemu-devel] [RFC][PATCH 11/45] msi: Factor out delivery hook
On 10/17/2011 11:27 AM, Jan Kiszka wrote: So far we deliver MSI messages by writing them into the target MMIO area. This reflects what happens on hardware, but imposes some limitations on the emulation when introducing KVM in-kernel irqchip models. For those we will need to track the message origin. Why do we need to track the message origin? Emulated interrupt remapping? Moreover, different architecture or accelerators may want to overload the delivery handler. Therefore, this commit introduces a delivery hook that is called by the MSI/MSI-X layer when devices send normal messages, but also on spurious deliveries that ended up on the APIC MMIO handler. Our default delivery handler for APIC-based PCs then dispatches between real MSIs and other DMA requests that happened to take the MSI patch. 'path' -static void apic_send_msi(target_phys_addr_t addr, uint32_t data) +void apic_deliver_msi(MSIMessage *msg) In general, it is better these days to pass small structures by value. Not sure what the gain is from intercepting the msi just before the stl_phys() vs. in the apic handler. -- error compiling committee.c: too many arguments to function
[Qemu-devel] [PATCH 1/2] hda: do not mix output and input streams, RHBZ #740493
Windows 7 may use the same stream number for input and output. That will result in a lot of garbage on playback. The hardcoded value of 4 needs to be in sync with GCAP streams description and IN/OUT registers. Signed-off-by: Marc-André Lureau marcandre.lur...@redhat.com --- hw/intel-hda.c |9 + 1 files changed, 5 insertions(+), 4 deletions(-) diff --git a/hw/intel-hda.c b/hw/intel-hda.c index 4272204..c6a3fec 100644 --- a/hw/intel-hda.c +++ b/hw/intel-hda.c @@ -389,14 +389,15 @@ static bool intel_hda_xfer(HDACodecDevice *dev, uint32_t stnr, bool output, { HDACodecBus *bus = DO_UPCAST(HDACodecBus, qbus, dev-qdev.parent_bus); IntelHDAState *d = container_of(bus, IntelHDAState, codecs); -IntelHDAStream *st = NULL; target_phys_addr_t addr; uint32_t s, copy, left; +IntelHDAStream *st; bool irq = false; -for (s = 0; s ARRAY_SIZE(d-st); s++) { -if (stnr == ((d-st[s].ctl 20) 0x0f)) { -st = d-st + s; +st = output ? d-st + 4 : d-st; +for (s = 0; s 4; s++) { +if (stnr == ((st[s].ctl 20) 0x0f)) { +st = st + s; break; } } -- 1.7.6.2
[Qemu-devel] [PATCH 2/2] hda: do not mix output and input stream states, RHBZ #740493
Windows 7 may use the same stream number for input and output. Current code will confuse streams. Changes since v1: - keep running_compat[] for migration version 1 - add running_real[] for migration version 2 Signed-off-by: Marc-André Lureau marcandre.lur...@redhat.com --- hw/hda-audio.c | 26 +++--- hw/intel-hda.c |9 + hw/intel-hda.h |2 +- 3 files changed, 25 insertions(+), 12 deletions(-) diff --git a/hw/hda-audio.c b/hw/hda-audio.c index 03c0a24..a72b721 100644 --- a/hw/hda-audio.c +++ b/hw/hda-audio.c @@ -462,7 +462,8 @@ struct HDAAudioState { QEMUSoundCard card; const desc_codec *desc; HDAAudioStream st[4]; -bool running[16]; +bool running_compat[16]; +bool running_real[2 * 16]; /* properties */ uint32_t debug; @@ -659,7 +660,7 @@ static void hda_audio_command(HDACodecDevice *hda, uint32_t nid, uint32_t data) st-channel = payload 0x0f; dprint(a, 2, %s: stream %d, channel %d\n, st-node-name, st-stream, st-channel); -hda_audio_set_running(st, a-running[st-stream]); +hda_audio_set_running(st, a-running_real[st-output * 16 + st-stream]); hda_codec_response(hda, true, 0); break; case AC_VERB_GET_CONV: @@ -742,16 +743,20 @@ fail: hda_codec_response(hda, true, 0); } -static void hda_audio_stream(HDACodecDevice *hda, uint32_t stnr, bool running) +static void hda_audio_stream(HDACodecDevice *hda, uint32_t stnr, bool running, bool output) { HDAAudioState *a = DO_UPCAST(HDAAudioState, hda, hda); int s; -a-running[stnr] = running; +a-running_compat[stnr] = running; +a-running_real[output * 16 + stnr] = running; for (s = 0; s ARRAY_SIZE(a-st); s++) { if (a-st[s].node == NULL) { continue; } +if (a-st[s].output != output) { +continue; +} if (a-st[s].stream != stnr) { continue; } @@ -833,6 +838,12 @@ static int hda_audio_post_load(void *opaque, int version) int i; dprint(a, 1, %s\n, __FUNCTION__); +if (version == 1) { +/* assume running_compat[] is for output streams */ +for (i = 0; i ARRAY_SIZE(a-running_compat); i++) +a-running_real[16 + i] = a-running_compat[i]; +} + for (i 
= 0; i ARRAY_SIZE(a-st); i++) { st = a-st + i; if (st-node == NULL) @@ -840,7 +851,7 @@ static int hda_audio_post_load(void *opaque, int version) hda_codec_parse_fmt(st-format, st-as); hda_audio_setup(st); hda_audio_set_amp(st); -hda_audio_set_running(st, a-running[st-stream]); +hda_audio_set_running(st, a-running_real[st-output * 16 + st-stream]); } return 0; } @@ -864,13 +875,14 @@ static const VMStateDescription vmstate_hda_audio_stream = { static const VMStateDescription vmstate_hda_audio = { .name = hda-audio, -.version_id = 1, +.version_id = 2, .post_load = hda_audio_post_load, .fields = (VMStateField []) { VMSTATE_STRUCT_ARRAY(st, HDAAudioState, 4, 0, vmstate_hda_audio_stream, HDAAudioStream), -VMSTATE_BOOL_ARRAY(running, HDAAudioState, 16), +VMSTATE_BOOL_ARRAY(running_compat, HDAAudioState, 16), +VMSTATE_BOOL_ARRAY_V(running_real, HDAAudioState, 2 * 16, 2), VMSTATE_END_OF_LIST() } }; diff --git a/hw/intel-hda.c b/hw/intel-hda.c index c6a3fec..f97775c 100644 --- a/hw/intel-hda.c +++ b/hw/intel-hda.c @@ -485,7 +485,7 @@ static void intel_hda_parse_bdl(IntelHDAState *d, IntelHDAStream *st) st-bp= 0; } -static void intel_hda_notify_codecs(IntelHDAState *d, uint32_t stream, bool running) +static void intel_hda_notify_codecs(IntelHDAState *d, uint32_t stream, bool running, bool output) { DeviceState *qdev; HDACodecDevice *cdev; @@ -493,7 +493,7 @@ static void intel_hda_notify_codecs(IntelHDAState *d, uint32_t stream, bool runn QLIST_FOREACH(qdev, d-codecs.qbus.children, sibling) { cdev = DO_UPCAST(HDACodecDevice, qdev, qdev); if (cdev-info-stream) { -cdev-info-stream(cdev, stream, running); +cdev-info-stream(cdev, stream, running, output); } } } @@ -567,6 +567,7 @@ static void intel_hda_set_ics(IntelHDAState *d, const IntelHDAReg *reg, uint32_t static void intel_hda_set_st_ctl(IntelHDAState *d, const IntelHDAReg *reg, uint32_t old) { +bool output = reg-stream = 4; IntelHDAStream *st = d-st + reg-stream; if (st-ctl 0x01) { @@ -582,11 +583,11 @@ static void 
intel_hda_set_st_ctl(IntelHDAState *d, const IntelHDAReg *reg, uint3 dprint(d, 1, st #%d: start %d (ring buf %d bytes)\n, reg-stream, stnr, st-cbl); intel_hda_parse_bdl(d, st); -intel_hda_notify_codecs(d, stnr, true); +intel_hda_notify_codecs(d, stnr, true, output);
[Qemu-devel] [RFC][PATCH 08/45] Introduce MSIMessage structure
Will be used for generating and distributing MSI messages, both in emulation mode and under KVM. Signed-off-by: Jan Kiszka jan.kis...@siemens.com --- hw/msi.h |5 + qemu-common.h |1 + 2 files changed, 6 insertions(+), 0 deletions(-) diff --git a/hw/msi.h b/hw/msi.h index e5e821f..22e3932 100644 --- a/hw/msi.h +++ b/hw/msi.h @@ -24,6 +24,11 @@ #include qemu-common.h #include pci.h +struct MSIMessage { +uint64_t address; +uint32_t data; +}; + extern bool msi_supported; bool msi_enabled(const PCIDevice *dev); diff --git a/qemu-common.h b/qemu-common.h index 5e87bdf..d3901bd 100644 --- a/qemu-common.h +++ b/qemu-common.h @@ -15,6 +15,7 @@ typedef struct QEMUTimer QEMUTimer; typedef struct QEMUFile QEMUFile; typedef struct QEMUBH QEMUBH; typedef struct DeviceState DeviceState; +typedef struct MSIMessage MSIMessage; struct Monitor; typedef struct Monitor Monitor; -- 1.7.3.4
[Qemu-devel] [RFC][PATCH 03/45] msi: Use msi/msix_present more consistently
Replace some open-coded msi/msix_present checks and drop redundant msix_supported tests (present implies supported). Signed-off-by: Jan Kiszka jan.kis...@siemens.com --- hw/msi.c |2 +- hw/msix.c | 20 2 files changed, 9 insertions(+), 13 deletions(-) diff --git a/hw/msi.c b/hw/msi.c index 5db..b117f69 100644 --- a/hw/msi.c +++ b/hw/msi.c @@ -266,7 +266,7 @@ void msi_uninit(struct PCIDevice *dev) uint16_t flags; uint8_t cap_size; -if (!(dev-cap_present QEMU_PCI_CAP_MSI)) { +if (!msi_present(dev)) { return; } flags = pci_get_word(dev-config + msi_flags_off(dev)); diff --git a/hw/msix.c b/hw/msix.c index ebd5aee..2c4de21 100644 --- a/hw/msix.c +++ b/hw/msix.c @@ -383,8 +383,9 @@ static void msix_free_irq_entries(PCIDevice *dev) /* Clean up resources for the device. */ int msix_uninit(PCIDevice *dev, MemoryRegion *bar) { -if (!(dev-cap_present QEMU_PCI_CAP_MSIX)) +if (!msix_present(dev)) { return 0; +} pci_del_capability(dev, PCI_CAP_ID_MSIX, MSIX_CAP_LENGTH); dev-msix_cap = 0; msix_free_irq_entries(dev); @@ -405,11 +406,7 @@ void msix_save(PCIDevice *dev, QEMUFile *f) { unsigned n = dev-msix_entries_nr; -if (!msix_supported) { -return; -} - -if (!(dev-cap_present QEMU_PCI_CAP_MSIX)) { +if (!msix_present(dev)) { return; } qemu_put_buffer(f, dev-msix_table_page, n * PCI_MSIX_ENTRY_SIZE); @@ -421,10 +418,7 @@ void msix_load(PCIDevice *dev, QEMUFile *f) { unsigned n = dev-msix_entries_nr; -if (!msix_supported) -return; - -if (!(dev-cap_present QEMU_PCI_CAP_MSIX)) { +if (!msix_present(dev)) { return; } @@ -480,8 +474,9 @@ void msix_notify(PCIDevice *dev, unsigned vector) void msix_reset(PCIDevice *dev) { -if (!(dev-cap_present QEMU_PCI_CAP_MSIX)) +if (!msix_present(dev)) { return; +} msix_free_irq_entries(dev); dev-config[dev-msix_cap + MSIX_CONTROL_OFFSET] = ~dev-wmask[dev-msix_cap + MSIX_CONTROL_OFFSET]; @@ -531,8 +526,9 @@ void msix_vector_unuse(PCIDevice *dev, unsigned vector) void msix_unuse_all_vectors(PCIDevice *dev) { -if (!(dev-cap_present QEMU_PCI_CAP_MSIX)) +if 
(!msix_present(dev)) { return; +} msix_free_irq_entries(dev); } -- 1.7.3.4
[Qemu-devel] [RFC][PATCH 45/45] pci-assign: Fix coding style issues
Also remove the dead get_assigned_device at this chance. No functional changes. Signed-off-by: Jan Kiszka jan.kis...@siemens.com --- hw/device-assignment.c | 199 hw/device-assignment.h | 14 ++-- 2 files changed, 107 insertions(+), 106 deletions(-) diff --git a/hw/device-assignment.c b/hw/device-assignment.c index df554b3..c7930e4 100644 --- a/hw/device-assignment.c +++ b/hw/device-assignment.c @@ -58,10 +58,10 @@ #ifdef DEVICE_ASSIGNMENT_DEBUG #define DEBUG(fmt, ...) \ do { \ - fprintf(stderr, %s: fmt, __func__ , __VA_ARGS__);\ +fprintf(stderr, %s: fmt, __func__ , __VA_ARGS__); \ } while (0) #else -#define DEBUG(fmt, ...) do { } while(0) +#define DEBUG(fmt, ...) do { } while (0) #endif static void assigned_dev_load_option_rom(AssignedDevice *dev); @@ -97,27 +97,27 @@ static uint32_t assigned_dev_ioport_rw(AssignedDevRegion *dev_region, DEBUG(out val=%x, len=%d, e_phys=%x, host=%x\n, *val, len, addr, port); switch (len) { -case 1: -outb(*val, port); -break; -case 2: -outw(*val, port); -break; -case 4: -outl(*val, port); -break; +case 1: +outb(*val, port); +break; +case 2: +outw(*val, port); +break; +case 4: +outl(*val, port); +break; } } else { switch (len) { -case 1: -ret = inb(port); -break; -case 2: -ret = inw(port); -break; -case 4: -ret = inl(port); -break; +case 1: +ret = inb(port); +break; +case 2: +ret = inw(port); +break; +case 4: +ret = inl(port); +break; } DEBUG(in val=%x, len=%d, e_phys=%x, host=%x\n, ret, len, addr, port); @@ -130,21 +130,18 @@ static void assigned_dev_ioport_writeb(void *opaque, uint32_t addr, uint32_t value) { assigned_dev_ioport_rw(opaque, addr, 1, value); -return; } static void assigned_dev_ioport_writew(void *opaque, uint32_t addr, uint32_t value) { assigned_dev_ioport_rw(opaque, addr, 2, value); -return; } static void assigned_dev_ioport_writel(void *opaque, uint32_t addr, uint32_t value) { assigned_dev_ioport_rw(opaque, addr, 4, value); -return; } static uint32_t assigned_dev_ioport_readb(void *opaque, uint32_t addr) @@ -295,13 
+292,13 @@ static uint32_t assigned_dev_pci_read(PCIDevice *d, int pos, int len) again: ret = pread(fd, val, len, pos); if (ret != len) { - if ((ret 0) (errno == EINTR || errno == EAGAIN)) - goto again; - - fprintf(stderr, %s: pread failed, ret = %zd errno = %d\n, - __func__, ret, errno); +if ((ret 0) (errno == EINTR || errno == EAGAIN)) { +goto again; +} +fprintf(stderr, %s: pread failed, ret = %zd errno = %d\n, +__func__, ret, errno); - exit(1); +exit(1); } return val; @@ -321,16 +318,14 @@ static void assigned_dev_pci_write(PCIDevice *d, int pos, uint32_t val, int len) again: ret = pwrite(fd, val, len, pos); if (ret != len) { - if ((ret 0) (errno == EINTR || errno == EAGAIN)) - goto again; - - fprintf(stderr, %s: pwrite failed, ret = %zd errno = %d\n, - __func__, ret, errno); +if ((ret 0) (errno == EINTR || errno == EAGAIN)) { +goto again; +} +fprintf(stderr, %s: pwrite failed, ret = %zd errno = %d\n, +__func__, ret, errno); - exit(1); +exit(1); } - -return; } static void assigned_dev_emulate_config_read(AssignedDevice *dev, @@ -359,22 +354,24 @@ static uint8_t pci_find_cap_offset(PCIDevice *d, uint8_t cap, uint8_t start) int status; status = assigned_dev_pci_read_byte(d, PCI_STATUS); -if ((status PCI_STATUS_CAP_LIST) == 0) +if ((status PCI_STATUS_CAP_LIST) == 0) { return 0; +} while (max_cap--) { pos = assigned_dev_pci_read_byte(d, pos); -if (pos 0x40) +if (pos 0x40) { break; - +} pos = ~3; id = assigned_dev_pci_read_byte(d, pos + PCI_CAP_LIST_ID); -if (id == 0xff) +if (id == 0xff) { break; -if (id
Re: [Qemu-devel] [RFC][PATCH 12/45] msi: Introduce MSIRoutingCache
On 10/17/2011 11:27 AM, Jan Kiszka wrote: This cache will help us implementing KVM in-kernel irqchip support without spreading hooks all over the place. KVM requires us to register it first and then deliver it by raising a pseudo IRQ line returned on registration. While this could be changed for QEMU-originated MSI messages by adding direct MSI injection, we will still need this translation for irqfd-originated messages. The MSIRoutingCache will allow to track those registrations and update them lazily before the actual delivery. This avoid having to track MSI vectors at device level (like qemu-kvm currently does). +typedef enum { +MSI_ROUTE_NONE = 0, +MSI_ROUTE_STATIC, +} MSIRouteType; + +struct MSIRoutingCache { +MSIMessage msg; +MSIRouteType type; +int kvm_gsi; +int kvm_irqfd; +}; + diff --git a/hw/pci.h b/hw/pci.h index 329ab32..5b5d2fd 100644 --- a/hw/pci.h +++ b/hw/pci.h @@ -197,6 +197,10 @@ struct PCIDevice { MemoryRegion rom; uint32_t rom_bar; +/* MSI routing chaches */ +MSIRoutingCache *msi_cache; +MSIRoutingCache *msix_cache; + /* MSI entries */ int msi_entries_nr; struct KVMMsiMessage *msi_irq_entries; IMO this needlessly leaks kvm information into core qemu. The cache should be completely hidden in kvm code. I think msi_deliver() can hide the use of the cache completely. For pre-registered events like kvm's irqfd, you can use something like qemu_irq qemu_msi_irq(MSIMessage msg) for non-kvm, it simply returns a qemu_irq that triggers a stl_phys(); for kvm, it allocates an irqfd and a permanent entry in the cache and returns a qemu_irq that triggers the irqfd. -- error compiling committee.c: too many arguments to function
Re: [Qemu-devel] GPLv3 troubles
Am 17.10.2011 12:47, schrieb Paolo Bonzini: On 10/17/2011 12:45 PM, Andreas Färber wrote: Could we please draft some policy on this? This is not a GDB issue, it's very general. Whether we like it or not, there is GPLv3-licensed code and there will probably be a GPLv4 one day. IMO having old GPLv2-only code is one thing. But there's a lot of new GPLv2-only code cooking and occasionally pouring in, especially from qemu-kvm. Device assignment is a current example I encountered. If we could make checkpatch.pl detect new GPLv2-only code, then I would hope, given the dynamic QEMU development of the last few years, that the GPLv2-only portions become so small (both in relation and absolute) that they can either be replaced or the authors' permission be obtained to change the license to GPLv2-or-later. That is close to impossible, you usually ask permission for all the authors in the history to avoid bigger problems. I did refer to authors in history, in case that was unclear. I was thinking of how much code we rewrote for TCG, qdev, etc. In the end it'll depend on which files are affected, and I don't have a list - hard to grep due to varying formulations and line breaks. Andreas -- SUSE LINUX Products GmbH, Maxfeldstr. 5, 90409 Nürnberg, Germany GF: Jeff Hawn, Jennifer Guild, Felix Imendörffer; HRB 16746, AG Nürnberg
Re: [Qemu-devel] [RFC][PATCH 06/45] msix: Prevent bogus mask updates on MMIO accesses
On Mon, Oct 17, 2011 at 11:27:40AM +0200, Jan Kiszka wrote: Only accesses to the MSI-X table must trigger a call to msix_handle_mask_update or a notifier invocation. Signed-off-by: Jan Kiszka jan.kis...@siemens.com Why would msix_mmio_write be called on an access outside the table? --- hw/msix.c | 16 ++-- 1 files changed, 10 insertions(+), 6 deletions(-) diff --git a/hw/msix.c b/hw/msix.c index 2c4de21..33cb716 100644 --- a/hw/msix.c +++ b/hw/msix.c @@ -264,18 +264,22 @@ static void msix_mmio_write(void *opaque, target_phys_addr_t addr, { PCIDevice *dev = opaque; unsigned int offset = addr (MSIX_PAGE_SIZE - 1) ~0x3; -int vector = offset / PCI_MSIX_ENTRY_SIZE; +unsigned int vector = offset / PCI_MSIX_ENTRY_SIZE; Why the int/unsigned change? this has no chance to overflow, and using unsigned causes signed/unsigned comparison below, and unsigned/signed conversion on calls such as msix_is_masked. int was_masked = msix_is_masked(dev, vector); pci_set_long(dev-msix_table_page + offset, val); if (kvm_enabled() kvm_irqchip_in_kernel()) { kvm_msix_update(dev, vector, was_masked, msix_is_masked(dev, vector)); } I would say if we need to check the address, check it first thing and return if the address is out of a sensible range. For example, are you worried about kvm_msix_update calls with a sensible mask? -if (was_masked != msix_is_masked(dev, vector) dev-msix_mask_notifier) { -int r = dev-msix_mask_notifier(dev, vector, - msix_is_masked(dev, vector)); -assert(r = 0); + +if (vector dev-msix_entries_nr) { +if (was_masked != msix_is_masked(dev, vector) +dev-msix_mask_notifier) { +int r = dev-msix_mask_notifier(dev, vector, +msix_is_masked(dev, vector)); +assert(r = 0); +} +msix_handle_mask_update(dev, vector); } -msix_handle_mask_update(dev, vector); } static const MemoryRegionOps msix_mmio_ops = { -- 1.7.3.4
Re: [Qemu-devel] GPLv3 troubles
On 10/17/2011 01:07 PM, Andreas Färber wrote: That is close to impossible, you usually ask permission for all the authors in the history to avoid bigger problems. I did refer to authors in history, in case that was unclear. Authors in history (unlike authors in git blame, but you cannot trust that) almost never disappear, no matter how much you rewrite. Even dyngen-TCG kept a lot of the target-* code unchanged. Making a list of GPLv2 files would be a start, though. Paolo
[Qemu-devel] [RFC][PATCH 24/45] qemu-kvm: msix: Don't handle mask updated while disabled
As long as MSI-X is disabled, it's incorrect to invoke msix_handle_mask_update on per-vector mask changes. That may misguide the config notifier callback or spuriously trigger an MSI event. Signed-off-by: Jan Kiszka jan.kis...@siemens.com --- hw/msix.c |2 +- 1 files changed, 1 insertions(+), 1 deletions(-) diff --git a/hw/msix.c b/hw/msix.c index 176bc76..7d45760 100644 --- a/hw/msix.c +++ b/hw/msix.c @@ -292,7 +292,7 @@ static void msix_mmio_write(void *opaque, target_phys_addr_t addr, kvm_msix_update(dev, vector, was_masked, msix_is_masked(dev, vector)); } -if (vector dev-msix_entries_nr) { +if (msix_enabled(dev) vector dev-msix_entries_nr) { is_masked = msix_is_masked(dev, vector); if (was_masked != is_masked) { msix_handle_mask_update(dev, vector); -- 1.7.3.4
[Qemu-devel] [PATCH v2 1/2] ioapic: Convert to memory API
This maintains the old imprecise access size handling. Signed-off-by: Jan Kiszka jan.kis...@siemens.com --- Changes in v2: - use new-style handlers hw/ioapic.c | 28 +++- 1 files changed, 11 insertions(+), 17 deletions(-) diff --git a/hw/ioapic.c b/hw/ioapic.c index 61991d7..56b1612 100644 --- a/hw/ioapic.c +++ b/hw/ioapic.c @@ -86,6 +86,7 @@ typedef struct IOAPICState IOAPICState; struct IOAPICState { SysBusDevice busdev; +MemoryRegion io_memory; uint8_t id; uint8_t ioregsel; uint32_t irr; @@ -195,7 +196,8 @@ void ioapic_eoi_broadcast(int vector) } } -static uint32_t ioapic_mem_readl(void *opaque, target_phys_addr_t addr) +static uint64_t +ioapic_mem_read(void *opaque, target_phys_addr_t addr, unsigned int size) { IOAPICState *s = opaque; int index; @@ -234,7 +236,8 @@ static uint32_t ioapic_mem_readl(void *opaque, target_phys_addr_t addr) } static void -ioapic_mem_writel(void *opaque, target_phys_addr_t addr, uint32_t val) +ioapic_mem_write(void *opaque, target_phys_addr_t addr, uint64_t val, + unsigned int size) { IOAPICState *s = opaque; int index; @@ -309,32 +312,23 @@ static void ioapic_reset(DeviceState *d) } } -static CPUReadMemoryFunc * const ioapic_mem_read[3] = { -ioapic_mem_readl, -ioapic_mem_readl, -ioapic_mem_readl, -}; - -static CPUWriteMemoryFunc * const ioapic_mem_write[3] = { -ioapic_mem_writel, -ioapic_mem_writel, -ioapic_mem_writel, +static const MemoryRegionOps ioapic_io_ops = { +.read = ioapic_mem_read, +.write = ioapic_mem_write, +.endianness = DEVICE_NATIVE_ENDIAN, }; static int ioapic_init1(SysBusDevice *dev) { IOAPICState *s = FROM_SYSBUS(IOAPICState, dev); -int io_memory; static int ioapic_no; if (ioapic_no = MAX_IOAPICS) { return -1; } -io_memory = cpu_register_io_memory(ioapic_mem_read, - ioapic_mem_write, s, - DEVICE_NATIVE_ENDIAN); -sysbus_init_mmio(dev, 0x1000, io_memory); +memory_region_init_io(s-io_memory, ioapic_io_ops, s, ioapic, 0x1000); +sysbus_init_mmio_region(dev, s-io_memory); qdev_init_gpio_in(dev-qdev, ioapic_set_irq, 
IOAPIC_NUM_PINS); -- 1.7.3.4
[Qemu-devel] [PATCH 2/2] ioapic: Reject non-dword accesses to IOWIN register
Aligns the model with the spec. Signed-off-by: Jan Kiszka jan.kis...@siemens.com --- hw/ioapic.c |6 ++ 1 files changed, 6 insertions(+), 0 deletions(-) diff --git a/hw/ioapic.c b/hw/ioapic.c index 56b1612..eb75766 100644 --- a/hw/ioapic.c +++ b/hw/ioapic.c @@ -208,6 +208,9 @@ ioapic_mem_read(void *opaque, target_phys_addr_t addr, unsigned int size) val = s-ioregsel; break; case IOAPIC_IOWIN: +if (size != 4) { +break; +} switch (s-ioregsel) { case IOAPIC_REG_ID: val = s-id IOAPIC_ID_SHIFT; @@ -247,6 +250,9 @@ ioapic_mem_write(void *opaque, target_phys_addr_t addr, uint64_t val, s-ioregsel = val; break; case IOAPIC_IOWIN: +if (size != 4) { +break; +} DPRINTF(write: %08x = %08x\n, s-ioregsel, val); switch (s-ioregsel) { case IOAPIC_REG_ID: -- 1.7.3.4
Re: [Qemu-devel] [RFC][PATCH 17/45] qemu-kvm: Track MSIRoutingCache in KVM routing table
On 10/17/2011 11:27 AM, Jan Kiszka wrote: Keep a link from the internal KVM routing table to potential MSI routing cache entries. The link is used so far whenever the entry is dropped to invalidate the cache content. It will allow us to build MSI routing entries on demand and flush existing ones on table overflow. Does this not require a destructor for MSIRoutingCache? -- error compiling committee.c: too many arguments to function
Re: [Qemu-devel] [RFC][PATCH 11/45] msi: Factor out delivery hook
On 2011-10-17 12:56, Avi Kivity wrote: On 10/17/2011 11:27 AM, Jan Kiszka wrote: So far we deliver MSI messages by writing them into the target MMIO area. This reflects what happens on hardware, but imposes some limitations on the emulation when introducing KVM in-kernel irqchip models. For those we will need to track the message origin. Why do we need to track the message origin? Emulated interrupt remapping? The origin holds the routing cache which we need to track if the message already has a route (and that without searching long lists) and to update that route instead of adding another one. Moreover, different architecture or accelerators may want to overload the delivery handler. Therefore, this commit introduces a delivery hook that is called by the MSI/MSI-X layer when devices send normal messages, but also on spurious deliveries that ended up on the APIC MMIO handler. Our default delivery handler for APIC-based PCs then dispatches between real MSIs and other DMA requests that happened to take the MSI patch. 'path' -static void apic_send_msi(target_phys_addr_t addr, uint32_t data) +void apic_deliver_msi(MSIMessage *msg) In general, it is better these days to pass small structures by value. OK, will adjust this. Not sure what the gain is from intercepting the msi just before the stl_phys() vs. in the apic handler. APIC is x86-specific, MSI is not. I think Xen will also want to make use of this hook. I originally thought of using it for the KVM in-kernel models as well, but I will now establish a callback at APIC-level (upstream will look differently from qemu-kvm in this regard). Jan -- Siemens AG, Corporate Technology, CT T DE IT 1 Corporate Competence Center Embedded Linux
Re: [Qemu-devel] [PATCH] arm gic saving/loading fix
On 17 October 2011 11:54, Andreas Färber afaer...@suse.de wrote: Am 17.10.2011 12:48, schrieb Dmitry Koshelev: irq_target field saving/loading is in the wrong loop Signed-off-by: Dmitry Koshelev karagio...@gmail.com Reviewed-by: Andreas Färber afaer...@suse.de Doesn't it need a vmstate version bump too? -- PMM Andreas --- hw/arm_gic.c | 12 ++-- 1 files changed, 6 insertions(+), 6 deletions(-) diff --git a/hw/arm_gic.c b/hw/arm_gic.c index 8286a28..ba05131 100644 --- a/hw/arm_gic.c +++ b/hw/arm_gic.c @@ -662,9 +662,6 @@ static void gic_save(QEMUFile *f, void *opaque) qemu_put_be32(f, s-enabled); for (i = 0; i NUM_CPU(s); i++) { qemu_put_be32(f, s-cpu_enabled[i]); -#ifndef NVIC - qemu_put_be32(f, s-irq_target[i]); -#endif for (j = 0; j 32; j++) qemu_put_be32(f, s-priority1[j][i]); for (j = 0; j GIC_NIRQ; j++) @@ -678,6 +675,9 @@ static void gic_save(QEMUFile *f, void *opaque) qemu_put_be32(f, s-priority2[i]); } for (i = 0; i GIC_NIRQ; i++) { +#ifndef NVIC + qemu_put_be32(f, s-irq_target[i]); +#endif qemu_put_byte(f, s-irq_state[i].enabled); qemu_put_byte(f, s-irq_state[i].pending); qemu_put_byte(f, s-irq_state[i].active); @@ -699,9 +699,6 @@ static int gic_load(QEMUFile *f, void *opaque, int version_id) s-enabled = qemu_get_be32(f); for (i = 0; i NUM_CPU(s); i++) { s-cpu_enabled[i] = qemu_get_be32(f); -#ifndef NVIC - s-irq_target[i] = qemu_get_be32(f); -#endif for (j = 0; j 32; j++) s-priority1[j][i] = qemu_get_be32(f); for (j = 0; j GIC_NIRQ; j++) @@ -715,6 +712,9 @@ static int gic_load(QEMUFile *f, void *opaque, int version_id) s-priority2[i] = qemu_get_be32(f); } for (i = 0; i GIC_NIRQ; i++) { +#ifndef NVIC + s-irq_target[i] = qemu_get_be32(f); +#endif s-irq_state[i].enabled = qemu_get_byte(f); s-irq_state[i].pending = qemu_get_byte(f); s-irq_state[i].active = qemu_get_byte(f); -- SUSE LINUX Products GmbH, Maxfeldstr. 
5, 90409 Nürnberg, Germany GF: Jeff Hawn, Jennifer Guild, Felix Imendörffer; HRB 16746, AG Nürnberg -- 12345678901234567890123456789012345678901234567890123456789012345678901234567890 1 2 3 4 5 6 7 8
[Qemu-devel] [RFC][PATCH 16/45] qemu-kvm: Use MSIMessage and MSIRoutingCache
Start benefiting from the new abstractions and drop the KVM-specific vector tracking to generic MSIMessage and MSIRoutingCache data structures and helpers, also reducing the diff to upstream. Signed-off-by: Jan Kiszka jan.kis...@siemens.com --- hw/msi.c| 49 +++-- hw/msix.c | 37 + hw/pci.h|4 hw/virtio-pci.c |3 ++- kvm-stub.c |6 +++--- kvm.h | 13 +++-- qemu-kvm.c | 46 +- 7 files changed, 57 insertions(+), 101 deletions(-) diff --git a/hw/msi.c b/hw/msi.c index c8ccb17..b947104 100644 --- a/hw/msi.c +++ b/hw/msi.c @@ -140,49 +140,29 @@ static void msi_message_from_vector(PCIDevice *dev, uint16_t msi_flags, } } -static void kvm_msi_message_from_vector(PCIDevice *dev, unsigned vector, -KVMMsiMessage *kmm) -{ -uint16_t flags = pci_get_word(dev-config + msi_flags_off(dev)); -bool msi64bit = flags PCI_MSI_FLAGS_64BIT; -unsigned int nr_vectors = msi_nr_vectors(flags); - -kmm-addr_lo = pci_get_long(dev-config + msi_address_lo_off(dev)); -if (msi64bit) { -kmm-addr_hi = pci_get_long(dev-config + msi_address_hi_off(dev)); -} else { -kmm-addr_hi = 0; -} - -kmm-data = pci_get_word(dev-config + msi_data_off(dev, msi64bit)); -if (nr_vectors 1) { -kmm-data = ~(nr_vectors - 1); -kmm-data |= vector; -} -} - static void kvm_msi_update(PCIDevice *dev) { uint16_t flags = pci_get_word(dev-config + msi_flags_off(dev)); unsigned int max_vectors = 1 ((flags PCI_MSI_FLAGS_QMASK) (ffs(PCI_MSI_FLAGS_QMASK) - 1)); unsigned int nr_vectors = msi_nr_vectors(flags); -KVMMsiMessage new_entry, *entry; +MSIRoutingCache *cache; bool changed = false; unsigned int vector; +MSIMessage msg; int r; for (vector = 0; vector max_vectors; vector++) { -entry = dev-msi_irq_entries + vector; +cache = dev-msi_cache[vector]; if (vector = nr_vectors) { if (vector dev-msi_entries_nr) { -kvm_msi_message_del(entry); +kvm_msi_message_del(cache); changed = true; } } else if (vector = dev-msi_entries_nr) { -kvm_msi_message_from_vector(dev, vector, entry); -r = kvm_msi_message_add(entry); +msi_message_from_vector(dev, flags, 
vector, msg); +r = kvm_msi_message_add(msg, cache); if (r) { fprintf(stderr, %s: kvm_msi_add failed: %s\n, __func__, strerror(-r)); @@ -190,15 +170,14 @@ static void kvm_msi_update(PCIDevice *dev) } changed = true; } else { -kvm_msi_message_from_vector(dev, vector, new_entry); -r = kvm_msi_message_update(entry, new_entry); +msi_message_from_vector(dev, flags, vector, msg); +r = kvm_msi_message_update(msg, cache); if (r 0) { fprintf(stderr, %s: kvm_update_msi failed: %s\n, __func__, strerror(-r)); exit(1); } if (r 0) { -*entry = new_entry; changed = true; } } @@ -220,7 +199,7 @@ static void kvm_msi_free(PCIDevice *dev) unsigned int vector; for (vector = 0; vector dev-msi_entries_nr; ++vector) { -kvm_msi_message_del(dev-msi_irq_entries[vector]); +kvm_msi_message_del(dev-msi_cache[vector]); } if (dev-msi_entries_nr 0) { kvm_commit_irq_routes(); @@ -290,11 +269,6 @@ int msi_init(struct PCIDevice *dev, uint8_t offset, dev-msi_cache = g_malloc0(nr_vectors * sizeof(*dev-msi_cache)); -if (kvm_enabled() kvm_irqchip_in_kernel()) { -dev-msi_irq_entries = g_malloc(nr_vectors * -sizeof(*dev-msix_irq_entries)); -} - return config_offset; } @@ -311,7 +285,6 @@ void msi_uninit(struct PCIDevice *dev) if (kvm_enabled() kvm_irqchip_in_kernel()) { kvm_msi_free(dev); -g_free(dev-msi_irq_entries); } g_free(dev-msi_cache); @@ -383,7 +356,7 @@ void msi_notify(PCIDevice *dev, unsigned int vector) } if (kvm_enabled() kvm_irqchip_in_kernel()) { -kvm_set_irq(dev-msi_irq_entries[vector].gsi, 1, NULL); +kvm_set_irq(dev-msi_cache[vector].kvm_gsi, 1, NULL); return; } @@ -504,7 +477,7 @@ void msi_post_load(PCIDevice *dev) { uint16_t flags = pci_get_word(dev-config + msi_flags_off(dev)); -if (kvm_enabled() dev-msi_irq_entries) { +if (kvm_enabled() kvm_irqchip_in_kernel()) { kvm_msi_free(dev); if (flags PCI_MSI_FLAGS_ENABLE) { diff --git a/hw/msix.c b/hw/msix.c index e824aef..0be022e 100644 --- a/hw/msix.c
[Qemu-devel] [RFC][PATCH 35/45] pci-assign: Polish assigned_dev_update_msix_mmio
- rename to assigned_dev_set_msix_vectors - drop unused msg_ctrl - use pci_get_* accessors - rename variable va to msix_page - clarify comment on msg_data == 0 optimization - fix coding style Signed-off-by: Jan Kiszka jan.kis...@siemens.com --- hw/device-assignment.c | 53 ++- 1 files changed, 29 insertions(+), 24 deletions(-) diff --git a/hw/device-assignment.c b/hw/device-assignment.c index 7a8f702..83951a3 100644 --- a/hw/device-assignment.c +++ b/hw/device-assignment.c @@ -949,42 +949,43 @@ static void assigned_dev_update_msi(PCIDevice *pci_dev) } } -static int assigned_dev_update_msix_mmio(PCIDevice *pci_dev) +static int assigned_dev_set_msix_vectors(PCIDevice *pci_dev) { AssignedDevice *adev = DO_UPCAST(AssignedDevice, dev, pci_dev); uint16_t entries_nr = 0, entries_max_nr; int pos = 0, i, r = 0; -uint32_t msg_addr, msg_upper_addr, msg_data, msg_ctrl; +uint32_t msg_addr, msg_upper_addr, msg_data; struct kvm_assigned_msix_nr msix_nr; struct kvm_assigned_msix_entry msix_entry; -void *va = adev-msix_table_page; +void *msix_page = adev-msix_table_page; pos = pci_find_capability(pci_dev, PCI_CAP_ID_MSIX); -entries_max_nr = *(uint16_t *)(pci_dev-config + pos + 2); +entries_max_nr = pci_get_word(pci_dev-config + pos + PCI_MSIX_FLAGS); entries_max_nr = PCI_MSIX_FLAGS_QSIZE; entries_max_nr += 1; /* Get the usable entry number for allocating */ for (i = 0; i entries_max_nr; i++) { -memcpy(msg_ctrl, va + i * 16 + 12, 4); -memcpy(msg_data, va + i * 16 + 8, 4); -/* Ignore unused entry even it's unmasked */ -if (msg_data == 0) +/* Assuming IA-32 MSI message format: + * Ignore unused entry (invalid vector) */ +if (pci_get_long(msix_page + i * PCI_MSIX_ENTRY_SIZE + + PCI_MSIX_ENTRY_DATA) == 0) { continue; -entries_nr ++; +} +entries_nr++; } - if (entries_nr == 0) { fprintf(stderr, MSI-X entry number is zero!\n); return -EINVAL; } + msix_nr.assigned_dev_id = calc_assigned_dev_id(adev); msix_nr.entry_nr = entries_nr; r = kvm_assign_set_msix_nr(kvm_state, msix_nr); if (r != 0) { 
fprintf(stderr, fail to set MSI-X entry number for MSIX! %s\n, - strerror(-r)); +strerror(-r)); return r; } @@ -995,19 +996,23 @@ static int assigned_dev_update_msix_mmio(PCIDevice *pci_dev) msix_entry.assigned_dev_id = msix_nr.assigned_dev_id; entries_nr = 0; for (i = 0; i entries_max_nr; i++) { -if (entries_nr = msix_nr.entry_nr) +if (entries_nr = msix_nr.entry_nr) { break; -memcpy(msg_ctrl, va + i * 16 + 12, 4); -memcpy(msg_data, va + i * 16 + 8, 4); -if (msg_data == 0) +} +msg_data = pci_get_long(msix_page + i * PCI_MSIX_ENTRY_SIZE + +PCI_MSIX_ENTRY_DATA); +if (msg_data == 0) { continue; - -memcpy(msg_addr, va + i * 16, 4); -memcpy(msg_upper_addr, va + i * 16 + 4, 4); +} +msg_addr = pci_get_long(msix_page + i * PCI_MSIX_ENTRY_SIZE + +PCI_MSIX_ENTRY_LOWER_ADDR); +msg_upper_addr = pci_get_long(msix_page + i * PCI_MSIX_ENTRY_SIZE + + PCI_MSIX_ENTRY_UPPER_ADDR); r = kvm_get_irq_route_gsi(); -if (r 0) +if (r 0) { return r; +} adev-entry[entries_nr].gsi = r; adev-entry[entries_nr].type = KVM_IRQ_ROUTING_MSI; @@ -1026,13 +1031,13 @@ static int assigned_dev_update_msix_mmio(PCIDevice *pci_dev) break; } DEBUG(MSI-X entry gsi 0x%x, entry %d\n!, -msix_entry.gsi, msix_entry.entry); -entries_nr ++; + msix_entry.gsi, msix_entry.entry); +entries_nr++; } if (r == 0 kvm_commit_irq_routes() 0) { - perror(assigned_dev_update_msix_mmio: kvm_commit_irq_routes); - return -EINVAL; +perror(assigned_dev_update_msix_mmio: kvm_commit_irq_routes); +return -EINVAL; } return r; @@ -1070,7 +1075,7 @@ static void assigned_dev_update_msix(PCIDevice *pci_dev) assigned_irq_data.flags = KVM_DEV_IRQ_HOST_MSIX | KVM_DEV_IRQ_GUEST_MSIX; -if (assigned_dev_update_msix_mmio(pci_dev) 0) { +if (assigned_dev_set_msix_vectors(pci_dev) 0) { perror(assigned_dev_update_msix_mmio); return; } -- 1.7.3.4
Re: [Qemu-devel] [PATCH] arm cpu state loading fix
On 17 October 2011 11:53, Dmitry Koshelev karaghio...@gmail.com wrote: Floating registers loading fix. Signed-off-by: Dmitry Koshelev karaghio...@gmail.com --- target-arm/machine.c | 2 +- 1 files changed, 1 insertions(+), 1 deletions(-) diff --git a/target-arm/machine.c b/target-arm/machine.c index 3925d3a..73d82c9 100644 --- a/target-arm/machine.c +++ b/target-arm/machine.c @@ -175,7 +175,7 @@ int cpu_load(QEMUFile *f, void *opaque, int version_id) env-vfp.vec_stride = qemu_get_be32(f); if (arm_feature(env, ARM_FEATURE_VFP3)) { - for (i = 0; i 16; i++) { + for (i = 16; i 32; i++) { CPU_DoubleU u; u.l.upper = qemu_get_be32(f); u.l.lower = qemu_get_be32(f); Reviewed-by: Peter Maydell peter.mayd...@linaro.org -- PMM
Re: [Qemu-devel] [RFC][PATCH 12/45] msi: Introduce MSIRoutingCache
On 2011-10-17 13:06, Avi Kivity wrote: On 10/17/2011 11:27 AM, Jan Kiszka wrote: This cache will help us implementing KVM in-kernel irqchip support without spreading hooks all over the place. KVM requires us to register it first and then deliver it by raising a pseudo IRQ line returned on registration. While this could be changed for QEMU-originated MSI messages by adding direct MSI injection, we will still need this translation for irqfd-originated messages. The MSIRoutingCache will allow to track those registrations and update them lazily before the actual delivery. This avoid having to track MSI vectors at device level (like qemu-kvm currently does). +typedef enum { +MSI_ROUTE_NONE = 0, +MSI_ROUTE_STATIC, +} MSIRouteType; + +struct MSIRoutingCache { +MSIMessage msg; +MSIRouteType type; +int kvm_gsi; +int kvm_irqfd; +}; + diff --git a/hw/pci.h b/hw/pci.h index 329ab32..5b5d2fd 100644 --- a/hw/pci.h +++ b/hw/pci.h @@ -197,6 +197,10 @@ struct PCIDevice { MemoryRegion rom; uint32_t rom_bar; +/* MSI routing chaches */ +MSIRoutingCache *msi_cache; +MSIRoutingCache *msix_cache; + /* MSI entries */ int msi_entries_nr; struct KVMMsiMessage *msi_irq_entries; IMO this needlessly leaks kvm information into core qemu. The cache should be completely hidden in kvm code. I think msi_deliver() can hide the use of the cache completely. For pre-registered events like kvm's irqfd, you can use something like qemu_irq qemu_msi_irq(MSIMessage msg) for non-kvm, it simply returns a qemu_irq that triggers a stl_phys(); for kvm, it allocates an irqfd and a permanent entry in the cache and returns a qemu_irq that triggers the irqfd. See my previous mail: you want to track the life-cycle of an MSI source to avoid generating routes for identical sources. A message is not a source. Two identical messages can come from different sources. So we need a separate data structure for that purpose. Jan -- Siemens AG, Corporate Technology, CT T DE IT 1 Corporate Competence Center Embedded Linux