[Qemu-devel] [PATCH v16 9/9] Add XBZRLE statistics
Signed-off-by: Benoit Hudzia benoit.hud...@sap.com Signed-off-by: Petter Svard pett...@cs.umu.se Signed-off-by: Aidan Shribman aidan.shrib...@sap.com Signed-off-by: Orit Wasserman owass...@redhat.com --- arch_init.c | 66 ++ hmp.c| 13 ++ migration.c | 49 migration.h |9 +++ qapi-schema.json | 37 ++ qmp-commands.hx | 35 +++- 6 files changed, 203 insertions(+), 6 deletions(-) diff --git a/arch_init.c b/arch_init.c index c9c3fe0..53ae2b2 100644 --- a/arch_init.c +++ b/arch_init.c @@ -202,6 +202,64 @@ int64_t xbzrle_cache_resize(int64_t new_size) return pow2floor(new_size); } +/* accounting for migration statistics */ +typedef struct AccountingInfo { +uint64_t dup_pages; +uint64_t norm_pages; +uint64_t xbzrle_bytes; +uint64_t xbzrle_pages; +uint64_t xbzrle_cache_miss; +uint64_t iterations; +uint64_t xbzrle_overflows; +} AccountingInfo; + +static AccountingInfo acct_info; + +static void acct_clear(void) +{ +memset(acct_info, 0, sizeof(acct_info)); +} + +uint64_t dup_mig_bytes_transferred(void) +{ +return acct_info.dup_pages * TARGET_PAGE_SIZE; +} + +uint64_t dup_mig_pages_transferred(void) +{ +return acct_info.dup_pages; +} + +uint64_t norm_mig_bytes_transferred(void) +{ +return acct_info.norm_pages * TARGET_PAGE_SIZE; +} + +uint64_t norm_mig_pages_transferred(void) +{ +return acct_info.norm_pages; +} + +uint64_t xbzrle_mig_bytes_transferred(void) +{ +return acct_info.xbzrle_bytes; +} + +uint64_t xbzrle_mig_pages_transferred(void) +{ +return acct_info.xbzrle_pages; +} + +uint64_t xbzrle_mig_pages_cache_miss(void) +{ +return acct_info.xbzrle_cache_miss; +} + +uint64_t xbzrle_mig_pages_overflow(void) +{ +return acct_info.xbzrle_overflows; +} + static void save_block_hdr(QEMUFile *f, RAMBlock *block, ram_addr_t offset, int cont, int flag) { @@ -236,6 +294,7 @@ static int save_xbzrle_page(QEMUFile *f, uint8_t *current_data, cache_insert(XBZRLE.cache, current_addr, g_memdup(current_data, TARGET_PAGE_SIZE)); } +acct_info.xbzrle_cache_miss++; return -1; } @@ -250,6 +309,7 @@ 
static int save_xbzrle_page(QEMUFile *f, uint8_t *current_data, return 0; } else if (encoded_len == -1) { DPRINTF(Overflow\n); +acct_info.xbzrle_overflows++; /* update data in the cache */ memcpy(prev_cached_page, current_data, TARGET_PAGE_SIZE); return -1; @@ -269,7 +329,9 @@ static int save_xbzrle_page(QEMUFile *f, uint8_t *current_data, qemu_put_byte(f, hdr.xh_flags); qemu_put_be16(f, hdr.xh_len); qemu_put_buffer(f, XBZRLE.encoded_buf, encoded_len); +acct_info.xbzrle_pages++; bytes_sent = encoded_len + sizeof(hdr); +acct_info.xbzrle_bytes += bytes_sent; return bytes_sent; } @@ -301,6 +363,7 @@ static int ram_save_block(QEMUFile *f, int stage) p = memory_region_get_ram_ptr(mr) + offset; if (is_dup_page(p)) { +acct_info.dup_pages++; save_block_hdr(f, block, offset, cont, RAM_SAVE_FLAG_COMPRESS); qemu_put_byte(f, *p); bytes_sent = 1; @@ -323,6 +386,7 @@ static int ram_save_block(QEMUFile *f, int stage) save_block_hdr(f, block, offset, cont, RAM_SAVE_FLAG_PAGE); qemu_put_buffer(f, p, TARGET_PAGE_SIZE); bytes_sent = TARGET_PAGE_SIZE; +acct_info.norm_pages++; } /* if page is unmodified, continue to the next */ @@ -449,6 +513,7 @@ int ram_save_live(QEMUFile *f, int stage, void *opaque) return -1; } XBZRLE.encoded_buf = g_malloc0(TARGET_PAGE_SIZE); +acct_clear(); } /* Make sure all dirty bits are set */ @@ -484,6 +549,7 @@ int ram_save_live(QEMUFile *f, int stage, void *opaque) bytes_sent -1 represent no more blocks*/ if (bytes_sent 0) { bytes_transferred += bytes_sent; +acct_info.iterations++; } else if (bytes_sent == -1) { /* no more blocks */ break; } diff --git a/hmp.c b/hmp.c index 5833e1c..5c56f4a 100644 --- a/hmp.c +++ b/hmp.c @@ -168,6 +168,19 @@ void hmp_info_migrate(Monitor *mon) info-disk-total 10); } +if (info-has_cache) { +monitor_printf(mon, cache size: % PRIu64 bytes\n, + info-cache-cache_size); +monitor_printf(mon, xbzrle transferred: % PRIu64 kbytes\n, + info-cache-xbzrle_bytes 10); +monitor_printf(mon, xbzrle pages: % PRIu64 pages\n, + 
info-cache-xbzrle_pages); +monitor_printf(mon, xbzrle cache miss: % PRIu64 \n,
[Qemu-devel] [PATCH v16 6/9] Add xbzrle_encode_buffer and xbzrle_decode_buffer functions
For performance we are encoding a long word at a time. For nzrun we use long-word-at-a-time NUL-detection tricks from strcmp(): using the ((lword - 0x0101010101010101) & (~lword) & 0x8080808080808080) test to find out if any byte in the long word is zero. Signed-off-by: Benoit Hudzia benoit.hud...@sap.com Signed-off-by: Petter Svard pett...@cs.umu.se Signed-off-by: Aidan Shribman aidan.shrib...@sap.com Signed-off-by: Orit Wasserman owass...@redhat.com Signed-off-by: Eric Blake ebl...@redhat.com --- migration.h |4 ++ savevm.c| 159 +++ 2 files changed, 163 insertions(+), 0 deletions(-) diff --git a/migration.h b/migration.h index acc0b94..c46af82 100644 --- a/migration.h +++ b/migration.h @@ -100,4 +100,8 @@ void migrate_add_blocker(Error *reason); */ void migrate_del_blocker(Error *reason); +int xbzrle_encode_buffer(uint8_t *old_buf, uint8_t *new_buf, int slen, + uint8_t *dst, int dlen); +int xbzrle_decode_buffer(uint8_t *src, int slen, uint8_t *dst, int dlen); + #endif diff --git a/savevm.c b/savevm.c index a15c163..26d 100644 --- a/savevm.c +++ b/savevm.c @@ -2385,3 +2385,162 @@ void vmstate_register_ram_global(MemoryRegion *mr) { vmstate_register_ram(mr, NULL); } + +/* + page = zrun nzrun + | zrun nzrun page + + zrun = length + + nzrun = length byte...
+ + length = uleb128 encoded integer + */ +int xbzrle_encode_buffer(uint8_t *old_buf, uint8_t *new_buf, int slen, + uint8_t *dst, int dlen) +{ +uint32_t zrun_len = 0, nzrun_len = 0; +int d = 0, i = 0; +long res, xor; +uint8_t *nzrun_start = NULL; + +g_assert(!(((uintptr_t)old_buf | (uintptr_t)new_buf | slen) % + sizeof(long))); + +while (i slen) { +/* overflow */ +if (d + 2 dlen) { +return -1; +} + +/* not aligned to sizeof(long) */ +res = (slen - i) % sizeof(long); +while (res old_buf[i] == new_buf[i]) { +zrun_len++; +i++; +res--; +} + +/* word at a time for speed */ +if (!res) { +while (i slen + (*(long *)(old_buf + i)) == (*(long *)(new_buf + i))) { +i += sizeof(long); +zrun_len += sizeof(long); +} + +/* go over the rest */ +while (i slen old_buf[i] == new_buf[i]) { +zrun_len++; +i++; +} +} + +/* buffer unchanged */ +if (zrun_len == slen) { +return 0; +} + +/* skip last zero run */ +if (i == slen) { +return d; +} + +d += uleb128_encode_small(dst + d, zrun_len); + +zrun_len = 0; +nzrun_start = new_buf + i; + +/* overflow */ +if (d + 2 dlen) { +return -1; +} +/* not aligned to sizeof(long) */ +res = (slen - i) % sizeof(long); +while (res old_buf[i] != new_buf[i]) { +i++; +nzrun_len++; +res--; +} + +/* word at a time for speed, use of 32-bit long okay */ +if (!res) { +/* truncation to 32-bit long okay */ +long mask = 0x0101010101010101ULL; +while (i slen) { +xor = *(long *)(old_buf + i) ^ *(long *)(new_buf + i); +if ((xor - mask) ~xor (mask 7)) { +/* found the end of an nzrun within the current long */ +while (old_buf[i] != new_buf[i]) { +nzrun_len++; +i++; +} +break; +} else { +i += sizeof(long); +nzrun_len += sizeof(long); +} +} +} + +d += uleb128_encode_small(dst + d, nzrun_len); +/* overflow */ +if (d + nzrun_len dlen) { +return -1; +} +memcpy(dst + d, nzrun_start, nzrun_len); +d += nzrun_len; +nzrun_len = 0; +} + +return d; +} + +int xbzrle_decode_buffer(uint8_t *src, int slen, uint8_t *dst, int dlen) +{ +int i = 0, d = 0; +int ret; +uint32_t count = 0; + 
+while (i slen) { + +/* zrun */ +if ((slen - i) 2) { +return -1; +} + +ret = uleb128_decode_small(src + i, count); +if (ret 0 || (i !count)) { +return -1; +} +i += ret; +d += count; + +/* overflow */ +if (d dlen) { +return -1; +} + +/* nzrun */ +if ((slen - i) 2) { +return -1; +} + +ret = uleb128_decode_small(src + i, count); +if (ret 0 || !count) { +return -1; +} +i += ret; + +/* overflow */ +if (d + count dlen || i + count slen) { +
[Qemu-devel] [PATCH] make: Remove 'build-all' rule
It is not needed, because the 'all' rule does the same. Signed-off-by: Stefan Weil s...@weilnetz.de --- Makefile |6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Makefile b/Makefile index bad0e31..76dae56 100644 --- a/Makefile +++ b/Makefile @@ -6,7 +6,7 @@ BUILD_DIR=$(CURDIR) # All following code might depend on configuration variables ifneq ($(wildcard config-host.mak),) # Put the all: rule here so that config-host.mak can contain dependencies. -all: build-all +all: include config-host.mak include $(SRC_PATH)/rules.mak config-host.mak: $(SRC_PATH)/configure @@ -31,7 +31,7 @@ Makefile: ; configure: ; .PHONY: all clean cscope distclean dvi html info install install-doc \ - pdf recurse-all speed tar tarbin test build-all + pdf recurse-all speed tar tarbin test $(call set-vpath, $(SRC_PATH)) @@ -82,7 +82,7 @@ defconfig: -include config-all-devices.mak -build-all: $(DOCS) $(TOOLS) $(HELPERS-y) recurse-all +all: $(DOCS) $(TOOLS) $(HELPERS-y) recurse-all config-host.h: config-host.h-timestamp config-host.h-timestamp: config-host.mak -- 1.7.10
Re: [Qemu-devel] [PATCH v4 0/7] file descriptor passing using pass-fd
On 06/26/2012 04:10 AM, Daniel P. Berrange wrote: On Fri, Jun 22, 2012 at 02:36:07PM -0400, Corey Bryant wrote: libvirt's sVirt security driver provides SELinux MAC isolation for Qemu guest processes and their corresponding image files. In other words, sVirt uses SELinux to prevent a QEMU process from opening files that do not belong to it. sVirt provides this support by labeling guests and resources with security labels that are stored in file system extended attributes. Some file systems, such as NFS, do not support the extended attribute security namespace, and therefore cannot support sVirt isolation. A solution to this problem is to provide fd passing support, where libvirt opens files and passes file descriptors to QEMU. This, along with SELinux policy to prevent QEMU from opening files, can provide image file isolation for NFS files stored on the same NFS mount. This patch series adds the pass-fd QMP monitor command, which allows an fd to be passed via SCM_RIGHTS, and returns the received file descriptor. Support is also added to the block layer to allow QEMU to dup the fd when the filename is of the /dev/fd/X format. This is useful if MAC policy prevents QEMU from opening specific types of files. I was thinking about some of the sources complexity when using FD passing from libvirt and wanted to raise one idea for discussion before we continue. With this proposed series, we have usage akin to: 1. pass_fd FDSET={M} - returns a string /dev/fd/N showing QEMU's view of the FD 2. drive_add file=/dev/fd/N 3. if failure: close_fd /dev/fd/N My problem is that none of this FD passing is transactional. My original patch series did not suffer from this problem. QEMU owned the file descriptor once it received it from libvirt. 
I don't think the cited problem (QEMU failing an operation if libvirt was down) is really an actual problem since it would be libvirt that would be issuing the command in the first place (so the command would just fail which libvirt would have to assume anyway if it crashed). I really dislike where this thread has headed with /dev/fdset. This has become extremely complex and cumbersome. Perhaps we should reconsider using an RPC for QEMU to request an fd as this solves all the cited problems in a much simpler fashion. Regards, Anthony Liguori
Re: [Qemu-devel] [PATCH v3] sheepdog: do not blindly memset all read buffers
At Mon, 9 Jul 2012 16:34:13 +0200, Christoph Hellwig wrote: Only buffers that map to unallocated blocks need to be zeroed. Signed-off-by: Christoph Hellwig h...@lst.de --- block/sheepdog.c | 37 ++--- 1 file changed, 18 insertions(+), 19 deletions(-) Acked-by: MORITA Kazutaka morita.kazut...@lab.ntt.co.jp
[Qemu-devel] [PATCH v16 8/9] Add set_cachesize command
Change XBZRLE cache size in bytes (the size should be a power of 2). If the XBZRLE cache size is too small there will be many cache misses. Signed-off-by: Benoit Hudzia benoit.hud...@sap.com Signed-off-by: Petter Svard pett...@cs.umu.se Signed-off-by: Aidan Shribman aidan.shrib...@sap.com Signed-off-by: Orit Wasserman owass...@redhat.com --- arch_init.c | 10 ++ hmp-commands.hx | 20 hmp.c| 13 + hmp.h|1 + migration.c | 14 ++ migration.h |2 ++ qapi-schema.json | 16 qmp-commands.hx | 23 +++ 8 files changed, 99 insertions(+), 0 deletions(-) diff --git a/arch_init.c b/arch_init.c index 15f0790..c9c3fe0 100644 --- a/arch_init.c +++ b/arch_init.c @@ -192,6 +192,16 @@ static struct { .cache = NULL, }; + +int64_t xbzrle_cache_resize(int64_t new_size) +{ +if (XBZRLE.cache != NULL) { +return cache_resize(XBZRLE.cache, new_size / TARGET_PAGE_SIZE) * +TARGET_PAGE_SIZE; +} +return pow2floor(new_size); +} + static void save_block_hdr(QEMUFile *f, RAMBlock *block, ram_addr_t offset, int cont, int flag) { diff --git a/hmp-commands.hx b/hmp-commands.hx index 9245bef..7ff1d77 100644 --- a/hmp-commands.hx +++ b/hmp-commands.hx @@ -829,6 +829,26 @@ STEXI @item migrate_cancel @findex migrate_cancel Cancel the current VM migration. + +ETEXI + +{ +.name = migrate_set_cachesize, +.args_type = value:o, +.params = value, +.help = set cache size (in bytes) for XBZRLE migrations, + the cache size will be rounded down to the nearest + power of 2.\n + The cache size effects the number of cache misses. + In case of a high cache miss ratio you need to increase + the cache size, +.mhandler.cmd = hmp_migrate_set_cachesize, +}, + +STEXI +@item migrate_set_cachesize @var{value} +@findex migrate_set_cache +Set cache size to @var{value} (in bytes) for xbzrle migrations.
ETEXI { diff --git a/hmp.c b/hmp.c index b0440e6..5833e1c 100644 --- a/hmp.c +++ b/hmp.c @@ -758,6 +758,19 @@ void hmp_migrate_set_downtime(Monitor *mon, const QDict *qdict) qmp_migrate_set_downtime(value, NULL); } +void hmp_migrate_set_cachesize(Monitor *mon, const QDict *qdict) +{ +int64_t value = qdict_get_int(qdict, value); +Error *err = NULL; + +qmp_migrate_set_cachesize(value, err); +if (err) { +monitor_printf(mon, %s\n, error_get_pretty(err)); +error_free(err); +return; +} +} + void hmp_migrate_set_speed(Monitor *mon, const QDict *qdict) { int64_t value = qdict_get_int(qdict, value); diff --git a/hmp.h b/hmp.h index 09ba198..7c5117d 100644 --- a/hmp.h +++ b/hmp.h @@ -53,6 +53,7 @@ void hmp_migrate_cancel(Monitor *mon, const QDict *qdict); void hmp_migrate_set_downtime(Monitor *mon, const QDict *qdict); void hmp_migrate_set_speed(Monitor *mon, const QDict *qdict); void hmp_migrate_set_parameter(Monitor *mon, const QDict *qdict); +void hmp_migrate_set_cachesize(Monitor *mon, const QDict *qdict); void hmp_set_password(Monitor *mon, const QDict *qdict); void hmp_expire_password(Monitor *mon, const QDict *qdict); void hmp_eject(Monitor *mon, const QDict *qdict); diff --git a/migration.c b/migration.c index 1a264a9..b3cdf8c 100644 --- a/migration.c +++ b/migration.c @@ -533,6 +533,20 @@ void qmp_migrate_cancel(Error **errp) migrate_fd_cancel(migrate_get_current()); } +void qmp_migrate_set_cachesize(int64_t value, Error **errp) +{ +MigrationState *s = migrate_get_current(); + +/* Check for truncation */ +if (value != (size_t)value) { +error_set(errp, QERR_INVALID_PARAMETER_VALUE, cache size, + exceeding address space); +return; +} + +s-xbzrle_cache_size = xbzrle_cache_resize(value); +} + void qmp_migrate_set_speed(int64_t value, Error **errp) { MigrationState *s; diff --git a/migration.h b/migration.h index 9b61e70..a73a34a 100644 --- a/migration.h +++ b/migration.h @@ -108,4 +108,6 @@ int xbzrle_decode_buffer(uint8_t *src, int slen, uint8_t *dst, int dlen); int 
migrate_use_xbzrle(void); int64_t migrate_xbzrle_cache_size(void); +int64_t xbzrle_cache_resize(int64_t new_size); + #endif diff --git a/qapi-schema.json b/qapi-schema.json index a8408fd..c13577d 100644 --- a/qapi-schema.json +++ b/qapi-schema.json @@ -1390,6 +1390,22 @@ { 'command': 'migrate_set_speed', 'data': {'value': 'int'} } ## +# @migrate_set_cachesize +# +# Set XBZRLE cache size +# +# @value: cache size in bytes +# +# The size will be rounded down to the nearest power of 2. +# The cache size can be modified before and during ongoing migration +# +# Returns: nothing on success +# +# Since: 1.2 +## +{ 'command': 'migrate_set_cachesize', 'data': {'value': 'int'} } + +## #
Re: [Qemu-devel] [PATCH v4 0/7] file descriptor passing using pass-fd
On Mon, 09 Jul 2012 13:40:34 -0500 Anthony Liguori aligu...@us.ibm.com wrote: On 06/26/2012 04:10 AM, Daniel P. Berrange wrote: On Fri, Jun 22, 2012 at 02:36:07PM -0400, Corey Bryant wrote: libvirt's sVirt security driver provides SELinux MAC isolation for Qemu guest processes and their corresponding image files. In other words, sVirt uses SELinux to prevent a QEMU process from opening files that do not belong to it. sVirt provides this support by labeling guests and resources with security labels that are stored in file system extended attributes. Some file systems, such as NFS, do not support the extended attribute security namespace, and therefore cannot support sVirt isolation. A solution to this problem is to provide fd passing support, where libvirt opens files and passes file descriptors to QEMU. This, along with SELinux policy to prevent QEMU from opening files, can provide image file isolation for NFS files stored on the same NFS mount. This patch series adds the pass-fd QMP monitor command, which allows an fd to be passed via SCM_RIGHTS, and returns the received file descriptor. Support is also added to the block layer to allow QEMU to dup the fd when the filename is of the /dev/fd/X format. This is useful if MAC policy prevents QEMU from opening specific types of files. I was thinking about some of the sources complexity when using FD passing from libvirt and wanted to raise one idea for discussion before we continue. With this proposed series, we have usage akin to: 1. pass_fd FDSET={M} - returns a string /dev/fd/N showing QEMU's view of the FD 2. drive_add file=/dev/fd/N 3. if failure: close_fd /dev/fd/N My problem is that none of this FD passing is transactional. My original patch series did not suffer from this problem. QEMU owned the file descriptor once it received it from libvirt. 
I don't think the cited problem (QEMU failing an operation if libvirt was down) is really an actual problem since it would be libvirt that would be issuing the command in the first place (so the command would just fail which libvirt would have to assume anyway if it crashed). I really dislike where this thread has headed with /dev/fdset. This has become extremely complex and cumbersome. I agree, maybe it's time to start over and discuss the original problem again. Perhaps we should reconsider using an RPC for QEMU to request an fd as this solves all the cited problems in a much simpler fashion. Regards, Anthony Liguori
[Qemu-devel] [PATCH v16 7/9] Add XBZRLE to ram_save_block and ram_save_live
In the outgoing migration check to see if the page is cached and changed than send compressed page by using save_xbrle_page function. In the incoming migration check to see if RAM_SAVE_FLAG_XBZRLE is set and decompress the page (by using load_xbrle function). Signed-off-by: Benoit Hudzia benoit.hud...@sap.com Signed-off-by: Petter Svard pett...@cs.umu.se Signed-off-by: Aidan Shribman aidan.shrib...@sap.com Signed-off-by: Orit Wasserman owass...@redhat.com --- arch_init.c | 187 +-- migration.c | 24 migration.h |4 + 3 files changed, 210 insertions(+), 5 deletions(-) diff --git a/arch_init.c b/arch_init.c index e36899e..15f0790 100644 --- a/arch_init.c +++ b/arch_init.c @@ -43,6 +43,7 @@ #include hw/smbios.h #include exec-memory.h #include hw/pcspk.h +#include qemu/page_cache.h #ifdef DEBUG_ARCH_INIT #define DPRINTF(fmt, ...) \ @@ -102,6 +103,7 @@ const uint32_t arch_type = QEMU_ARCH; #define RAM_SAVE_FLAG_PAGE 0x08 #define RAM_SAVE_FLAG_EOS 0x10 #define RAM_SAVE_FLAG_CONTINUE 0x20 +#define RAM_SAVE_FLAG_XBZRLE 0x40 #ifdef __ALTIVEC__ #include altivec.h @@ -169,6 +171,27 @@ static int is_dup_page(uint8_t *page) return 1; } +/* XBZRLE (Xor Based Zero Length Encoding */ +typedef struct XBZRLEHeader { +uint16_t xh_len; +uint8_t xh_flags; +} XBZRLEHeader; + +/* struct contains XBZRLE cache and a static page + used by the compression */ +static struct { +/* buffer used for XBZRLE encoding */ +uint8_t *encoded_buf; +/* buffer used for XBZRLE decoding */ +uint8_t *decoded_buf; +/* Cache for XBZRLE */ +PageCache *cache; +} XBZRLE = { +.encoded_buf = NULL, +.decoded_buf = NULL, +.cache = NULL, +}; + static void save_block_hdr(QEMUFile *f, RAMBlock *block, ram_addr_t offset, int cont, int flag) { @@ -181,15 +204,76 @@ static void save_block_hdr(QEMUFile *f, RAMBlock *block, ram_addr_t offset, } +#define ENCODING_FLAG_XBZRLE 0x1 + +static int save_xbzrle_page(QEMUFile *f, uint8_t *current_data, +ram_addr_t current_addr, RAMBlock *block, +ram_addr_t offset, int cont, int stage) 
+{ +int encoded_len = 0, bytes_sent = -1, ret = -1; +XBZRLEHeader hdr = { +.xh_len = 0, +.xh_flags = 0, +}; +uint8_t *prev_cached_page; + +/* Stage 1 cache the page and exit. + Stage 2 check to see if page is cached, if not cache the page. + Stage 3 check if the page is cached and if not exit. +*/ +if (stage == 1 || !cache_is_cached(XBZRLE.cache, current_addr)) { +if (stage != 3) { +cache_insert(XBZRLE.cache, current_addr, + g_memdup(current_data, TARGET_PAGE_SIZE)); +} +return -1; +} + +prev_cached_page = get_cached_data(XBZRLE.cache, current_addr); + +/* XBZRLE encoding (if there is no overflow) */ +encoded_len = xbzrle_encode_buffer(prev_cached_page, current_data, + TARGET_PAGE_SIZE, XBZRLE.encoded_buf, + TARGET_PAGE_SIZE); +if (encoded_len == 0) { +DPRINTF(Skipping unmodified page\n); +return 0; +} else if (encoded_len == -1) { +DPRINTF(Overflow\n); +/* update data in the cache */ +memcpy(prev_cached_page, current_data, TARGET_PAGE_SIZE); +return -1; +} + +/* we need to update the data in the cache, in order to get the same data + we cached we decode the encoded page on the cached data */ +ret = xbzrle_decode_buffer(XBZRLE.encoded_buf, encoded_len, + prev_cached_page, TARGET_PAGE_SIZE); +g_assert(ret != -1); + +hdr.xh_len = encoded_len; +hdr.xh_flags |= ENCODING_FLAG_XBZRLE; + +/* Send XBZRLE based compressed page */ +save_block_hdr(f, block, offset, cont, RAM_SAVE_FLAG_XBZRLE); +qemu_put_byte(f, hdr.xh_flags); +qemu_put_be16(f, hdr.xh_len); +qemu_put_buffer(f, XBZRLE.encoded_buf, encoded_len); +bytes_sent = encoded_len + sizeof(hdr); + +return bytes_sent; +} + static RAMBlock *last_block; static ram_addr_t last_offset; -static int ram_save_block(QEMUFile *f) +static int ram_save_block(QEMUFile *f, int stage) { RAMBlock *block = last_block; ram_addr_t offset = last_offset; int bytes_sent = -1; MemoryRegion *mr; +ram_addr_t current_addr; if (!block) block = QLIST_FIRST(ram_list.blocks); @@ -210,13 +294,31 @@ static int ram_save_block(QEMUFile *f) 
save_block_hdr(f, block, offset, cont, RAM_SAVE_FLAG_COMPRESS); qemu_put_byte(f, *p); bytes_sent = 1; -} else { +} else if (migrate_use_xbzrle() stage != 3) { +current_addr = block-offset + offset; +/* In stage 1 we only cache the pages before sending them + from
[Qemu-devel] [PATCH v16 2/9] Add XBZRLE documentation
Signed-off-by: Orit Wasserman owass...@redhat.com --- docs/xbzrle.txt | 136 +++ 1 files changed, 136 insertions(+), 0 deletions(-) create mode 100644 docs/xbzrle.txt diff --git a/docs/xbzrle.txt b/docs/xbzrle.txt new file mode 100644 index 000..f70e851 --- /dev/null +++ b/docs/xbzrle.txt @@ -0,0 +1,136 @@ +XBZRLE (Xor Based Zero Run Length Encoding) +=== + +Using XBZRLE (Xor Based Zero Run Length Encoding) allows for the reduction +of VM downtime and the total live-migration time of Virtual machines. +It is particularly useful for virtual machines running memory write intensive +workloads that are typical of large enterprise applications such as SAP ERP +Systems, and generally speaking for any application that uses a sparse memory +update pattern. + +Instead of sending the changed guest memory page this solution will send a +compressed version of the updates, thus reducing the amount of data sent during +live migration. +In order to be able to calculate the update, the previous memory pages need to +be stored on the source. Those pages are stored in a dedicated cache +(hash table) and are +accessed by their address. +The larger the cache size the better the chances are that the page has already +been stored in the cache. +A small cache size will result in high cache miss rate. +Cache size can be changed before and during migration. + +Format +=== + +The compression format performs a XOR between the previous and current content +of the page, where zero represents an unchanged value. +The page data delta is represented by zero and non zero runs. +A zero run is represented by its length (in bytes). +A non zero run is represented by its length (in bytes) and the new data. +The run length is encoded using ULEB128 (http://en.wikipedia.org/wiki/LEB128) + +There can be more than one valid encoding, the sender may send a longer encoding +for the benefit of reducing computation cost. + +page = zrun nzrun + | zrun nzrun page + +zrun = length + +nzrun = length byte... 
+ +length = uleb128 encoded integer + +On the sender side XBZRLE is used as a compact delta encoding of page updates, +retrieving the old page content from the cache (default size of 512 MB). The +receiving side uses the existing page's content and XBZRLE to decode the new +page's content. + +This work was originally based on research results published +VEE 2011: Evaluation of Delta Compression Techniques for Efficient Live +Migration of Large Virtual Machines by Benoit, Svard, Tordsson and Elmroth. +Additionally the delta encoder XBRLE was improved further using the XBZRLE +instead. + +XBZRLE has a sustained bandwidth of 2-2.5 GB/s for typical workloads making it +ideal for in-line, real-time encoding such as is needed for live-migration. + +Example +old buffer: +1001 zeros +05 06 07 08 09 0a 0b 0c 0d 0e 0f 10 11 12 13 68 00 00 6b 00 6d +3074 zeros + +new buffer: +1001 zeros +01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f 68 00 00 67 00 69 +3074 zeros + +encoded buffer: + +encoded length 24 +e9 07 0f 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f 03 01 67 01 01 69 + +Migration Capabilities +== +In order to use XBZRLE the destination QEMU version should be able to +decode the new format. +Adding a new migration capabilities command that will allow external management +to query for it support. +A typical use for the destination +{qemu} info migrate_capabilities +{qemu} xbzrle, ... + +In order to enable capabilities for future live migration, +a new command migrate_set_parameter is introduced: +{qemu} migrate_set_parameter xbzrle + +Usage +== + +1. Activate xbzrle +2. Set the XBZRLE cache size - the cache size is in MBytes and should be a +power of 2. The cache default value is 64MBytes. +3. start outgoing migration + +A typical usage scenario: +On the incoming QEMU: +{qemu} migrate_set_parameter xbzrle on +On the outgoing QEMU: +{qemu} migrate_set_parameter xbzrle on +{qemu} migrate_set_cachesize 256m +{qemu} migrate -d tcp:destination.host: +{qemu} info migrate +... 
+cache size: 67108864 bytes +transferred ram-duplicate: A kbytes +transferred ram-normal: B kbytes +transferred ram-xbrle: C kbytes +overflow ram-xbrle: D pages +cache-miss ram-xbrle: E pages + +cache-miss: the number of cache misses to date - high cache-miss rate +indicates that the cache size is set too low. +overflow: the number of overflows in the decoding which where the delta could +not be compressed. This can happen if the changes in the pages are too large +or there are many short changes; for example, changing every second byte (half a +page). + +Testing: Testing indicated that live migration with XBZRLE was completed in 110 +seconds, whereas without it would not be able to complete. + +A simple synthetic memory r/w load generator: +..include stdlib.h +..include stdio.h +..int
[Qemu-devel] First shot at adding IPMI to qemu
I had asked about getting an IPMI device into qemu and received some interest, and it's useful to me, so I've done some work to add it. The following patch set adds an IPMI KCS device, an IPMI BT device, a built-in BMC (IPMI management controller), and a way to attach an external BMC through a chardev. There was some discussion on whether to make the BMC internal or external, but I went ahead and added both. The internal one is fairly basic and not extensible, at least without adding code. I've modified the OpenIPMI library simulator to work with the external interface to allow it to receive connections from the qemu external simulator with a fairly basic protocol. I've also added the ability for the OpenIPMI library to manage a VM to power it on, power it off, reset it, and handle an IPMI watchdog timer. So it looks quite like a real system. Instructions for using it are in the OpenIPMI release candidate I uploaded to https://sourceforge.net/projects/openipmi Since IPMI can advertise its presence via SMBIOS, I added a way for a driver to add an SMBIOS entry. I also added a way to query a free interrupt from the ISA bus, since the interrupt is in the SMBIOS entry and nobody really cares which one is used.
[Qemu-devel] [PATCH 1/9] smbios: Add a function to directly add an entry
From: Corey Minyard cminy...@mvista.com There was no way to directly add a table entry to the SMBIOS table, even though the BIOS supports this. So add a function to do this. This is in preparation for the IPMI handler adding its SMBIOS table entry. Signed-off-by: Corey Minyard cminy...@mvista.com --- hw/smbios.c | 27 +++ hw/smbios.h | 15 --- 2 files changed, 35 insertions(+), 7 deletions(-) diff --git a/hw/smbios.c b/hw/smbios.c index c57237d..98c7f99 100644 --- a/hw/smbios.c +++ b/hw/smbios.c @@ -178,6 +178,33 @@ static void smbios_build_type_1_fields(const char *t) strlen(buf) + 1, buf); } +int smbios_table_entry_add(struct smbios_structure_header *entry) +{ +struct smbios_table *table; +struct smbios_structure_header *header; +unsigned int size = entry-length; + +if (!smbios_entries) { +smbios_entries_len = sizeof(uint16_t); +smbios_entries = g_malloc0(smbios_entries_len); +} +smbios_entries = g_realloc(smbios_entries, smbios_entries_len + + sizeof(*table) + size); +table = (struct smbios_table *)(smbios_entries + smbios_entries_len); +table-header.type = SMBIOS_TABLE_ENTRY; +table-header.length = cpu_to_le16(sizeof(*table) + size); + +header = (struct smbios_structure_header *)(table-data); +memcpy(header, entry, size); + +smbios_check_collision(header-type, SMBIOS_TABLE_ENTRY); + +smbios_entries_len += sizeof(*table) + size; +(*(uint16_t *)smbios_entries) = + cpu_to_le16(le16_to_cpu(*(uint16_t *)smbios_entries) + 1); +return 0; +} + int smbios_entry_add(const char *t) { char buf[1024]; diff --git a/hw/smbios.h b/hw/smbios.h index 94e3641..6431a15 100644 --- a/hw/smbios.h +++ b/hw/smbios.h @@ -13,21 +13,22 @@ * */ +/* This goes at the beginning of every SMBIOS structure.
*/ +struct smbios_structure_header { +uint8_t type; +uint8_t length; +uint16_t handle; +} QEMU_PACKED; + int smbios_entry_add(const char *t); void smbios_add_field(int type, int offset, int len, void *data); uint8_t *smbios_get_table(size_t *length); +int smbios_table_entry_add(struct smbios_structure_header *entry); /* * SMBIOS spec defined tables */ -/* This goes at the beginning of every SMBIOS structure. */ -struct smbios_structure_header { -uint8_t type; -uint8_t length; -uint16_t handle; -} QEMU_PACKED; - /* SMBIOS type 0 - BIOS Information */ struct smbios_type_0 { struct smbios_structure_header header; -- 1.7.4.1
[Qemu-devel] [PATCH 7/9] IPMI: Add a BT low-level interface
From: Corey Minyard cminy...@mvista.com This provides the simulation of the BT hardware interface for IPMI. Signed-off-by: Corey Minyard cminy...@mvista.com --- default-configs/i386-softmmu.mak |1 + default-configs/x86_64-softmmu.mak |1 + hw/Makefile.objs |1 + hw/ipmi_bt.c | 265 hw/ipmi_bt.h | 99 + 5 files changed, 367 insertions(+), 0 deletions(-) create mode 100644 hw/ipmi_bt.c create mode 100644 hw/ipmi_bt.h diff --git a/default-configs/i386-softmmu.mak b/default-configs/i386-softmmu.mak index b549389..f8f8e6d 100644 --- a/default-configs/i386-softmmu.mak +++ b/default-configs/i386-softmmu.mak @@ -10,6 +10,7 @@ CONFIG_VMMOUSE=y CONFIG_IPMI=y CONFIG_ISA_IPMI=y CONFIG_IPMI_KCS=y +CONFIG_IPMI_BT=y CONFIG_SERIAL=y CONFIG_PARALLEL=y CONFIG_I8254=y diff --git a/default-configs/x86_64-softmmu.mak b/default-configs/x86_64-softmmu.mak index af7d2a9..8c1177d 100644 --- a/default-configs/x86_64-softmmu.mak +++ b/default-configs/x86_64-softmmu.mak @@ -10,6 +10,7 @@ CONFIG_VMMOUSE=y CONFIG_IPMI=y CONFIG_ISA_IPMI=y CONFIG_IPMI_KCS=y +CONFIG_IPMI_BT=y CONFIG_SERIAL=y CONFIG_PARALLEL=y CONFIG_I8254=y diff --git a/hw/Makefile.objs b/hw/Makefile.objs index 99e5d1e..e1d30cc 100644 --- a/hw/Makefile.objs +++ b/hw/Makefile.objs @@ -23,6 +23,7 @@ hw-obj-$(CONFIG_EMPTY_SLOT) += empty_slot.o hw-obj-$(CONFIG_IPMI) += ipmi.o hw-obj-$(CONFIG_ISA_IPMI) += isa_ipmi.o hw-obj-$(CONFIG_IPMI_KCS) += ipmi_kcs.o +hw-obj-$(CONFIG_IPMI_BT) += ipmi_bt.o hw-obj-$(CONFIG_SERIAL) += serial.o hw-obj-$(CONFIG_PARALLEL) += parallel.o diff --git a/hw/ipmi_bt.c b/hw/ipmi_bt.c new file mode 100644 index 000..39f099e --- /dev/null +++ b/hw/ipmi_bt.c @@ -0,0 +1,265 @@ +/* + * QEMU IPMI BT emulation + * + * Copyright (c) 2012 Corey Minyard, MontaVista Software, LLC + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the Software), to deal + * in the Software without restriction, including without limitation the rights + * 
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED AS IS, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#include hw.h + +#include ipmi.h +#include ipmi_bt.h + +#define IPMI_CMD_GET_BT_INTF_CAP 0x36 + +static void ipmi_bt_handle_event(IPMIState *s) +{ +IPMIBtState *bt = s-typeinfo; + +ipmi_lock(); +if (s-inlen 4) + goto out; +/* Note that overruns are handled by handle_command */ +if (s-inmsg[0] != (s-inlen - 1)) { + /* Length mismatch, just ignore. */ + IPMI_BT_SET_BBUSY(bt-control_reg, 1); + s-inlen = 0; + goto out; +} +if ((s-inmsg[1] == (IPMI_NETFN_APP 2)) + (s-inmsg[3] == IPMI_CMD_GET_BT_INTF_CAP)) { + /* We handle this one ourselves. */ + s-outmsg[0] = 9; + s-outmsg[1] = s-inmsg[1] | 0x04; + s-outmsg[2] = s-inmsg[2]; + s-outmsg[3] = s-inmsg[3]; + s-outmsg[4] = 0; + s-outmsg[5] = 1; /* Only support 1 outstanding request. 
*/ + if (sizeof(s-inmsg) 0xff) /* Input buffer size */ + s-outmsg[6] = 0xff; + else + s-outmsg[6] = (unsigned char ) sizeof(s-inmsg); + if (sizeof(s-outmsg) 0xff) /* Output buffer size */ + s-outmsg[7] = 0xff; + else + s-outmsg[7] = (unsigned char) sizeof(s-outmsg); + s-outmsg[8] = 10; /* Max request to response time */ + s-outmsg[9] = 0; /* Don't recommend retries */ + s-outlen = 10; + IPMI_BT_SET_BBUSY(bt-control_reg, 0); + IPMI_BT_SET_B2H_ATN(bt-control_reg, 1); + if (s-use_irq s-irqs_enabled + !IPMI_BT_GET_B2H_IRQ(bt-mask_reg) + IPMI_BT_GET_B2H_IRQ_EN(bt-mask_reg)) { + IPMI_BT_SET_B2H_IRQ(bt-mask_reg, 1); + qemu_irq_raise(s-irq); + } + goto out; +} +bt-waiting_seq = s-inmsg[2]; +s-inmsg[2] = s-inmsg[1]; +s-handle_command(s, s-inmsg + 2, s-inlen - 2, sizeof(s-inmsg), + bt-waiting_rsp); + out: +ipmi_unlock(); +} + +static void ipmi_bt_handle_rsp(IPMIState *s, uint8_t msg_id, +
[Qemu-devel] [PATCH 4/9] Add a base IPMI interface
From: Corey Minyard cminy...@mvista.com Add the basic IPMI types and infrastructure to QEMU. Low-level interfaces and simulation interfaces will register with this; it's kind of the go-between to tie them together. Signed-off-by: Corey Minyard cminy...@mvista.com --- default-configs/i386-softmmu.mak |1 + default-configs/x86_64-softmmu.mak |1 + hw/Makefile.objs |2 + hw/ipmi.c | 147 +++ hw/ipmi.h | 192 qemu-doc.texi |2 + qemu-options.hx| 35 +++ sysemu.h |8 ++ vl.c | 46 + 9 files changed, 434 insertions(+), 0 deletions(-) create mode 100644 hw/ipmi.c create mode 100644 hw/ipmi.h diff --git a/default-configs/i386-softmmu.mak b/default-configs/i386-softmmu.mak index 2c78175..eb17afc 100644 --- a/default-configs/i386-softmmu.mak +++ b/default-configs/i386-softmmu.mak @@ -7,6 +7,7 @@ CONFIG_VGA_ISA=y CONFIG_VGA_CIRRUS=y CONFIG_VMWARE_VGA=y CONFIG_VMMOUSE=y +CONFIG_IPMI=y CONFIG_SERIAL=y CONFIG_PARALLEL=y CONFIG_I8254=y diff --git a/default-configs/x86_64-softmmu.mak b/default-configs/x86_64-softmmu.mak index 233a856..e4e3e4f 100644 --- a/default-configs/x86_64-softmmu.mak +++ b/default-configs/x86_64-softmmu.mak @@ -7,6 +7,7 @@ CONFIG_VGA_ISA=y CONFIG_VGA_CIRRUS=y CONFIG_VMWARE_VGA=y CONFIG_VMMOUSE=y +CONFIG_IPMI=y CONFIG_SERIAL=y CONFIG_PARALLEL=y CONFIG_I8254=y diff --git a/hw/Makefile.objs b/hw/Makefile.objs index 30c0b78..0d55997 100644 --- a/hw/Makefile.objs +++ b/hw/Makefile.objs @@ -20,6 +20,8 @@ hw-obj-$(CONFIG_M48T59) += m48t59.o hw-obj-$(CONFIG_ESCC) += escc.o hw-obj-$(CONFIG_EMPTY_SLOT) += empty_slot.o +hw-obj-$(CONFIG_IPMI) += ipmi.o + hw-obj-$(CONFIG_SERIAL) += serial.o hw-obj-$(CONFIG_PARALLEL) += parallel.o hw-obj-$(CONFIG_I8254) += i8254_common.o i8254.o diff --git a/hw/ipmi.c b/hw/ipmi.c new file mode 100644 index 000..86a097b --- /dev/null +++ b/hw/ipmi.c @@ -0,0 +1,147 @@ +/* + * QEMU IPMI emulation + * + * Copyright (c) 2012 Corey Minyard, MontaVista Software, LLC + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * 
of this software and associated documentation files (the Software), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED AS IS, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include hw.h +#include ipmi.h +#include sysemu.h +#include qmp-commands.h + +static void (*ipmi_init_handlers[4])(IPMIState *s); + +void register_ipmi_type(unsigned int type, void (*init)(IPMIState *s)) +{ +ipmi_init_handlers[type] = init; +} + +static void (*ipmi_sim_init_handlers[2])(IPMIState *s); + +void register_ipmi_sim(unsigned int iftype, void (*init)(IPMIState *s)) +{ +ipmi_sim_init_handlers[iftype] = init; +} + + +#ifdef DO_IPMI_THREAD +static void *ipmi_thread(void *opaque) +{ +IPMIState *s = opaque; +int64_t wait_until; + +ipmi_lock(); +for (;;) { + qemu_cond_wait(s-waker, s-lock); + wait_until = 0; + while (s-do_wake) { + s-do_wake = 0; + s-handle_if_event(s); + } +} +ipmi_unlock(); +return NULL; +} +#endif + +static int ipmi_do_hw_op(IPMIState *s, enum ipmi_op op, int checkonly) +{ +switch(op) { +case IPMI_RESET_CHASSIS: + if (checkonly) + return 0; + qemu_system_reset_request(); + return 0; + +case IPMI_POWEROFF_CHASSIS: + if (checkonly) + return 0; + 
qemu_system_powerdown_request(); + return 0; + +case IPMI_SEND_NMI: + if (checkonly) + return 0; + qemu_mutex_lock_iothread(); + qmp_inject_nmi(NULL); + qemu_mutex_unlock_iothread(); + return 0; + +case IPMI_POWERCYCLE_CHASSIS: +case IPMI_PULSE_DIAG_IRQ: +case IPMI_SHUTDOWN_VIA_ACPI_OVERTEMP: +case IPMI_POWERON_CHASSIS: +default: + return IPMI_CC_COMMAND_NOT_SUPPORTED; +} +} + +static void ipmi_set_irq_enable(IPMIState *s, int val) +{ +s-irqs_enabled = val; +} + +static
[Qemu-devel] [PATCH 3/9] isa: Add a way to query for a free interrupt
From: Corey Minyard cminy...@mvista.com This lets devices that don't care about their interrupt number, like IPMI, just grab any unused interrupt. Signed-off-by: Corey Minyard cminy...@mvista.com --- hw/isa-bus.c | 13 + hw/isa.h |2 ++ 2 files changed, 15 insertions(+), 0 deletions(-) diff --git a/hw/isa-bus.c b/hw/isa-bus.c index 5a43f03..f561f21 100644 --- a/hw/isa-bus.c +++ b/hw/isa-bus.c @@ -71,6 +71,7 @@ qemu_irq isa_get_irq(ISADevice *dev, int isairq) if (isairq 0 || isairq 15) { hw_error(isa irq %d invalid, isairq); } +isabus-irq_inuse[isairq] = 1; return isabus-irqs[isairq]; } @@ -82,6 +83,18 @@ void isa_init_irq(ISADevice *dev, qemu_irq *p, int isairq) dev-nirqs++; } +int isa_find_free_irq(ISABus *bus) +{ +unsigned int i; + +/* 0 and 1 are called for, 2 is the chain interrupt */ +for (i = 3; i ISA_NUM_IRQS; i++) { + if (!bus-irq_inuse[i]) + return i; +} +return 0; +} + static inline void isa_init_ioport(ISADevice *dev, uint16_t ioport) { if (dev (dev-ioport_id == 0 || ioport dev-ioport_id)) { diff --git a/hw/isa.h b/hw/isa.h index f7bc4b5..9447296 100644 --- a/hw/isa.h +++ b/hw/isa.h @@ -28,6 +28,7 @@ struct ISABus { BusState qbus; MemoryRegion *address_space_io; qemu_irq *irqs; +int irq_inuse[ISA_NUM_IRQS]; }; struct ISADevice { @@ -41,6 +42,7 @@ ISABus *isa_bus_new(DeviceState *dev, MemoryRegion *address_space_io); void isa_bus_irqs(ISABus *bus, qemu_irq *irqs); qemu_irq isa_get_irq(ISADevice *dev, int isairq); void isa_init_irq(ISADevice *dev, qemu_irq *p, int isairq); +int isa_find_free_irq(ISABus *bus); MemoryRegion *isa_address_space(ISADevice *dev); ISADevice *isa_create(ISABus *bus, const char *name); ISADevice *isa_try_create(ISABus *bus, const char *name); -- 1.7.4.1
[Qemu-devel] [PATCH 5/9] IPMI: Add a PC ISA type structure
From: Corey Minyard cminy...@mvista.com This provides the base infrastructure to tie IPMI low-level interfaces into a PC ISA bus. Signed-off-by: Corey Minyard cminy...@mvista.com --- default-configs/i386-softmmu.mak |1 + default-configs/x86_64-softmmu.mak |1 + hw/Makefile.objs |1 + hw/isa_ipmi.c | 138 hw/pc.c| 12 +++ hw/pc.h| 18 + hw/smbios.h| 12 +++ 7 files changed, 183 insertions(+), 0 deletions(-) create mode 100644 hw/isa_ipmi.c diff --git a/default-configs/i386-softmmu.mak b/default-configs/i386-softmmu.mak index eb17afc..c0aff0d 100644 --- a/default-configs/i386-softmmu.mak +++ b/default-configs/i386-softmmu.mak @@ -8,6 +8,7 @@ CONFIG_VGA_CIRRUS=y CONFIG_VMWARE_VGA=y CONFIG_VMMOUSE=y CONFIG_IPMI=y +CONFIG_ISA_IPMI=y CONFIG_SERIAL=y CONFIG_PARALLEL=y CONFIG_I8254=y diff --git a/default-configs/x86_64-softmmu.mak b/default-configs/x86_64-softmmu.mak index e4e3e4f..615e4f2 100644 --- a/default-configs/x86_64-softmmu.mak +++ b/default-configs/x86_64-softmmu.mak @@ -8,6 +8,7 @@ CONFIG_VGA_CIRRUS=y CONFIG_VMWARE_VGA=y CONFIG_VMMOUSE=y CONFIG_IPMI=y +CONFIG_ISA_IPMI=y CONFIG_SERIAL=y CONFIG_PARALLEL=y CONFIG_I8254=y diff --git a/hw/Makefile.objs b/hw/Makefile.objs index 0d55997..8f27ffe 100644 --- a/hw/Makefile.objs +++ b/hw/Makefile.objs @@ -21,6 +21,7 @@ hw-obj-$(CONFIG_ESCC) += escc.o hw-obj-$(CONFIG_EMPTY_SLOT) += empty_slot.o hw-obj-$(CONFIG_IPMI) += ipmi.o +hw-obj-$(CONFIG_ISA_IPMI) += isa_ipmi.o hw-obj-$(CONFIG_SERIAL) += serial.o hw-obj-$(CONFIG_PARALLEL) += parallel.o diff --git a/hw/isa_ipmi.c b/hw/isa_ipmi.c new file mode 100644 index 000..cad78b0 --- /dev/null +++ b/hw/isa_ipmi.c @@ -0,0 +1,138 @@ +/* + * QEMU ISA IPMI KCS emulation + * + * Copyright (c) 2012 Corey Minyard, MontaVista Software, LLC + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the Software), to deal + * in the Software without restriction, including without limitation the rights + * to use, 
copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED AS IS, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#include hw.h +#include isa.h +#include pc.h +#include qemu-timer.h +#include sysemu.h +#include smbios.h +#include ipmi.h + + +typedef struct ISAIPMIState { +ISADevice dev; +uint32_t type; +uint32_t iobase; +uint32_t isairq; +uint8_t slave_addr; +IPMIState state; +} ISAIPMIState; + +static int ipmi_isa_initfn(ISADevice *dev) +{ +ISAIPMIState *isa = DO_UPCAST(ISAIPMIState, dev, dev); +struct smbios_type_38 smb38; + +if (isa-iobase == -1) { + /* If no I/O base is specified, set the defaults */ + switch (isa-type) { + case IPMI_KCS: + isa-iobase = 0xca2; + break; + case IPMI_BT: + isa-iobase = 0xe4; + break; + case IPMI_SMIC: + isa-iobase = 0xca9; + break; + default: + fprintf(stderr, Unknown IPMI type: %d\n, isa-type); + abort(); + } +} + +isa-state.slave_addr = isa-slave_addr; + +qdev_set_legacy_instance_id(dev-qdev, isa-iobase, 3); + +ipmi_init(isa-type, isa-state); + +if (isa-isairq 0) { + isa_init_irq(dev, isa-state.irq, isa-isairq); + isa-state.use_irq = 1; +} + +isa_register_ioport(dev, isa-state.io, isa-iobase); + +smb38.header.type = 38; +smb38.header.length = sizeof(smb38); +smb38.header.handle = 0x3000; +smb38.interface_type = 
isa-state.smbios_type; +smb38.ipmi_version = 0x20; +smb38.i2c_slave_addr = isa-state.slave_addr; +smb38.nv_storage_dev_addr = 0; + +/* or 1 to set it to I/O space */ +smb38.base_addr = isa-iobase | 1; + + /* 1-byte boundaries, addr bit0=0, level triggered irq */ +smb38.base_addr_mod_and_irq_info = 1; +smb38.interrupt_number = isa-isairq; +smbios_table_entry_add((struct smbios_structure_header *) smb38); + +return 0; +} + +static Property ipmi_isa_properties[] = { +DEFINE_PROP_HEX32(type, ISAIPMIState, type, IPMI_KCS), +
[Qemu-devel] [PATCH 6/9] IPMI: Add a KCS low-level interface
From: Corey Minyard cminy...@mvista.com This provides the simulation of the KCS hardware interface. Signed-off-by: Corey Minyard cminy...@mvista.com --- default-configs/i386-softmmu.mak |1 + default-configs/x86_64-softmmu.mak |1 + hw/Makefile.objs |1 + hw/ipmi_kcs.c | 259 hw/ipmi_kcs.h | 82 +++ 5 files changed, 344 insertions(+), 0 deletions(-) create mode 100644 hw/ipmi_kcs.c create mode 100644 hw/ipmi_kcs.h diff --git a/default-configs/i386-softmmu.mak b/default-configs/i386-softmmu.mak index c0aff0d..b549389 100644 --- a/default-configs/i386-softmmu.mak +++ b/default-configs/i386-softmmu.mak @@ -9,6 +9,7 @@ CONFIG_VMWARE_VGA=y CONFIG_VMMOUSE=y CONFIG_IPMI=y CONFIG_ISA_IPMI=y +CONFIG_IPMI_KCS=y CONFIG_SERIAL=y CONFIG_PARALLEL=y CONFIG_I8254=y diff --git a/default-configs/x86_64-softmmu.mak b/default-configs/x86_64-softmmu.mak index 615e4f2..af7d2a9 100644 --- a/default-configs/x86_64-softmmu.mak +++ b/default-configs/x86_64-softmmu.mak @@ -9,6 +9,7 @@ CONFIG_VMWARE_VGA=y CONFIG_VMMOUSE=y CONFIG_IPMI=y CONFIG_ISA_IPMI=y +CONFIG_IPMI_KCS=y CONFIG_SERIAL=y CONFIG_PARALLEL=y CONFIG_I8254=y diff --git a/hw/Makefile.objs b/hw/Makefile.objs index 8f27ffe..99e5d1e 100644 --- a/hw/Makefile.objs +++ b/hw/Makefile.objs @@ -22,6 +22,7 @@ hw-obj-$(CONFIG_EMPTY_SLOT) += empty_slot.o hw-obj-$(CONFIG_IPMI) += ipmi.o hw-obj-$(CONFIG_ISA_IPMI) += isa_ipmi.o +hw-obj-$(CONFIG_IPMI_KCS) += ipmi_kcs.o hw-obj-$(CONFIG_SERIAL) += serial.o hw-obj-$(CONFIG_PARALLEL) += parallel.o diff --git a/hw/ipmi_kcs.c b/hw/ipmi_kcs.c new file mode 100644 index 000..61188c9 --- /dev/null +++ b/hw/ipmi_kcs.c @@ -0,0 +1,259 @@ +/* + * QEMU IPMI KCS emulation + * + * Copyright (c) 2012 Corey Minyard, MontaVista Software, LLC + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the Software), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, 
publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED AS IS, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#include hw.h + +#include ipmi.h +#include ipmi_kcs.h + +#define SET_OBF() \ +do { \ + IPMI_KCS_SET_OBF(kcs-status_reg, 1); \ + if (s-use_irq s-irqs_enabled !s-obf_irq_set) { \ + s-obf_irq_set = 1; \ + if (!s-atn_irq_set)\ + qemu_irq_raise(s-irq); \ + } \ +} while (0) + +static void ipmi_kcs_handle_event(IPMIState *s) +{ +IPMIKcsState *kcs = s-typeinfo; +if (kcs-cmd_reg == IPMI_KCS_ABORT_STATUS_CMD) { + if (IPMI_KCS_GET_STATE(kcs-status_reg) != IPMI_KCS_ERROR_STATE) { + kcs-waiting_rsp++; /* Invalidate the message */ + s-outmsg[0] = IPMI_KCS_STATUS_ABORTED_ERR; + s-outlen = 1; + s-outpos = 0; + IPMI_KCS_SET_STATE(kcs-status_reg, IPMI_KCS_ERROR_STATE); + SET_OBF(); + } + goto out; +} + +switch (IPMI_KCS_GET_STATE(kcs-status_reg)) { +case IPMI_KCS_IDLE_STATE: + if (kcs-cmd_reg == IPMI_KCS_WRITE_START_CMD) { + IPMI_KCS_SET_STATE(kcs-status_reg, IPMI_KCS_WRITE_STATE); + kcs-cmd_reg = -1; + s-write_end = 0; + s-inlen = 0; + SET_OBF(); + } + break; + +case IPMI_KCS_READ_STATE: +handle_read: + if (s-outpos = s-outlen) { + IPMI_KCS_SET_STATE(kcs-status_reg, IPMI_KCS_IDLE_STATE); + SET_OBF(); + } else if (kcs-data_in_reg == IPMI_KCS_READ_CMD) { + kcs-data_out_reg = 
s-outmsg[s-outpos]; + s-outpos++; + SET_OBF(); + } else { + s-outmsg[0] = IPMI_KCS_STATUS_BAD_CC_ERR; + s-outlen = 1; + s-outpos = 0; + IPMI_KCS_SET_STATE(kcs-status_reg, IPMI_KCS_ERROR_STATE); + SET_OBF(); + goto out;
[Qemu-devel] [PATCH 2/9] pc: move SMBIOS setup to after device init
From: Corey Minyard cminy...@mvista.com Setting up the firmware interface for the SMBIOS table needs to be done later in the process, after device initialization, so that devices can add entries to the table. Signed-off-by: Corey Minyard cminy...@mvista.com --- hw/pc.c | 22 +- hw/pc.h |9 + hw/pc_piix.c | 12 3 files changed, 26 insertions(+), 17 deletions(-) diff --git a/hw/pc.c b/hw/pc.c index fb04c8b..c0acb6a 100644 --- a/hw/pc.c +++ b/hw/pc.c @@ -971,20 +971,12 @@ void pc_cpus_init(const char *cpu_model) } void pc_memory_init(MemoryRegion *system_memory, -const char *kernel_filename, -const char *kernel_cmdline, -const char *initrd_filename, ram_addr_t below_4g_mem_size, ram_addr_t above_4g_mem_size, -MemoryRegion *rom_memory, MemoryRegion **ram_memory) { -int linux_boot, i; -MemoryRegion *ram, *option_rom_mr; +MemoryRegion *ram; MemoryRegion *ram_below_4g, *ram_above_4g; -void *fw_cfg; - -linux_boot = (kernel_filename != NULL); /* Allocate RAM. We allocate it as a single memory region and use * aliases to address portions of it, mostly for backwards compatibility @@ -1006,7 +998,17 @@ void pc_memory_init(MemoryRegion *system_memory, memory_region_add_subregion(system_memory, 0x1ULL, ram_above_4g); } +} +void pc_bios_init(const char *kernel_filename, + const char *kernel_cmdline, + const char *initrd_filename, + ram_addr_t below_4g_mem_size, + MemoryRegion *rom_memory) +{ +MemoryRegion *option_rom_mr; +void *fw_cfg; +int linux_boot, i; /* Initialize PC system firmware */ pc_system_firmware_init(rom_memory); @@ -1019,6 +1021,8 @@ void pc_memory_init(MemoryRegion *system_memory, option_rom_mr, 1); +linux_boot = (kernel_filename != NULL); + fw_cfg = bochs_bios_init(); rom_set_fw(fw_cfg); diff --git a/hw/pc.h b/hw/pc.h index 8ccf202..33ab689 100644 --- a/hw/pc.h +++ b/hw/pc.h @@ -107,13 +107,14 @@ void pc_acpi_smi_interrupt(void *opaque, int irq, int level); void pc_cpus_init(const char *cpu_model); void pc_memory_init(MemoryRegion *system_memory, -const char 
*kernel_filename, -const char *kernel_cmdline, -const char *initrd_filename, ram_addr_t below_4g_mem_size, ram_addr_t above_4g_mem_size, -MemoryRegion *rom_memory, MemoryRegion **ram_memory); +void pc_bios_init(const char *kernel_filename, + const char *kernel_cmdline, + const char *initrd_filename, + ram_addr_t below_4g_mem_size, + MemoryRegion *rom_memory); qemu_irq *pc_allocate_cpu_irq(void); DeviceState *pc_vga_init(ISABus *isa_bus, PCIBus *pci_bus); void pc_basic_device_init(ISABus *isa_bus, qemu_irq *gsi, diff --git a/hw/pc_piix.c b/hw/pc_piix.c index fb86f27..21b4f59 100644 --- a/hw/pc_piix.c +++ b/hw/pc_piix.c @@ -176,10 +176,8 @@ static void pc_init1(MemoryRegion *system_memory, /* allocate ram and load rom/bios */ if (!xen_enabled()) { -pc_memory_init(system_memory, - kernel_filename, kernel_cmdline, initrd_filename, - below_4g_mem_size, above_4g_mem_size, - pci_enabled ? rom_memory : system_memory, ram_memory); +pc_memory_init(system_memory, below_4g_mem_size, above_4g_mem_size, + ram_memory); } gsi_state = g_malloc0(sizeof(*gsi_state)); @@ -287,6 +285,12 @@ static void pc_init1(MemoryRegion *system_memory, if (pci_enabled) { pc_pci_device_init(pci_bus); } + +if (!xen_enabled()) { + pc_bios_init(kernel_filename, kernel_cmdline, initrd_filename, +below_4g_mem_size, +pci_enabled ? rom_memory : system_memory); +} } static void pc_init_pci(ram_addr_t ram_size, -- 1.7.4.1
[Qemu-devel] [PATCH 8/9] IPMI: Add a local BMC simulation
From: Corey Minyard cminy...@mvista.com This provides a minimal local BMC, basically enough to comply with the spec and provide a complete watchdog timer (including a sensor, SDR, and event). Signed-off-by: Corey Minyard cminy...@mvista.com --- default-configs/i386-softmmu.mak |1 + default-configs/x86_64-softmmu.mak |1 + hw/Makefile.objs |1 + hw/ipmi_sim.c | 1273 hw/ipmi_sim.h | 270 5 files changed, 1546 insertions(+), 0 deletions(-) create mode 100644 hw/ipmi_sim.c create mode 100644 hw/ipmi_sim.h diff --git a/default-configs/i386-softmmu.mak b/default-configs/i386-softmmu.mak index f8f8e6d..8c99d5d 100644 --- a/default-configs/i386-softmmu.mak +++ b/default-configs/i386-softmmu.mak @@ -11,6 +11,7 @@ CONFIG_IPMI=y CONFIG_ISA_IPMI=y CONFIG_IPMI_KCS=y CONFIG_IPMI_BT=y +CONFIG_IPMI_LOCAL=y CONFIG_SERIAL=y CONFIG_PARALLEL=y CONFIG_I8254=y diff --git a/default-configs/x86_64-softmmu.mak b/default-configs/x86_64-softmmu.mak index 8c1177d..4d01883 100644 --- a/default-configs/x86_64-softmmu.mak +++ b/default-configs/x86_64-softmmu.mak @@ -11,6 +11,7 @@ CONFIG_IPMI=y CONFIG_ISA_IPMI=y CONFIG_IPMI_KCS=y CONFIG_IPMI_BT=y +CONFIG_IPMI_LOCAL=y CONFIG_SERIAL=y CONFIG_PARALLEL=y CONFIG_I8254=y diff --git a/hw/Makefile.objs b/hw/Makefile.objs index e1d30cc..193227d 100644 --- a/hw/Makefile.objs +++ b/hw/Makefile.objs @@ -24,6 +24,7 @@ hw-obj-$(CONFIG_IPMI) += ipmi.o hw-obj-$(CONFIG_ISA_IPMI) += isa_ipmi.o hw-obj-$(CONFIG_IPMI_KCS) += ipmi_kcs.o hw-obj-$(CONFIG_IPMI_BT) += ipmi_bt.o +hw-obj-$(CONFIG_IPMI_LOCAL) += ipmi_sim.o hw-obj-$(CONFIG_SERIAL) += serial.o hw-obj-$(CONFIG_PARALLEL) += parallel.o diff --git a/hw/ipmi_sim.c b/hw/ipmi_sim.c new file mode 100644 index 000..6813a86 --- /dev/null +++ b/hw/ipmi_sim.c @@ -0,0 +1,1273 @@ +/* + * IPMI BMC emulation + * + * Copyright (c) 2012 Corey Minyard, MontaVista Software, LLC + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the Software), 
to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED AS IS, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include stdio.h +#include string.h +#include ipmi_sim.h + +static void ipmi_sim_handle_timeout(IPMIState *s); + +static void ipmi_gettime(struct ipmi_time *time) +{ +int64_t stime; + +stime = get_clock_realtime(); +time-tv_sec = stime / 10LL; +time-tv_nsec = stime % 10LL; +} + +static int64_t ipmi_getmonotime(void) +{ +return qemu_get_clock_ns(vm_clock); +} + +static void ipmi_timeout(void *opaque) +{ +IPMIState *s = opaque; + +ipmi_sim_handle_timeout(s); +} + +static void set_timestamp(IPMISimState *ss, uint8_t *ts) +{ +unsigned int val; +struct ipmi_time now; + +ipmi_gettime(now); +val = now.tv_sec + ss-sel.time_offset; +ts[0] = val 0xff; +ts[1] = (val 8) 0xff; +ts[2] = (val 16) 0xff; +ts[3] = (val 24) 0xff; +} + +static void sdr_inc_reservation(IPMISdr *sdr) +{ +sdr-reservation++; +if (sdr-reservation == 0) + sdr-reservation = 1; +} + +static int sdr_add_entry(IPMISimState *ss, const uint8_t *entry, +unsigned int len, uint16_t *recid) +{ +if ((len 5) || (len 255)) + return 1; + +if (entry[ss-sdr.next_free + 4] != len) + return 1; + +if 
(ss-sdr.next_free + len MAX_SDR_SIZE) { + ss-sdr.overflow = 1; + return 1; +} + +memcpy(ss-sdr.sdr + ss-sdr.next_free, entry, len); +ss-sdr.sdr[ss-sdr.next_free] = ss-sdr.next_rec_id 0xff; +ss-sdr.sdr[ss-sdr.next_free+1] = (ss-sdr.next_rec_id 8) 0xff; +ss-sdr.sdr[ss-sdr.next_free+2] = 0x51; /* Conform to IPMI 1.5 spec */ + +if (recid) + *recid = ss-sdr.next_rec_id; +ss-sdr.next_rec_id++; +set_timestamp(ss, ss-sdr.last_addition); +ss-sdr.next_free += len; +sdr_inc_reservation(ss-sdr); +return 0; +} + +static int sdr_find_entry(IPMISdr *sdr, uint16_t recid, + unsigned int *retpos, uint16_t *nextrec) +{
[Qemu-devel] [PATCH 9/9] IPMI: Add an external connection simulation interface
From: Corey Minyard cminy...@mvista.com This adds an interface for IPMI that connects to a remote BMC over a chardev (generally a TCP socket). The OpenIPMI lanserv simulator describes this interface, see that for interface details. Signed-off-by: Corey Minyard cminy...@mvista.com --- default-configs/i386-softmmu.mak |1 + default-configs/x86_64-softmmu.mak |1 + hw/Makefile.objs |1 + hw/ipmi_extern.c | 421 hw/ipmi_extern.h | 75 +++ 5 files changed, 499 insertions(+), 0 deletions(-) create mode 100644 hw/ipmi_extern.c create mode 100644 hw/ipmi_extern.h diff --git a/default-configs/i386-softmmu.mak b/default-configs/i386-softmmu.mak index 8c99d5d..325f92e 100644 --- a/default-configs/i386-softmmu.mak +++ b/default-configs/i386-softmmu.mak @@ -12,6 +12,7 @@ CONFIG_ISA_IPMI=y CONFIG_IPMI_KCS=y CONFIG_IPMI_BT=y CONFIG_IPMI_LOCAL=y +CONFIG_IPMI_EXTERN=y CONFIG_SERIAL=y CONFIG_PARALLEL=y CONFIG_I8254=y diff --git a/default-configs/x86_64-softmmu.mak b/default-configs/x86_64-softmmu.mak index 4d01883..2ac9177 100644 --- a/default-configs/x86_64-softmmu.mak +++ b/default-configs/x86_64-softmmu.mak @@ -12,6 +12,7 @@ CONFIG_ISA_IPMI=y CONFIG_IPMI_KCS=y CONFIG_IPMI_BT=y CONFIG_IPMI_LOCAL=y +CONFIG_IPMI_EXTERN=y CONFIG_SERIAL=y CONFIG_PARALLEL=y CONFIG_I8254=y diff --git a/hw/Makefile.objs b/hw/Makefile.objs index 193227d..06757b0 100644 --- a/hw/Makefile.objs +++ b/hw/Makefile.objs @@ -25,6 +25,7 @@ hw-obj-$(CONFIG_ISA_IPMI) += isa_ipmi.o hw-obj-$(CONFIG_IPMI_KCS) += ipmi_kcs.o hw-obj-$(CONFIG_IPMI_BT) += ipmi_bt.o hw-obj-$(CONFIG_IPMI_LOCAL) += ipmi_sim.o +hw-obj-$(CONFIG_IPMI_EXTERN) += ipmi_extern.o hw-obj-$(CONFIG_SERIAL) += serial.o hw-obj-$(CONFIG_PARALLEL) += parallel.o diff --git a/hw/ipmi_extern.c b/hw/ipmi_extern.c new file mode 100644 index 000..bbb8469 --- /dev/null +++ b/hw/ipmi_extern.c @@ -0,0 +1,421 @@ +/* + * IPMI BMC external connection + * + * Copyright (c) 2012 Corey Minyard, MontaVista Software, LLC + * + * Permission is hereby granted, free of charge, to 
any person obtaining a copy + * of this software and associated documentation files (the Software), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED AS IS, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +/* + * This is designed to connect with OpenIPMI's lanserv serial interface + * using the VM connection type. See that for details. 
+ */ + +#include ipmi_extern.h + +static int can_receive(void *opaque); +static void receive(void *opaque, const uint8_t *buf, int size); +static void chr_event(void *opaque, int event); + +static unsigned char +ipmb_checksum(const unsigned char *data, int size, unsigned char start) +{ + unsigned char csum = start; + + for (; size 0; size--, data++) + csum += *data; + + return csum; +} + +static void continue_send(IPMIState *s, IPMIExternState *es) +{ +if (es-outlen == 0) + goto check_reset; + + send: +es-outpos += qemu_chr_fe_write(es-chr, es-outbuf + es-outpos, + es-outlen - es-outpos); +if (es-outpos es-outlen) { + /* Not fully transmitted, try again in a 10ms */ + qemu_mod_timer(es-extern_timer, + qemu_get_clock_ns(vm_clock) + 1000); +} else { + /* Sent */ + es-outlen = 0; + es-outpos = 0; + if (!es-sending_cmd) + es-waiting_rsp = 1; + else + es-sending_cmd = 0; + +check_reset: + if (es-send_reset) { + /* Send the reset */ + es-outbuf[0] = VM_CMD_RESET; + es-outbuf[1] = VM_CMD_CHAR; + es-outlen = 2; + es-outpos = 0; + es-send_reset = 0; + es-sending_cmd = 1; + goto send; + } + + if (es-waiting_rsp) + /* Make sure we get a response within 4 seconds. */ + qemu_mod_timer(es-extern_timer, + qemu_get_clock_ns(vm_clock) + 40ULL); +} +return; +} + +static void extern_timeout(void *opaque) +{ +IPMIState *s = opaque;
Re: [Qemu-devel] [libvirt] [RFC 0/5] block: File descriptor passing using -open-hook-fd
On 05/17/2012 09:14 AM, Eric Blake wrote: On 05/17/2012 07:42 AM, Stefan Hajnoczi wrote: The -open-hook-fd approach allows QEMU to support file descriptor passing without changing -drive. It also supports snapshot_blkdev and other commands By the way, how will it support them? The problem with snapshot_blkdev is that closing a file and opening a new file cannot be done by the QEMU process when an SELinux policy is in place to prevent opening files. snapshot_blkdev can take an fd:name instead of a /path/to/file for the file to open, in which case libvirt can pass in the named fd _prior_ to the snapshot_blkdev using the 'getfd' monitor command. The -open-hook-fd approach works even when the QEMU process is not allowed to open files since file descriptor passing over a UNIX domain socket is used to open files on behalf of QEMU. The -open-hook-fd approach would indeed allow snapshot_blkdev to ask for the fd after the fact, but it's much more painful. Consider a case with a two-disk snapshot: with the fd:name approach, the sequence is: libvirt calls getfd:name1 over normal monitor qemu responds libvirt calls getfd:name2 over normal monitor qemu responds libvirt calls transaction around blockdev-snapshot-sync over normal monitor, using fd:name1 and fd:name2 qemu responds but with -open-hook-fd, the approach would be: libvirt calls transaction qemu calls open(file1) over hook libvirt responds qemu calls open(file2) over hook libvirt responds qemu responds to the original transaction The 'transaction' operation is thus blocked by the time it takes to do two intermediate opens over a second channel, which kind of defeats the purpose of making the transaction take effect with minimal guest downtime. How are you defining guest down time? It's important to note that code running in QEMU does not equate to guest visible down time unless QEMU does an explicit vm_stop() which is not happening here. 
Instead, a VCPU may become blocked *if* it attempts to acquire qemu_mutex while QEMU is holding it. If your concern is qemu_mutex being held while waiting for libvirt, it would be fairly easy to implement a qemu_open_async() that allowed dropping back to the main loop and then calling a callback when the open completes. It would be pretty trivial to convert qmp_transaction to use such a command. But this is all speculative. There's no reason to believe that an RPC would have a noticeable guest visible latency unless you assume there's lock contention. I would strongly suspect that the bdrv_flush() is going to be a much greater source of lock contention than the RPC would be. An RPC is only bound by scheduler latency whereas synchronous disk I/O is bound by spinning a platter. And libvirt code becomes a lot trickier to deal with the fact that two channels are in use, and that the channel that issued the 'transaction' command must block while the other channel for handling hooks must be responsive. All libvirt needs to do is listen on a socket and delegate access according to a white list. Whatever is providing fd's needs to have no knowledge of anything other than what the guest is allowed to access which shouldn't depend on an executing command. Regards, Anthony Liguori I'm really disliking the hook-fd approach, when a better solution is to make use of 'getfd' in advance of any operation that will need to open new fds.
Re: [Qemu-devel] [libvirt] [RFC 0/5] block: File descriptor passing using -open-hook-fd
On 07/09/2012 02:00 PM, Anthony Liguori wrote: with the fd:name approach, the sequence is: libvirt calls getfd:name1 over normal monitor qemu responds libvirt calls getfd:name2 over normal monitor qemu responds libvirt calls transaction around blockdev-snapshot-sync over normal monitor, using fd:name1 and fd:name2 qemu responds This general layout is true whether we rewrite all commands to understand fd:nnn (proposal 1) or whether we add new magic parsing (/dev/fd/nnn of proposal 3, or even /dev/fdset/nnn of proposal 5), all as called out in these messages: https://lists.gnu.org/archive/html/qemu-devel/2012-07/msg00227.html https://lists.gnu.org/archive/html/qemu-devel/2012-07/msg01098.html but with -open-hook-fd, the approach would be: libvirt calls transaction qemu calls open(file1) over hook libvirt responds qemu calls open(file2) over hook libvirt responds qemu responds to the original transaction whereas this approach is quite different in semantics, but may indeed be easier for qemu to implement, at the expense of some more complexity on the part of libvirt. At the high level, I think both approaches have one thing in common: by refactoring all qemu code to go through qemu_open(), we can then implement our desired complexity (whether fd:nn, /dev/fd/nnn, /dev/fdset/nnn, or some other magic name parsing; or whether it is an rpc call over a second socket in parallel to the monitor socket) in just one location. Likewise, both approaches have to deal with libvirtd restarts (magic name parsing by changing an 'inuse' flag when the monitor detects EOF; rpc passing by failing a qemu_open() when the rpc socket detects EOF). The 'transaction' operation is thus blocked by the time it takes to do two intermediate opens over a second channel, which kind of defeats the purpose of making the transaction take effect with minimal guest downtime. How are you defining guest down time? 
It's important to note that code running in QEMU does not equate to guest visible down time unless QEMU does an explicit vm_stop() which is not happening here. Instead, a VCPU may become blocked *if* it attempts to acquire qemu_mutex while QEMU is holding it. If your concern is qemu_mutex being held while waiting for libvirt, it would be fairly easy to implement a qemu_open_async() that allowed dropping back to the main loop and then calling a callback when the open completes. It would be pretty trivial to convert qmp_transaction to use such a command. In other words, remembering that transactions are divided into phases: phase 1 - prepare: obtain all needed fds (whether by pre-opening them via 'pass-fd' or other new 'getfd' relative, or whether by RPC calls); no guest downtime, and with cleanup that avoids any leaks on any failures phase 2 - commit: flush all devices and actually make the changes in qemu state to use the fds obtained in phase 1 and where the guest downtime (if any) is more likely due to flushing changes in phase 2 But this is all speculative. There's no reason to believe that an RPC would have a noticeable guest visible latency unless you assume there's lock contention. I would strongly suspect that the bdrv_flush() is going to be a much greater source of lock contention than the RPC would be. An RPC is only bound by scheduler latency whereas synchronous disk I/O is bound by spinning a platter. And libvirt code becomes a lot trickier to deal with the fact that two channels are in use, and that the channel that issued the 'transaction' command must block while the other channel for handling hooks must be responsive. All libvirt needs to do is listen on a socket and delegate access according to a white list. Whatever is providing fd's needs to have no knowledge of anything other than what the guest is allowed to access which shouldn't depend on an executing command. That's not quite accurate. 
What the guest is allowed to access should indeed change depending on the executing command. That is, if I start a guest with: base <- delta then I only want to permit O_RDONLY access to base but O_RDWR access to delta. If I then call 'blockdev-snapshot-sync', I want to change to the situation: base <- delta <- snap and give O_RDWR permissions to snap; it would also be nice if qemu attempts to reopen delta with O_RDONLY permissions (although from a trust perspective, libvirt must assume that delta is still O_RDWR because qemu may have been compromised and lie about the tightening of permissions); at any rate, depending on SELinux capabilities of the file, libvirt may be able to enforce no further writes to 'delta' by toggling a SELinux label (obviously, this should only be done after 'blockdev-snapshot-sync' completes). On the other hand, the user could decide to do a 'block-commit', to squash things into: base where base is now O_RDWR. But libvirt doesn't want to grant write-access to 'base' up-front, so the whitelist allowing O_RDWR access to
Re: [Qemu-devel] [PATCH] target-i386: implement FPREM and FPREM1 using softfloat only
On Mon, Jul 2, 2012 at 11:25 AM, Catalin Patulea catal...@google.com wrote: FPREM1 now passes the TestFloat floatx80_rem suite (and FPREM is implemented very similarly). The code (the bulk of which is remainder_kernel and do_fprem) is derived from Bochs SVN revision 11224 dated 2012-06-21 10:33:37 -0700, with conversions to Qemu type aliases, C features only, etc. as needed. Signed-off-by: Catalin Patulea catal...@google.com --- fpu/softfloat.c | 195 +++ fpu/softfloat.h |4 + target-i386/op_helper.c | 166 3 files changed, 266 insertions(+), 99 deletions(-) diff --git a/fpu/softfloat.c b/fpu/softfloat.c index b29256a..bd1879d 100644 [...] Ping - how do people feel about the latest patch?
[Qemu-devel] KVM call agenda for Tuesday, July 10th
Hi Please send in any agenda items you are interested in covering. Later, Juan.
Re: [Qemu-devel] [libvirt] [RFC 0/5] block: File descriptor passing using -open-hook-fd
On 07/09/2012 03:29 PM, Eric Blake wrote: On 07/09/2012 02:00 PM, Anthony Liguori wrote: with the fd:name approach, the sequence is: libvirt calls getfd:name1 over normal monitor qemu responds libvirt calls getfd:name2 over normal monitor qemu responds libvirt calls transaction around blockdev-snapshot-sync over normal monitor, using fd:name1 and fd:name2 qemu responds This general layout is true whether we rewrite all commands to understand fd:nnn (proposal 1) or whether we add new magic parsing (/dev/fd/nnn of proposal 3, or even /dev/fdset/nnn of proposal 5), all as called out in these messages: https://lists.gnu.org/archive/html/qemu-devel/2012-07/msg00227.html https://lists.gnu.org/archive/html/qemu-devel/2012-07/msg01098.html but with -open-hook-fd, the approach would be: libvirt calls transaction qemu calls open(file1) over hook libvirt responds qemu calls open(file2) over hook libvirt responds qemu responds to the original transaction whereas this approach is quite different in semantics, but may indeed be easier for qemu to implement, at the expense of some more complexity on the part of libvirt. At the high level, I think both approaches have one thing in common: by refactoring all qemu code to go through qemu_open(), we can then implement our desired complexity (whether fd:nn, /dev/fd/nnn, /dev/fdset/nnn, or some other magic name parsing; or whether it is an rpc call over a second socket in parallel to the monitor socket) in just one location. Likewise, both approaches have to deal with libvirtd restarts (magic name parsing by changing an 'inuse' flag when the monitor detects EOF; rpc passing by failing a qemu_open() when the rpc socket detects EOF). Ack. The 'transaction' operation is thus blocked by the time it takes to do two intermediate opens over a second channel, which kind of defeats the purpose of making the transaction take effect with minimal guest downtime. How are you defining guest down time? 
It's important to note that code running in QEMU does not equate to guest visible down time unless QEMU does an explicit vm_stop() which is not happening here. Instead, a VCPU may become blocked *if* it attempts to acquire qemu_mutex while QEMU is holding it. If your concern is qemu_mutex being held while waiting for libvirt, it would be fairly easy to implement a qemu_open_async() that allowed dropping back to the main loop and then calling a callback when the open completes. It would be pretty trivial to convert qmp_transaction to use such a command. In other words, remembering that transactions are divided into phases: phase 1 - prepare: obtain all needed fds (whether by pre-opening them via 'pass-fd' or other new 'getfd' relative, or whether by RPC calls); no guest downtime, and with cleanup that avoids any leaks on any failures phase 2 - commit: flush all devices and actually make the changes in qemu state to use the fds obtained in phase 1 and where the guest downtime (if any) is more likely due to flushing changes in phase 2 Not quite. A synchronous flush can cause lock contention. We need to separate out the problem of lock contention from guest down time. Also, there's no obvious need to move the flushes before opens. The main issue is that we use qemu_mutex to effectively create a write queue. You can imagine a simple write queueing mechanism that would obviate the need for this such that we could flush, queue upcoming writes, and drop qemu_mutex to sleep waiting for libvirt to send us our fds. But this is all speculative. There's no reason to believe that an RPC would have a noticeable guest visible latency unless you assume there's lock contention. I would strongly suspect that the bdrv_flush() is going to be a much greater source of lock contention than the RPC would be. An RPC is only bound by scheduler latency whereas synchronous disk I/O is bound by spinning a platter. 
And libvirt code becomes a lot trickier to deal with the fact that two channels are in use, and that the channel that issued the 'transaction' command must block while the other channel for handling hooks must be responsive. All libvirt needs to do is listen on a socket and delegate access according to a white list. Whatever is providing fd's needs to have no knowledge of anything other than what the guest is allowed to access which shouldn't depend on an executing command. That's not quite accurate. What the guest is allowed to access should indeed change depending on the executing command. That is, if I start a guest with: I should have spoken more clearly. libvirt may change the white list for various reasons dynamically. But there shouldn't be a direct dependency on whatever is serving up fd's and whatever is changing the white list. Basically, you just need a shared hash table for each guest. It should be quite simple. Maybe the only reason that I'm still leaning towards a 'pass-fd' solution instead of a hook fd solution is that
Re: [Qemu-devel] [PATCH v16 8/9] Add set_cachesize command
On 07/09/2012 12:22 PM, Orit Wasserman wrote: Change XBZRLE cache size in bytes (the size should be a power of 2). If XBZRLE cache size is too small there will be many cache miss. Signed-off-by: Benoit Hudzia benoit.hud...@sap.com Signed-off-by: Petter Svard pett...@cs.umu.se Signed-off-by: Aidan Shribman aidan.shrib...@sap.com Signed-off-by: Orit Wasserman owass...@redhat.com +++ b/qapi-schema.json @@ -1390,6 +1390,22 @@ { 'command': 'migrate_set_speed', 'data': {'value': 'int'} } We are copying after bad practice, but... ## +# @migrate_set_cachesize ...new QMP commands should prefer '-' over '_'. While the HMP version is fine with migrate_set_cachesize, the QMP command should be migrate-set-cachesize (or even 'migrate-set-cache-size'). + +Set cache size to be used by XBZRLE migration, the cache size will be round down s/round/rounded/ -- Eric Blake ebl...@redhat.com+1-919-301-3266 Libvirt virtualization library http://libvirt.org signature.asc Description: OpenPGP digital signature
Re: [Qemu-devel] [PATCH] target-i386: implement FPREM and FPREM1 using softfloat only
On 2 July 2012 16:25, Catalin Patulea catal...@google.com wrote: FPREM1 now passes the TestFloat floatx80_rem suite (and FPREM is implemented very similarly). The code (the bulk of which is remainder_kernel and do_fprem) is derived from Bochs SVN revision 11224 dated 2012-06-21 10:33:37 -0700, with conversions to Qemu type aliases, C features only, etc. as needed. QEMU is the official capitalization. Signed-off-by: Catalin Patulea catal...@google.com --- fpu/softfloat.c | 195 +++ fpu/softfloat.h |4 + target-i386/op_helper.c | 166 3 files changed, 266 insertions(+), 99 deletions(-) diff --git a/fpu/softfloat.c b/fpu/softfloat.c index b29256a..bd1879d 100644 --- a/fpu/softfloat.c +++ b/fpu/softfloat.c @@ -5234,6 +5234,16 @@ int floatx80_unordered_quiet( floatx80 a, floatx80 b STATUS_PARAM ) } /* +| Returns 1 if the extended double-precision floating-point value `a' is an +| unsupported value; otherwise returns 0. Let's try for something a little less cryptic to save future readers having to dig out the intel architecture manuals: an unsupported value (ie the bit pattern does not represent a valid IEEE number). +**/ +int floatx80_is_unsupported(floatx80 a) +{ +return extractFloatx80Exp(a) + !(extractFloatx80Frac(a) LIT64(0x8000)); +} This doesn't match up with all the cases in the Intel Software Developer's Manual table 8.3: it catches exponent non-zero but explicit integer bit is zero (pseudo-NaNs, pseudo-infinities, and unnormals) but not the case of exponent is zero but explicit integer bit is one (pseudo-denormals). [For those following along at home, it is because floatx80 has an explicit integer bit rather than the implicit bit used in IEEE single and double that you can get 'unsupported' bit patterns, where the explicit bit is a value different from what the implicit bit would be under IEEE rules.] + +/* | Returns the result of converting the quadruple-precision floating-point | value `a' to the 32-bit two's complement integer format. 
The conversion | is performed according to the IEC/IEEE Standard for Binary Floating-Point @@ -6828,6 +6838,191 @@ floatx80 floatx80_scalbn( floatx80 a, int n STATUS_PARAM ) aSign, aExp, aSig, 0 STATUS_VAR ); } +/* executes single exponent reduction cycle */ +static uint64_t remainder_kernel(uint64_t aSig0, uint64_t bSig, int expDiff, + uint64_t *zSig0, uint64_t *zSig1) +{ +uint64_t term0, term1; +uint64_t aSig1 = 0; + +shortShift128Left(aSig1, aSig0, expDiff, aSig1, aSig0); +uint64_t q = estimateDiv128To64(aSig1, aSig0, bSig); Declaring variables in the middle of code isn't QEMU coding style; top of the function, please. [Other cases below; I haven't bothered to call them all out.] +mul64To128(bSig, q, term0, term1); +sub128(aSig1, aSig0, term0, term1, zSig1, zSig0); +while ((int64)(*zSig1) 0) { Cast to int64 is almost certainly wrong: this conditional will give different results if int64 is exactly 64 bits vs if it is more than 64 bits. You probably wanted int64_t. +--q; +add128(*zSig1, *zSig0, 0, bSig, zSig1, zSig0); +} +return q; +} + +static int do_fprem(floatx80 a, floatx80 b, floatx80 *r, uint64_t *q, +int rounding_mode STATUS_PARAM) +{ +int32 aExp, bExp, zExp, expDiff; +uint64_t aSig0, aSig1, bSig; +flag aSign; +*q = 0; + +/* handle unsupported extended double-precision floating encodings */ +if (floatx80_is_unsupported(a) || floatx80_is_unsupported(b)) { +float_raise(float_flag_invalid, status); Use STATUS_VAR. It's stupid, but let's be consistently stupid until we get round to globally fixing it. +*r = floatx80_default_nan; +return -1; +} + +aSig0 = extractFloatx80Frac(a); +aExp = extractFloatx80Exp(a); +aSign = extractFloatx80Sign(a); +bSig = extractFloatx80Frac(b); +bExp = extractFloatx80Exp(b); + +if (aExp == 0x7FFF) { +if ((uint64_t) (aSig01) || ((bExp == 0x7FFF) + (uint64_t) (bSig1))) { aSig0 and bSig are already uint64_t, why the casts? 
+*r = propagateFloatx80NaN(a, b, status); +return -1; +} +float_raise(float_flag_invalid, status); +*r = floatx80_default_nan; +return -1; +} +if (bExp == 0x7FFF) { +if ((uint64_t) (bSig1)) { +*r = propagateFloatx80NaN(a, b, status); +return -1; +} +if (aExp == 0
[Qemu-devel] buildbot failure in qemu on default_i386_rhel61
The Buildbot has detected a new failure on builder default_i386_rhel61 while building qemu. Full details are available at: http://buildbot.b1-systems.de/qemu/builders/default_i386_rhel61/builds/304 Buildbot URL: http://buildbot.b1-systems.de/qemu/ Buildslave for this Build: kraxel_rhel61_32bit Build Reason: The Nightly scheduler named 'nightly_default' triggered this build Build Source Stamp: [branch master] HEAD Blamelist: BUILD FAILED: failed compile sincerely, -The Buildbot
Re: [Qemu-devel] [PULL 00/14] SCSI updates for 2012-07-02
On 09.07.2012, at 18:48, Anthony Liguori wrote: On 07/02/2012 04:41 AM, Paolo Bonzini wrote: Anthony, The following changes since commit 71ea2e016131a9fcde6f1ffd3e0e34a64c21f593: bsd-user: fix build (2012-06-28 20:28:36 +) Pulled. Thanks. Megasas? :) http://buildbot.b1-systems.de/qemu/builders/default_i386_rhel61/builds/304/steps/compile/logs/stdio
[Qemu-devel] [PATCH] megasas: disable due to build breakage
The Buildbot has detected a new failure on builder default_i386_rhel61 while building qemu. Full details are available at: http://buildbot.b1-systems.de/qemu/builders/default_i386_rhel61/builds/304 The proper fix is non-trivial so let's disable the build by default until it's fixed properly. Signed-off-by: Anthony Liguori aligu...@us.ibm.com --- default-configs/pci.mak |1 - 1 files changed, 0 insertions(+), 1 deletions(-) diff --git a/default-configs/pci.mak b/default-configs/pci.mak index 4b49c00..9d3e1db 100644 --- a/default-configs/pci.mak +++ b/default-configs/pci.mak @@ -10,7 +10,6 @@ CONFIG_EEPRO100_PCI=y CONFIG_PCNET_PCI=y CONFIG_PCNET_COMMON=y CONFIG_LSI_SCSI_PCI=y -CONFIG_MEGASAS_SCSI_PCI=y CONFIG_RTL8139_PCI=y CONFIG_E1000_PCI=y CONFIG_IDE_CORE=y -- 1.7.5.4
Re: [Qemu-devel] [PULL 00/14] SCSI updates for 2012-07-02
On 07/09/2012 06:09 PM, Alexander Graf wrote: On 09.07.2012, at 18:48, Anthony Liguori wrote: On 07/02/2012 04:41 AM, Paolo Bonzini wrote: Anthony, The following changes since commit 71ea2e016131a9fcde6f1ffd3e0e34a64c21f593: bsd-user: fix build (2012-06-28 20:28:36 +) Pulled. Thanks. Megasas? :) So this code is really broken: info.host.type = MFI_INFO_HOST_PCIX; info.device.type = MFI_INFO_DEV_SAS3G; info.device.port_count = 2; info.device.port_addr[0] = cpu_to_le64(megasas_gen_sas_addr((uint64_t)s)); This will make migration impossible not to mention the fact that casting a pointer to a uint64_t is really broken. This code needs to be refactored to not do this. It's quite pervasive though (there's a half a dozen instances like this). I'm going to disable the build by default. I don't want to see a rash fix like (uint64_t)(intptr_t). This needs to be fixed by not making the pointer address guest visible. It can then be re-enabled. Should be easy enough to update your .mak config if you want to test between now and then. Regards, Anthony Liguori http://buildbot.b1-systems.de/qemu/builders/default_i386_rhel61/builds/304/steps/compile/logs/stdio
[Qemu-devel] buildbot failure in qemu on default_mingw32
The Buildbot has detected a new failure on builder default_mingw32 while building qemu. Full details are available at: http://buildbot.b1-systems.de/qemu/builders/default_mingw32/builds/312 Buildbot URL: http://buildbot.b1-systems.de/qemu/ Buildslave for this Build: kraxel_rhel61 Build Reason: The Nightly scheduler named 'nightly_default' triggered this build Build Source Stamp: [branch master] HEAD Blamelist: BUILD FAILED: failed compile sincerely, -The Buildbot
[Qemu-devel] [Bug 1018530] Re: No write access in a 9p/virtfs shared folder
No, commit daf0b9aca9f67323266af1a92e8ea06f9d7bf408 added create support proxy FS driver model. Local FS had support for creating files much before. Georg, is qemu running with root user privileges? -- You received this bug notification because you are a member of qemu- devel-ml, which is subscribed to QEMU. https://bugs.launchpad.net/bugs/1018530 Title: No write access in a 9p/virtfs shared folder Status in QEMU: New Status in “qemu-kvm” package in Ubuntu: Fix Released Bug description: Ubuntu version: Ubuntu 12.04 LTS Kernel: 3.2.0-25-generic Version of qemu-kvm: 1.0+noroms-0ubuntu13 I have created an shared folder for an virtual machine which is managed by libvirt. filesystem type='mount' accessmode='passthrough' source dir='/storage/data'/ target dir='data'/ address type='pci' domain='0x' bus='0x00' slot='0x08' function='0x0'/ /filesystem I mounted it in the virtual machine with this command: mount -t 9p -o trans=virtio,version=9p2000.L data /data The filesystem permissions of all files an folders in the shared folder are set to 777. I expected that I have the full permissions also in the virtual machine. Regardless of the permissions on the filesystem I cannot write or create files and folders in the virtual machine. The original filesystem (/storage) is XFS. In another shared folder (similar config in libvirt) which is originally NTFS I have no problems. ProblemType: Bug DistroRelease: Ubuntu 12.04 Package: qemu-kvm 1.0+noroms-0ubuntu13 ProcVersionSignature: Ubuntu 3.2.0-25.40-generic 3.2.18 Uname: Linux 3.2.0-25-generic x86_64 ApportVersion: 2.0.1-0ubuntu8 Architecture: amd64 Date: Wed Jun 27 20:15:20 2012 InstallationMedia: Ubuntu-Server 12.04 LTS Precise Pangolin - Beta amd64 (20120409) MachineType: To be filled by O.E.M. To be filled by O.E.M. 
ProcEnviron: TERM=xterm LANG=de_DE.UTF-8 SHELL=/bin/bash ProcKernelCmdLine: BOOT_IMAGE=/vmlinuz-3.2.0-25-generic root=/dev/mapper/system-root ro SourcePackage: qemu-kvm UpgradeStatus: No upgrade log present (probably fresh install) dmi.bios.date: 04/18/2012 dmi.bios.vendor: American Megatrends Inc. dmi.bios.version: 1208 dmi.board.asset.tag: To be filled by O.E.M. dmi.board.name: M5A99X EVO dmi.board.vendor: ASUSTeK COMPUTER INC. dmi.board.version: Rev 1.xx dmi.chassis.asset.tag: To Be Filled By O.E.M. dmi.chassis.type: 3 dmi.chassis.vendor: To Be Filled By O.E.M. dmi.chassis.version: To Be Filled By O.E.M. dmi.modalias: dmi:bvnAmericanMegatrendsInc.:bvr1208:bd04/18/2012:svnTobefilledbyO.E.M.:pnTobefilledbyO.E.M.:pvrTobefilledbyO.E.M.:rvnASUSTeKCOMPUTERINC.:rnM5A99XEVO:rvrRev1.xx:cvnToBeFilledByO.E.M.:ct3:cvrToBeFilledByO.E.M.: dmi.product.name: To be filled by O.E.M. dmi.product.version: To be filled by O.E.M. dmi.sys.vendor: To be filled by O.E.M. To manage notifications about this bug go to: https://bugs.launchpad.net/qemu/+bug/1018530/+subscriptions
Re: [Qemu-devel] [RFC] introduce a dynamic library to expose qemu block API
于 2012-7-9 17:13, Paolo Bonzini 写道: Il 09/07/2012 10:54, Wenchao Xia ha scritto: Following is my implementing plan draft: 1 introduce libqblock.so in sub directory in qemu. 2 write a nbd client in libqblock, similar to qemu nbd client. Then use it to talk with nbd server, by default is qemu-nbd, to get access to images. In this way, libqblock.so could be friendly LGPL licensed. Did you actually assess the license situation of the block layer? block.c and large parts of block/* are under a BSD license, for example. If the library only has to support raw files, it might do so using synchronous I/O only. This would remove a large body of GPL-licensed code. If the library was built as nbd-client communicating with nbd-server, which then employ the BSD licensed code, could the library ignore the server side's license problem? The reason using nbd-client approach are: work around qemu block layer license issue and easy to implement, if other tool found this library useful then considering about directly employ the qemu block code. 3 still not got a good way to get additional info in (2)(3)(4), currently in my head is patch qemu-nbd to add an additional nbd command, image-info, in which returns related info. On the Linux kernel mailing list I would have no qualms labeling such command as crap. However, since the social standards on qemu-devel are a bit higher, I'll ask instead: what information would the command provide beyond the size? The API need to report the image format it is using, such as qcow2. And also API should report if a block at offset have been allocated or it is a hole. Paolo -- Best Regards Wenchao Xia
Re: [Qemu-devel] [PATCH v16 8/9] Add set_cachesize command
On 07/09/2012 11:59 PM, Eric Blake wrote: On 07/09/2012 12:22 PM, Orit Wasserman wrote: Change XBZRLE cache size in bytes (the size should be a power of 2). If XBZRLE cache size is too small there will be many cache miss. Signed-off-by: Benoit Hudzia benoit.hud...@sap.com Signed-off-by: Petter Svard pett...@cs.umu.se Signed-off-by: Aidan Shribman aidan.shrib...@sap.com Signed-off-by: Orit Wasserman owass...@redhat.com +++ b/qapi-schema.json @@ -1390,6 +1390,22 @@ { 'command': 'migrate_set_speed', 'data': {'value': 'int'} } We are copying after bad practice, but... ## +# @migrate_set_cachesize ...new QMP commands should prefer '-' over '_'. While the HMP version is fine with migrate_set_cachesize, the QMP command should be migrate-set-cachesize (or even 'migrate-set-cache-size'). I will change it Thanks, Orit + +Set cache size to be used by XBZRLE migration, the cache size will be round down s/round/rounded/
Re: [Qemu-devel] [RFC] introduce a dynamic library to expose qemu block API
于 2012-7-9 17:27, Daniel P. Berrange 写道: On Mon, Jul 09, 2012 at 04:54:08PM +0800, Wenchao Xia wrote: Hi, Paolo and folks, qemu have good capabilities to access different virtual disks, I want to expose its block layer API to let 3rd party program linked in, such as management stack or block tools, to access images data directly. Following is the objects: (1) API to write/read block device at offset. (2) Determine the image type,qcow2/qed/raw (3) Determine which blocks are allocated. (4) Determine backing file. Following is my implementing plan draft: 1 introduce libqblock.so in sub directory in qemu. 2 write a nbd client in libqblock, similar to qemu nbd client. Then use it to talk with nbd server, by default is qemu-nbd, to get access to images. In this way, libqblock.so could be friendly LGPL licensed. 3 still not got a good way to get additional info in (2)(3)(4), currently in my head is patch qemu-nbd to add an additional nbd command, image-info, in which returns related info. What do you think about it? For arbirary read/write access to disk images, I can see a little value in having a standalone libnbd client API, that is able to just talk to any NBD server. Arguably such a thing does not need to be part of the QEMU source tree - eg see the recently written libiscsi.so client. For getting the other metadata about the disk image you mention, another possibility to is just make 'qemu-img info' return the data in a machine parseable format, ie JSON make a client API for extracting data from this JSON document. Thank u for the idea. The .so is introduced to let program access the image more directly, parsing string is not so fast and it depends on another program's stdout output, I hope to get a faster way. For a full-blown RPC API for doing arbitrary tasks related to block devices, then many apps will tend towards libguestfs, since it provides such a wide range of functionality for manipulating disk images. 
I used libguestfs to make my image too, but the target of the .so is a bit different: it expose block data in a lower level, expose everything qemu main code(except block code) can see. Potential purpose is as first step to make qemu block layer independent, then qemu and other tool would be just employers of the library. But now it is out of plan because license issue, and the library acts as client now lowering the performance. Regards, Daniel -- Best Regards Wenchao Xia
[Qemu-devel] [PATCH] megasas: Fix compilation for 32 bit hosts
Cc: Hannes Reinecke h...@suse.de Signed-off-by: Stefan Weil s...@weilnetz.de --- default-configs/pci.mak |4 hw/megasas.c| 13 +++-- 2 files changed, 11 insertions(+), 6 deletions(-) diff --git a/default-configs/pci.mak b/default-configs/pci.mak index 9d3e1db..120b69d 100644 --- a/default-configs/pci.mak +++ b/default-configs/pci.mak @@ -5,6 +5,10 @@ CONFIG_USB_UHCI=y CONFIG_USB_OHCI=y CONFIG_USB_EHCI=y CONFIG_USB_XHCI=y + +# RAID adapter +CONFIG_MEGASAS_SCSI_PCI=y + CONFIG_NE2000_PCI=y CONFIG_EEPRO100_PCI=y CONFIG_PCNET_PCI=y diff --git a/hw/megasas.c b/hw/megasas.c index b48836f..26cf118 100644 --- a/hw/megasas.c +++ b/hw/megasas.c @@ -372,12 +372,13 @@ static uint64_t megasas_fw_time(void) return bcd_time; } -static uint64_t megasas_gen_sas_addr(uint64_t id) +static uint64_t megasas_gen_sas_addr(void *p) { +uint64_t id = (uintptr_t)p; uint64_t addr; addr = 0x5001a4aULL 36; -addr |= id 0xf; +addr |= id 0xfULL; return addr; } @@ -672,7 +673,7 @@ static int megasas_ctrl_get_info(MegasasState *s, MegasasCmd *cmd) info.host.type = MFI_INFO_HOST_PCIX; info.device.type = MFI_INFO_DEV_SAS3G; info.device.port_count = 2; -info.device.port_addr[0] = cpu_to_le64(megasas_gen_sas_addr((uint64_t)s)); +info.device.port_addr[0] = cpu_to_le64(megasas_gen_sas_addr(s)); memcpy(info.product_name, MegaRAID SAS 8708EM2, 20); snprintf(info.serial_number, 32, QEMU%08lx, @@ -761,7 +762,7 @@ static int megasas_mfc_get_defaults(MegasasState *s, MegasasCmd *cmd) return MFI_STAT_INVALID_PARAMETER; } -info.sas_addr = cpu_to_le64(megasas_gen_sas_addr((uint64_t)s)); +info.sas_addr = cpu_to_le64(megasas_gen_sas_addr(s)); info.stripe_size = 3; info.flush_time = 4; info.background_rate = 30; @@ -891,7 +892,7 @@ static int megasas_dcmd_pd_get_list(MegasasState *s, MegasasCmd *cmd) info.addr[num_pd_disks].scsi_dev_type = sdev-type; info.addr[num_pd_disks].connect_port_bitmap = 0x1; info.addr[num_pd_disks].sas_addr[0] = -cpu_to_le64(megasas_gen_sas_addr((uint64_t)sdev)); 
+cpu_to_le64(megasas_gen_sas_addr(sdev)); num_pd_disks++; offset += sizeof(struct mfi_pd_address); } @@ -994,7 +995,7 @@ static int megasas_pd_get_info_submit(SCSIDevice *sdev, int lun, info-slot_number = (sdev-id 0xFF); info-path_info.count = 1; info-path_info.sas_addr[0] = -cpu_to_le64(megasas_gen_sas_addr((uint64_t)sdev)); +cpu_to_le64(megasas_gen_sas_addr(sdev)); info-connected_port_bitmap = 0x1; info-device_speed = 1; info-link_speed = 1; -- 1.7.0.4
Re: [Qemu-devel] [RFC] introduce a dynamic library to expose qemu block API
于 2012-7-9 22:36, Christoph Hellwig 写道: On Mon, Jul 09, 2012 at 04:54:08PM +0800, Wenchao Xia wrote: Hi, Paolo and folks, qemu have good capabilities to access different virtual disks, I want to expose its block layer API to let 3rd party program linked in, such as management stack or block tools, to access images data directly. Following is the objects: (1) API to write/read block device at offset. (2) Determine the image type,qcow2/qed/raw (3) Determine which blocks are allocated. (4) Determine backing file. Sounds like you want a procedural interface for that. At least for (1) I have patches I'll submit soon to add qemu img read/write commands. Yes, the purpose is introduce API interface about block data, the operation was supposed to happen frequently, a linked-in library may be better than process output string parsing in performance. -- Best Regards Wenchao Xia
[Qemu-devel] [PATCH 0/2] RFC: powerpc-vfio: adding support
The two patches in this set are supposed to add VFIO support for POWER. The first one adds one more step to the initialization sequence, which I am not sure is correct. The second patch adds the actual VFIO support. It is not ready to be submitted but is ready for discussion. I would like to get rid of all #ifdef TARGET_PPC64 in patch #2 and I wonder if there is any plan to implement some generic EOI support code, etc. Alexey Kardashevskiy (2): pseries pci: spapr_finalize_pci_setup introduced vfio-powerpc: added VFIO support hw/ppc/Makefile.objs |3 ++ hw/spapr.c |7 hw/spapr.h |4 +++ hw/spapr_iommu.c | 87 ++ hw/spapr_pci.c | 36 ++--- hw/spapr_pci.h |4 +++ hw/vfio_pci.c| 76 +-- hw/vfio_pci.h|2 ++ 8 files changed, 212 insertions(+), 7 deletions(-) -- 1.7.10
[Qemu-devel] [PATCH 2/2] vfio-powerpc: added VFIO support
The patch enables VFIO on POWER. It literally does the following: 1. POWERPC IOMMU support (the kernel counterpart is required) 2. Added #ifdef TARGET_PPC64 for EOI handlers initialisation. 3. Added vfio_get_container_fd() to VFIO in order to initialize 1). 4. Makefile fixed and is_vfio flag added into sPAPR PHB - required to distinguish VFIO's DMA context from the emulated one. With the patches posted a bit earlier today, this patch fully supports VFIO, including MSIX. Signed-off-by: Alexey Kardashevskiy a...@ozlabs.ru --- hw/ppc/Makefile.objs |3 ++ hw/spapr.h |4 +++ hw/spapr_iommu.c | 87 ++ hw/spapr_pci.c | 23 - hw/spapr_pci.h |2 ++ hw/vfio_pci.c| 76 +-- hw/vfio_pci.h|2 ++ 7 files changed, 193 insertions(+), 4 deletions(-) diff --git a/hw/ppc/Makefile.objs b/hw/ppc/Makefile.objs index f573a95..c46a049 100644 --- a/hw/ppc/Makefile.objs +++ b/hw/ppc/Makefile.objs @@ -25,4 +25,7 @@ obj-$(CONFIG_FDT) += ../device_tree.o # Xilinx PPC peripherals obj-y += xilinx_ethlite.o +# VFIO PCI device assignment +obj-$(CONFIG_VFIO_PCI) += vfio_pci.o + obj-y := $(addprefix ../,$(obj-y)) diff --git a/hw/spapr.h b/hw/spapr.h index b37f337..9dca704 100644 --- a/hw/spapr.h +++ b/hw/spapr.h @@ -340,4 +340,8 @@ int spapr_dma_dt(void *fdt, int node_off, const char *propname, int spapr_tcet_dma_dt(void *fdt, int node_off, const char *propname, DMAContext *dma); +void spapr_vfio_init_dma(int fd, uint32_t liobn, + uint64_t *dma32_window_start, + uint64_t *dma32_window_size); + #endif /* !defined (__HW_SPAPR_H__) */ diff --git a/hw/spapr_iommu.c b/hw/spapr_iommu.c index 50c288d..0a194e8 100644 --- a/hw/spapr_iommu.c +++ b/hw/spapr_iommu.c @@ -16,6 +16,8 @@ * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, see http://www.gnu.org/licenses/. 
*/ +#include sys/ioctl.h + #include hw.h #include kvm.h #include qdev.h @@ -23,6 +25,7 @@ #include dma.h #include hw/spapr.h +#include hw/linux-vfio.h #include libfdt.h @@ -183,6 +186,86 @@ static int put_tce_emu(target_ulong liobn, target_ulong ioba, target_ulong tce) return 0; } +/* API for POWERPC IOMMU */ + +#define POWERPC_IOMMU 2 + +struct tce_iommu_info { +__u32 argsz; +__u32 dma32_window_start; +__u32 dma32_window_size; +}; + +#define POWERPC_IOMMU_GET_INFO _IO(VFIO_TYPE, VFIO_BASE + 12) + +struct tce_iommu_dma_map { +__u32 argsz; +__u64 va; +__u64 dmaaddr; +}; + +#define POWERPC_IOMMU_MAP_DMA _IO(VFIO_TYPE, VFIO_BASE + 13) +#define POWERPC_IOMMU_UNMAP_DMA _IO(VFIO_TYPE, VFIO_BASE + 14) + +typedef struct sPAPRVFIOTable { +int fd; +uint32_t liobn; +QLIST_ENTRY(sPAPRVFIOTable) list; +} sPAPRVFIOTable; + +QLIST_HEAD(vfio_tce_tables, sPAPRVFIOTable) vfio_tce_tables; + +void spapr_vfio_init_dma(int fd, uint32_t liobn, + uint64_t *dma32_window_start, + uint64_t *dma32_window_size) +{ +sPAPRVFIOTable *t; +struct tce_iommu_info info = { .argsz = sizeof(info) }; + +if (ioctl(fd, POWERPC_IOMMU_GET_INFO, info)) { +fprintf(stderr, POWERPC_IOMMU_GET_INFO failed %d\n, errno); +return; +} +*dma32_window_start = info.dma32_window_start; +*dma32_window_size = info.dma32_window_size; + +t = g_malloc0(sizeof(*t)); +t-fd = fd; +t-liobn = liobn; + +QLIST_INSERT_HEAD(vfio_tce_tables, t, list); +} + +static int put_tce_vfio(uint32_t liobn, target_ulong ioba, target_ulong tce) +{ +sPAPRVFIOTable *t; +struct tce_iommu_dma_map map = { +.argsz = sizeof(map), +.va = 0, +.dmaaddr = ioba, +}; + +QLIST_FOREACH(t, vfio_tce_tables, list) { +if (t-liobn != liobn) { +continue; +} +if (tce) { +map.va = (uintptr_t)qemu_get_ram_ptr(tce ~SPAPR_TCE_PAGE_MASK); +if (ioctl(t-fd, POWERPC_IOMMU_MAP_DMA, map)) { +fprintf(stderr, TCE_MAP_DMA: %d\n, errno); +return H_PARAMETER; +} +} else { +if (ioctl(t-fd, POWERPC_IOMMU_UNMAP_DMA, map)) { +fprintf(stderr, TCE_UNMAP_DMA: %d\n, errno); +return 
H_PARAMETER; +} +} +return H_SUCCESS; +} +return H_CONTINUE; /* positive non-zero value */ +} + static target_ulong h_put_tce(CPUPPCState *env, sPAPREnvironment *spapr, target_ulong opcode, target_ulong *args) { @@ -203,6 +286,10 @@ static target_ulong h_put_tce(CPUPPCState *env, sPAPREnvironment *spapr, if (0 = ret) { return ret ? H_PARAMETER : H_SUCCESS; } +ret = put_tce_vfio(liobn, ioba, tce); +if (0 = ret) { +return ret ?
[Qemu-devel] [PATCH 1/2] pseries pci: spapr_finalize_pci_setup introduced
Previously PCI bus setup was done in 3 steps: 1) create a PCI bus, configure DMA 2) create PCI devices on the bus 3) populate a PCI bus node in the Device Tree As some bus parameters can be configured only when some or all the devices got attached to the bus and initialized, the spapr_finalize_pci_setup has been introduced. As an example, such a handler can setup DMA window parameters taken from an IOMMU file descriptor available from a VFIO PCI device. Signed-off-by: Alexey Kardashevskiy a...@ozlabs.ru --- hw/spapr.c |7 +++ hw/spapr_pci.c | 13 ++--- hw/spapr_pci.h |2 ++ 3 files changed, 19 insertions(+), 3 deletions(-) diff --git a/hw/spapr.c b/hw/spapr.c index b83f83b..688a135 100644 --- a/hw/spapr.c +++ b/hw/spapr.c @@ -516,7 +516,14 @@ static void spapr_finalize_fdt(sPAPREnvironment *spapr, } QLIST_FOREACH(phb, spapr-phbs, list) { +ret = spapr_finalize_pci_setup(phb); +if (ret 0) { +break; +} ret = spapr_populate_pci_dt(phb, PHANDLE_XICP, fdt); +if (ret 0) { +break; +} } if (ret 0) { diff --git a/hw/spapr_pci.c b/hw/spapr_pci.c index 014297b..5f89003 100644 --- a/hw/spapr_pci.c +++ b/hw/spapr_pci.c @@ -573,9 +573,6 @@ static int spapr_phb_init(SysBusDevice *s) phb-host_state.bus = bus; phb-dma_liobn = SPAPR_PCI_BASE_LIOBN | (pci_find_domain(bus) 16); -phb-dma_window_start = 0; -phb-dma_window_size = 0x4000; -phb-dma = spapr_tce_new_dma_context(phb-dma_liobn, phb-dma_window_size); pci_setup_iommu(bus, spapr_pci_dma_context_fn, phb); QLIST_INSERT_HEAD(spapr-phbs, phb, list); @@ -639,6 +636,16 @@ void spapr_create_phb(sPAPREnvironment *spapr, qdev_init_nofail(dev); } +/* Finalize PCI setup, called when all devices are already created */ +int spapr_finalize_pci_setup(sPAPRPHBState *phb) +{ +phb-dma_window_start = 0; +phb-dma_window_size = 0x4000; +phb-dma = spapr_tce_new_dma_context(phb-dma_liobn, + phb-dma_window_size); +return 0; +} + /* Macros to operate with address in OF binding to PCI */ #define b_x(x, p, l)(((x) ((1(l))-1)) (p)) #define b_n(x) b_x((x), 31, 
1) /* 0 if relocatable */ diff --git a/hw/spapr_pci.h b/hw/spapr_pci.h index 145071c..3aae273 100644 --- a/hw/spapr_pci.h +++ b/hw/spapr_pci.h @@ -68,6 +68,8 @@ void spapr_create_phb(sPAPREnvironment *spapr, uint64_t mem_win_addr, uint64_t mem_win_size, uint64_t io_win_addr, uint64_t msi_win_addr); +int spapr_finalize_pci_setup(sPAPRPHBState *phb); + int spapr_populate_pci_dt(sPAPRPHBState *phb, uint32_t xics_phandle, void *fdt); -- 1.7.10
Re: [Qemu-devel] [PULL 00/14] SCSI updates for 2012-07-02
On 07/10/2012 01:19 AM, Anthony Liguori wrote: On 07/09/2012 06:09 PM, Alexander Graf wrote: On 09.07.2012, at 18:48, Anthony Liguori wrote: On 07/02/2012 04:41 AM, Paolo Bonzini wrote: Anthony, The following changes since commit 71ea2e016131a9fcde6f1ffd3e0e34a64c21f593: bsd-user: fix build (2012-06-28 20:28:36 +) Pulled. Thanks. Megasas? :) So this code is really broken: info.host.type = MFI_INFO_HOST_PCIX; info.device.type = MFI_INFO_DEV_SAS3G; info.device.port_count = 2; info.device.port_addr[0] = cpu_to_le64(megasas_gen_sas_addr((uint64_t)s)); This will make migration impossible not to mention the fact that casting a pointer to a uint64_t is really broken. Hey, this is _NOT_ an address. It's a simple way of generating a system-wide unique SAS address. The whole thing is informational anyway, and can only be seen when using the (proprietary) MegaCLI userspace command. This code needs to be refactored to not do this. It's quite pervasive though (there's a half a dozen instances like this). Okay, so here's the challenge: We need to generate a system-wide unique SAS address, one per SCSI device and one per megasas instance. A simple counter won't work, as we might have several qemu instances running. Which would result in all of them having the same SAS address for the host. I'm going to disable the build by default. I don't want to see a rash fix like (uint64_t)(intptr_t). This needs to be fixed by not making the pointer address guest visible. It can then be re-enabled. Should be easy enough to update your .mak config if you want to test between now and then. As said, it's _not_ an address. The address is just used to seed the SAS address. But since you object, I'll see about using something else for seeding the SAS address. Cheers, Hannes -- Dr. Hannes Reinecke zSeries Storage h...@suse.de +49 911 74053 688 SUSE LINUX Products GmbH, Maxfeldstr. 5, 90409 Nürnberg GF: J. Hawn, J. Guild, F. Imendörffer, HRB 16746 (AG Nürnberg)