[Qemu-devel] [PATCHv2] qemu-img: add special exit code if bdrv_check is not supported
Currently it is not possible to distinguish by exit code whether there has been an error or whether bdrv_check is not supported by the image format. Change the exit code from 1 to 63 for the latter case. Signed-off-by: Peter Lieven p...@kamp.de --- v1->v2: As Eric suggested, changed the exit code from 255 to 63. qemu-img.c |2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/qemu-img.c b/qemu-img.c index 926f0a0..bf3fb4f 100644 --- a/qemu-img.c +++ b/qemu-img.c @@ -607,7 +607,7 @@ static int img_check(int argc, char **argv) if (output_format == OFORMAT_HUMAN) { error_report("This image format does not support checks"); } -ret = 1; +ret = 63; goto fail; } -- 1.7.9.5
Re: [Qemu-devel] [PATCHv2] qemu-img: add special exit code if bdrv_check is not supported
On 10/24/2013 07:53 AM, Peter Lieven wrote: currently it is not possible to distinguish by exitcode if there has been an error or if bdrv_check is not supported by the image format. Change the exitcode from 1 to 63 for the latter case. Signed-off-by: Peter Lieven p...@kamp.de --- v1->v2: As Eric suggested changed the exitcode from 255 to 63. qemu-img.c |2 +- 1 file changed, 1 insertion(+), 1 deletion(-) Should the man page document this exit status? Then again, it doesn't document ANY exit status, so that could be a separate patch. Reviewed-by: Eric Blake ebl...@redhat.com diff --git a/qemu-img.c b/qemu-img.c index 926f0a0..bf3fb4f 100644 --- a/qemu-img.c +++ b/qemu-img.c @@ -607,7 +607,7 @@ static int img_check(int argc, char **argv) if (output_format == OFORMAT_HUMAN) { error_report("This image format does not support checks"); } -ret = 1; +ret = 63; goto fail; } -- Eric Blake eblake redhat com +1-919-301-3266 Libvirt virtualization library http://libvirt.org signature.asc Description: OpenPGP digital signature
[Qemu-devel] [PATCH v5 6/8] sheepdog: make add_aio_request and send_aioreq void functions
These functions no longer return errors. We can make them void functions and simplify the code. Reviewed-by: Liu Yuan namei.u...@gmail.com Signed-off-by: MORITA Kazutaka morita.kazut...@lab.ntt.co.jp --- block/sheepdog.c | 66 ++ 1 file changed, 17 insertions(+), 49 deletions(-) diff --git a/block/sheepdog.c b/block/sheepdog.c index 3e98291..5846ac4 100644 --- a/block/sheepdog.c +++ b/block/sheepdog.c @@ -606,10 +606,10 @@ static int do_req(int sockfd, SheepdogReq *hdr, void *data, return srco.ret; } -static int coroutine_fn add_aio_request(BDRVSheepdogState *s, AIOReq *aio_req, +static void coroutine_fn add_aio_request(BDRVSheepdogState *s, AIOReq *aio_req, struct iovec *iov, int niov, bool create, enum AIOCBState aiocb_type); -static int coroutine_fn resend_aioreq(BDRVSheepdogState *s, AIOReq *aio_req); +static void coroutine_fn resend_aioreq(BDRVSheepdogState *s, AIOReq *aio_req); static int reload_inode(BDRVSheepdogState *s, uint32_t snapid, const char *tag); static int get_sheep_fd(BDRVSheepdogState *s); static void co_write_request(void *opaque); @@ -635,22 +635,14 @@ static void coroutine_fn send_pending_req(BDRVSheepdogState *s, uint64_t oid) { AIOReq *aio_req; SheepdogAIOCB *acb; -int ret; while ((aio_req = find_pending_req(s, oid)) != NULL) { acb = aio_req->aiocb; /* move aio_req from pending list to inflight one */ QLIST_REMOVE(aio_req, aio_siblings); QLIST_INSERT_HEAD(&s->inflight_aio_head, aio_req, aio_siblings); -ret = add_aio_request(s, aio_req, acb->qiov->iov, - acb->qiov->niov, false, acb->aiocb_type); -if (ret < 0) { -error_report("add_aio_request is failed"); -free_aio_req(s, aio_req); -if (!acb->nr_pending) { -sd_finish_aiocb(acb); -} -} +add_aio_request(s, aio_req, acb->qiov->iov, acb->qiov->niov, false, +acb->aiocb_type); } } @@ -813,11 +805,8 @@ static void coroutine_fn aio_read_response(void *opaque) } else { aio_req->oid = vid_to_vdi_oid(s->inode.vdi_id); } -ret = resend_aioreq(s, aio_req); -if (ret == SD_RES_SUCCESS) { -goto out; -} -/* fall through */ 
+resend_aioreq(s, aio_req); +goto out; default: acb->ret = -EIO; error_report("%s", sd_strerror(rsp.result)); @@ -1066,7 +1055,7 @@ out: return ret; } -static int coroutine_fn add_aio_request(BDRVSheepdogState *s, AIOReq *aio_req, +static void coroutine_fn add_aio_request(BDRVSheepdogState *s, AIOReq *aio_req, struct iovec *iov, int niov, bool create, enum AIOCBState aiocb_type) { @@ -1144,8 +1133,6 @@ out: qemu_aio_set_fd_handler(s->fd, co_read_response, NULL, s); s->co_send = NULL; qemu_co_mutex_unlock(&s->lock); - -return 0; } static int read_write_object(int fd, char *buf, uint64_t oid, int copies, @@ -1248,7 +1235,7 @@ out: return ret; } -static int coroutine_fn resend_aioreq(BDRVSheepdogState *s, AIOReq *aio_req) +static void coroutine_fn resend_aioreq(BDRVSheepdogState *s, AIOReq *aio_req) { SheepdogAIOCB *acb = aio_req->aiocb; bool create = false; @@ -1273,7 +1260,7 @@ static int coroutine_fn resend_aioreq(BDRVSheepdogState *s, AIOReq *aio_req) DPRINTF("simultaneous CoW to %" PRIx64 "\n", aio_req->oid); QLIST_REMOVE(aio_req, aio_siblings); QLIST_INSERT_HEAD(&s->pending_aio_head, aio_req, aio_siblings); -return SD_RES_SUCCESS; +return; } } @@ -1283,13 +1270,13 @@ static int coroutine_fn resend_aioreq(BDRVSheepdogState *s, AIOReq *aio_req) } out: if (is_data_obj(aio_req->oid)) { -return add_aio_request(s, aio_req, acb->qiov->iov, acb->qiov->niov, - create, acb->aiocb_type); +add_aio_request(s, aio_req, acb->qiov->iov, acb->qiov->niov, create, +acb->aiocb_type); } else { struct iovec iov; iov.iov_base = &s->inode; iov.iov_len = sizeof(s->inode); -return add_aio_request(s, aio_req, &iov, 1, false, AIOCB_WRITE_UDATA); +add_aio_request(s, aio_req, &iov, 1, false, AIOCB_WRITE_UDATA); } } @@ -1689,7 +1676,6 @@ static int sd_truncate(BlockDriverState *bs, int64_t offset) */ static void coroutine_fn sd_write_done(SheepdogAIOCB *acb) { -int ret; BDRVSheepdogState *s = acb->common.bs->opaque; struct iovec iov; AIOReq *aio_req; @@ -1711,18 +1697,13 @@ static void coroutine_fn 
sd_write_done(SheepdogAIOCB *acb) aio_req = alloc_aio_req(s, acb, vid_to_vdi_oid(s->inode.vdi_id), data_len, offset, 0, 0, offset);
[Qemu-devel] [PATCH v5 4/8] coroutine: add co_aio_sleep_ns() to allow sleep in block drivers
This helper function behaves similarly to co_sleep_ns(), but the sleeping coroutine will be resumed when using qemu_aio_wait(). Signed-off-by: MORITA Kazutaka morita.kazut...@lab.ntt.co.jp --- include/block/coroutine.h |9 + qemu-coroutine-sleep.c| 14 ++ 2 files changed, 23 insertions(+) diff --git a/include/block/coroutine.h b/include/block/coroutine.h index 4232569..4d5c0cf 100644 --- a/include/block/coroutine.h +++ b/include/block/coroutine.h @@ -216,6 +216,15 @@ void qemu_co_rwlock_unlock(CoRwlock *lock); void coroutine_fn co_sleep_ns(QEMUClockType type, int64_t ns); /** + * Yield the coroutine for a given duration + * + * Behaves similarly to co_sleep_ns(), but the sleeping coroutine will be + * resumed when using qemu_aio_wait(). + */ +void coroutine_fn co_aio_sleep_ns(AioContext *ctx, QEMUClockType type, + int64_t ns); + +/** * Yield until a file descriptor becomes readable * * Note that this function clobbers the handlers for the file descriptor. diff --git a/qemu-coroutine-sleep.c b/qemu-coroutine-sleep.c index f6db978..ad78fba 100644 --- a/qemu-coroutine-sleep.c +++ b/qemu-coroutine-sleep.c @@ -13,6 +13,7 @@ #include "block/coroutine.h" #include "qemu/timer.h" +#include "block/aio.h" typedef struct CoSleepCB { QEMUTimer *ts; @@ -37,3 +38,16 @@ void coroutine_fn co_sleep_ns(QEMUClockType type, int64_t ns) timer_del(sleep_cb.ts); timer_free(sleep_cb.ts); } + +void coroutine_fn co_aio_sleep_ns(AioContext *ctx, QEMUClockType type, + int64_t ns) +{ +CoSleepCB sleep_cb = { +.co = qemu_coroutine_self(), +}; +sleep_cb.ts = aio_timer_new(ctx, type, SCALE_NS, co_sleep_cb, &sleep_cb); +timer_mod(sleep_cb.ts, qemu_clock_get_ns(type) + ns); +qemu_coroutine_yield(); +timer_del(sleep_cb.ts); +timer_free(sleep_cb.ts); +} -- 1.7.10.4
[Qemu-devel] [PATCH v5 5/8] sheepdog: try to reconnect to sheepdog after network error
This introduces a failed request queue and links all the inflight requests to the list after a network error happens. After QEMU reconnects to the sheepdog server successfully, the sheepdog block driver will retry all the requests in the failed queue. Signed-off-by: MORITA Kazutaka morita.kazut...@lab.ntt.co.jp --- block/sheepdog.c | 80 -- 1 file changed, 66 insertions(+), 14 deletions(-) diff --git a/block/sheepdog.c b/block/sheepdog.c index 5569e54..3e98291 100644 --- a/block/sheepdog.c +++ b/block/sheepdog.c @@ -299,6 +299,8 @@ struct SheepdogAIOCB { }; typedef struct BDRVSheepdogState { +BlockDriverState *bs; + SheepdogInode inode; uint32_t min_dirty_data_idx; @@ -318,8 +320,11 @@ typedef struct BDRVSheepdogState { Coroutine *co_recv; uint32_t aioreq_seq_num; + +/* Every aio request must be linked to either of these queues. */ QLIST_HEAD(inflight_aio_head, AIOReq) inflight_aio_head; QLIST_HEAD(pending_aio_head, AIOReq) pending_aio_head; +QLIST_HEAD(failed_aio_head, AIOReq) failed_aio_head; } BDRVSheepdogState; static const char * sd_strerror(int err) @@ -606,6 +611,8 @@ static int coroutine_fn add_aio_request(BDRVSheepdogState *s, AIOReq *aio_req, enum AIOCBState aiocb_type); static int coroutine_fn resend_aioreq(BDRVSheepdogState *s, AIOReq *aio_req); static int reload_inode(BDRVSheepdogState *s, uint32_t snapid, const char *tag); +static int get_sheep_fd(BDRVSheepdogState *s); +static void co_write_request(void *opaque); static AIOReq *find_pending_req(BDRVSheepdogState *s, uint64_t oid) { @@ -647,6 +654,51 @@ static void coroutine_fn send_pending_req(BDRVSheepdogState *s, uint64_t oid) } } +static coroutine_fn void reconnect_to_sdog(void *opaque) +{ +BDRVSheepdogState *s = opaque; +AIOReq *aio_req, *next; + +qemu_aio_set_fd_handler(s->fd, NULL, NULL, NULL); +close(s->fd); +s->fd = -1; + +/* Wait for outstanding write requests to be completed. */ +while (s->co_send != NULL) { +co_write_request(opaque); +} + +/* Try to reconnect the sheepdog server every one second. 
*/ +while (s->fd < 0) { +s->fd = get_sheep_fd(s); +if (s->fd < 0) { +DPRINTF("Wait for connection to be established\n"); +co_aio_sleep_ns(bdrv_get_aio_context(s->bs), QEMU_CLOCK_REALTIME, +1000000000ULL); +} +}; + +/* + * Now we have to resend all the request in the inflight queue. However, + * resend_aioreq() can yield and newly created requests can be added to the + * inflight queue before the coroutine is resumed. To avoid mixing them, we + * have to move all the inflight requests to the failed queue before + * resend_aioreq() is called. + */ +QLIST_FOREACH_SAFE(aio_req, &s->inflight_aio_head, aio_siblings, next) { +QLIST_REMOVE(aio_req, aio_siblings); +QLIST_INSERT_HEAD(&s->failed_aio_head, aio_req, aio_siblings); +} + +/* Resend all the failed aio requests. */ +while (!QLIST_EMPTY(&s->failed_aio_head)) { +aio_req = QLIST_FIRST(&s->failed_aio_head); +QLIST_REMOVE(aio_req, aio_siblings); +QLIST_INSERT_HEAD(&s->inflight_aio_head, aio_req, aio_siblings); +resend_aioreq(s, aio_req); +} +} + /* * Receive responses of the I/O requests. 
* @@ -663,15 +715,11 @@ static void coroutine_fn aio_read_response(void *opaque) SheepdogAIOCB *acb; uint64_t idx; -if (QLIST_EMPTY(&s->inflight_aio_head)) { -goto out; -} - /* read a header */ ret = qemu_co_recv(fd, &rsp, sizeof(rsp)); if (ret != sizeof(rsp)) { error_report("failed to get the header, %s", strerror(errno)); -goto out; +goto err; } /* find the right aio_req from the inflight aio list */ @@ -682,7 +730,7 @@ static void coroutine_fn aio_read_response(void *opaque) } if (!aio_req) { error_report("cannot find aio_req %x", rsp.id); -goto out; +goto err; } acb = aio_req->aiocb; @@ -722,7 +770,7 @@ static void coroutine_fn aio_read_response(void *opaque) aio_req->iov_offset, rsp.data_length); if (ret != rsp.data_length) { error_report("failed to get the data, %s", strerror(errno)); -goto out; +goto err; } break; case AIOCB_FLUSH_CACHE: @@ -756,10 +804,9 @@ static void coroutine_fn aio_read_response(void *opaque) if (s->inode.vdi_id == oid_to_vid(aio_req->oid)) { ret = reload_inode(s, 0, ""); if (ret < 0) { -goto out; +goto err; } } - if (is_data_obj(aio_req->oid)) { aio_req->oid = vid_to_data_oid(s->inode.vdi_id, data_oid_to_idx(aio_req->oid)); @@ -787,6 +834,10 @@ static void
[Qemu-devel] [PATCH v5 1/8] sheepdog: check return values of qemu_co_recv/send correctly
If qemu_co_recv/send doesn't return the specified length, it means that an error happened. Reviewed-by: Liu Yuan namei.u...@gmail.com Signed-off-by: MORITA Kazutaka morita.kazut...@lab.ntt.co.jp --- block/sheepdog.c | 16 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/block/sheepdog.c b/block/sheepdog.c index 5f81c93..cb681de 100644 --- a/block/sheepdog.c +++ b/block/sheepdog.c @@ -489,13 +489,13 @@ static coroutine_fn int send_co_req(int sockfd, SheepdogReq *hdr, void *data, int ret; ret = qemu_co_send(sockfd, hdr, sizeof(*hdr)); -if (ret < sizeof(*hdr)) { +if (ret != sizeof(*hdr)) { error_report("failed to send a req, %s", strerror(errno)); return ret; } ret = qemu_co_send(sockfd, data, *wlen); -if (ret < *wlen) { +if (ret != *wlen) { error_report("failed to send a req, %s", strerror(errno)); } @@ -541,7 +541,7 @@ static coroutine_fn void do_co_req(void *opaque) qemu_aio_set_fd_handler(sockfd, restart_co_req, NULL, co); ret = qemu_co_recv(sockfd, hdr, sizeof(*hdr)); -if (ret < sizeof(*hdr)) { +if (ret != sizeof(*hdr)) { error_report("failed to get a rsp, %s", strerror(errno)); ret = -errno; goto out; @@ -553,7 +553,7 @@ static coroutine_fn void do_co_req(void *opaque) if (*rlen) { ret = qemu_co_recv(sockfd, data, *rlen); -if (ret < *rlen) { +if (ret != *rlen) { error_report("failed to get the data, %s", strerror(errno)); ret = -errno; goto out; @@ -664,7 +664,7 @@ static void coroutine_fn aio_read_response(void *opaque) /* read a header */ ret = qemu_co_recv(fd, &rsp, sizeof(rsp)); -if (ret < 0) { +if (ret != sizeof(rsp)) { error_report("failed to get the header, %s", strerror(errno)); goto out; } @@ -715,7 +715,7 @@ static void coroutine_fn aio_read_response(void *opaque) case AIOCB_READ_UDATA: ret = qemu_co_recvv(fd, acb->qiov->iov, acb->qiov->niov, aio_req->iov_offset, rsp.data_length); -if (ret < 0) { +if (ret != rsp.data_length) { error_report("failed to get the data, %s", strerror(errno)); goto out; } @@ -1059,7 +1059,7 @@ static int coroutine_fn 
add_aio_request(BDRVSheepdogState *s, AIOReq *aio_req, /* send a header */ ret = qemu_co_send(s->fd, &hdr, sizeof(hdr)); -if (ret < 0) { +if (ret != sizeof(hdr)) { qemu_co_mutex_unlock(&s->lock); error_report("failed to send a req, %s", strerror(errno)); return -errno; @@ -1067,7 +1067,7 @@ static int coroutine_fn add_aio_request(BDRVSheepdogState *s, AIOReq *aio_req, if (wlen) { ret = qemu_co_sendv(s->fd, iov, niov, aio_req->iov_offset, wlen); -if (ret < 0) { +if (ret != wlen) { qemu_co_mutex_unlock(&s->lock); error_report("failed to send a data, %s", strerror(errno)); return -errno; -- 1.7.10.4
[Qemu-devel] [PATCH v5 3/8] sheepdog: reload inode outside of resend_aioreq
This prepares for using resend_aioreq() after reconnecting to the sheepdog server. Reviewed-by: Liu Yuan namei.u...@gmail.com Signed-off-by: MORITA Kazutaka morita.kazut...@lab.ntt.co.jp --- block/sheepdog.c | 33 +++-- 1 file changed, 19 insertions(+), 14 deletions(-) diff --git a/block/sheepdog.c b/block/sheepdog.c index 59cad97..5569e54 100644 --- a/block/sheepdog.c +++ b/block/sheepdog.c @@ -222,6 +222,11 @@ static inline uint64_t data_oid_to_idx(uint64_t oid) return oid & (MAX_DATA_OBJS - 1); } +static inline uint32_t oid_to_vid(uint64_t oid) +{ +return (oid & ~VDI_BIT) >> VDI_SPACE_SHIFT; +} + static inline uint64_t vid_to_vdi_oid(uint32_t vid) { return VDI_BIT | ((uint64_t)vid << VDI_SPACE_SHIFT); @@ -600,7 +605,7 @@ static int coroutine_fn add_aio_request(BDRVSheepdogState *s, AIOReq *aio_req, struct iovec *iov, int niov, bool create, enum AIOCBState aiocb_type); static int coroutine_fn resend_aioreq(BDRVSheepdogState *s, AIOReq *aio_req); - +static int reload_inode(BDRVSheepdogState *s, uint32_t snapid, const char *tag); static AIOReq *find_pending_req(BDRVSheepdogState *s, uint64_t oid) { @@ -748,6 +753,19 @@ static void coroutine_fn aio_read_response(void *opaque) case SD_RES_SUCCESS: break; case SD_RES_READONLY: +if (s->inode.vdi_id == oid_to_vid(aio_req->oid)) { +ret = reload_inode(s, 0, ""); +if (ret < 0) { +goto out; +} +} + +if (is_data_obj(aio_req->oid)) { +aio_req->oid = vid_to_data_oid(s->inode.vdi_id, + data_oid_to_idx(aio_req->oid)); +} else { +aio_req->oid = vid_to_vdi_oid(s->inode.vdi_id); +} ret = resend_aioreq(s, aio_req); if (ret == SD_RES_SUCCESS) { goto out; @@ -1185,19 +1203,6 @@ static int coroutine_fn resend_aioreq(BDRVSheepdogState *s, AIOReq *aio_req) { SheepdogAIOCB *acb = aio_req->aiocb; bool create = false; -int ret; - -ret = reload_inode(s, 0, ""); -if (ret < 0) { -return ret; -} - -if (is_data_obj(aio_req->oid)) { -aio_req->oid = vid_to_data_oid(s->inode.vdi_id, - data_oid_to_idx(aio_req->oid)); -} else { -aio_req->oid = vid_to_vdi_oid(s->inode.vdi_id); -} /* 
check whether this request becomes a CoW one */ if (acb->aiocb_type == AIOCB_WRITE_UDATA && is_data_obj(aio_req->oid)) { -- 1.7.10.4
[Qemu-devel] [PATCH v5 0/8] sheepdog: reconnect server after connection failure
Currently, if a sheepdog server exits, all the connecting VMs need to be restarted. This series implements a feature to reconnect the server, and enables us to do online sheepdog upgrade and avoid restarting VMs when sheepdog servers crash unexpectedly. v5: - Use AioContext timer for co_aio_sleep_ns(). v4: - Added comment to explain why we need a failed queue. - Fixed a return value of sd_acb_cancelable(). v3: - Check return values of qemu_co_recv/send more strictly. - Move inflight requests to the failed list after reconnection completes. This is necessary to resend I/Os while connection is lost. - Check simultaneous create in resend_aioreq(). v2: - Dropped nonblocking connect patches. MORITA Kazutaka (8): sheepdog: check return values of qemu_co_recv/send correctly sheepdog: handle vdi objects in resend_aio_req sheepdog: reload inode outside of resend_aioreq coroutine: add co_aio_sleep_ns() to allow sleep in block drivers sheepdog: try to reconnect to sheepdog after network error sheepdog: make add_aio_request and send_aioreq void functions sheepdog: cancel aio requests if possible sheepdog: check simultaneous create in resend_aioreq block/sheepdog.c | 322 - include/block/coroutine.h |9 ++ qemu-coroutine-sleep.c| 14 ++ 3 files changed, 226 insertions(+), 119 deletions(-) -- 1.7.10.4
[Qemu-devel] [PATCH v5 7/8] sheepdog: cancel aio requests if possible
This patch tries to cancel aio requests in the pending queue and the failed queue. When the sheepdog driver cannot cancel the requests, it waits for them to be completed. Reviewed-by: Liu Yuan namei.u...@gmail.com Signed-off-by: MORITA Kazutaka morita.kazut...@lab.ntt.co.jp --- block/sheepdog.c | 70 +- 1 file changed, 59 insertions(+), 11 deletions(-) diff --git a/block/sheepdog.c b/block/sheepdog.c index 5846ac4..cb3a22d 100644 --- a/block/sheepdog.c +++ b/block/sheepdog.c @@ -294,7 +294,8 @@ struct SheepdogAIOCB { Coroutine *coroutine; void (*aio_done_func)(SheepdogAIOCB *); -bool canceled; +bool cancelable; +bool *finished; int nr_pending; }; @@ -413,6 +414,7 @@ static inline void free_aio_req(BDRVSheepdogState *s, AIOReq *aio_req) { SheepdogAIOCB *acb = aio_req->aiocb; +acb->cancelable = false; QLIST_REMOVE(aio_req, aio_siblings); g_free(aio_req); @@ -421,23 +423,68 @@ static inline void free_aio_req(BDRVSheepdogState *s, AIOReq *aio_req) static void coroutine_fn sd_finish_aiocb(SheepdogAIOCB *acb) { -if (!acb->canceled) { -qemu_coroutine_enter(acb->coroutine, NULL); +qemu_coroutine_enter(acb->coroutine, NULL); +if (acb->finished) { +*acb->finished = true; } qemu_aio_release(acb); } +/* + * Check whether the specified acb can be canceled + * + * We can cancel aio when any request belonging to the acb is: + * - Not processed by the sheepdog server. + * - Not linked to the inflight queue. 
+ */ +static bool sd_acb_cancelable(const SheepdogAIOCB *acb) +{ +BDRVSheepdogState *s = acb->common.bs->opaque; +AIOReq *aioreq; + +if (!acb->cancelable) { +return false; +} + +QLIST_FOREACH(aioreq, &s->inflight_aio_head, aio_siblings) { +if (aioreq->aiocb == acb) { +return false; +} +} + +return true; +} + static void sd_aio_cancel(BlockDriverAIOCB *blockacb) { SheepdogAIOCB *acb = (SheepdogAIOCB *)blockacb; +BDRVSheepdogState *s = acb->common.bs->opaque; +AIOReq *aioreq, *next; +bool finished = false; + +acb->finished = &finished; +while (!finished) { +if (sd_acb_cancelable(acb)) { +/* Remove outstanding requests from pending and failed queues. */ +QLIST_FOREACH_SAFE(aioreq, &s->pending_aio_head, aio_siblings, + next) { +if (aioreq->aiocb == acb) { +free_aio_req(s, aioreq); +} +} +QLIST_FOREACH_SAFE(aioreq, &s->failed_aio_head, aio_siblings, + next) { +if (aioreq->aiocb == acb) { +free_aio_req(s, aioreq); +} +} -/* - * Sheepdog cannot cancel the requests which are already sent to - * the servers, so we just complete the request with -EIO here. - */ -acb->ret = -EIO; -qemu_coroutine_enter(acb->coroutine, NULL); -acb->canceled = true; +assert(acb->nr_pending == 0); +sd_finish_aiocb(acb); +return; +} +qemu_aio_wait(); +} } static const AIOCBInfo sd_aiocb_info = { @@ -458,7 +505,8 @@ static SheepdogAIOCB *sd_aio_setup(BlockDriverState *bs, QEMUIOVector *qiov, acb->nb_sectors = nb_sectors; acb->aio_done_func = NULL; -acb->canceled = false; +acb->cancelable = true; +acb->finished = NULL; acb->coroutine = qemu_coroutine_self(); acb->ret = 0; acb->nr_pending = 0; -- 1.7.10.4
[Qemu-devel] [PATCH v5 8/8] sheepdog: check simultaneous create in resend_aioreq
After reconnection happens, all the inflight requests are moved to the failed request list. As a result, sd_co_rw_vector() can send another create request before resend_aioreq() resends a create request from the failed list. This patch adds a helper function check_simultaneous_create() and checks simultaneous create requests more strictly in resend_aioreq(). Reviewed-by: Liu Yuan namei.u...@gmail.com Signed-off-by: MORITA Kazutaka morita.kazut...@lab.ntt.co.jp --- block/sheepdog.c | 64 +++--- 1 file changed, 32 insertions(+), 32 deletions(-) diff --git a/block/sheepdog.c b/block/sheepdog.c index cb3a22d..c9ee273 100644 --- a/block/sheepdog.c +++ b/block/sheepdog.c @@ -1283,6 +1283,29 @@ out: return ret; } +/* Return true if the specified request is linked to the pending list. */ +static bool check_simultaneous_create(BDRVSheepdogState *s, AIOReq *aio_req) +{ +AIOReq *areq; +QLIST_FOREACH(areq, &s->inflight_aio_head, aio_siblings) { +if (areq != aio_req && areq->oid == aio_req->oid) { +/* + * Sheepdog cannot handle simultaneous create requests to the same + * object, so we cannot send the request until the previous request + * finishes. 
+ */ +DPRINTF("simultaneous create to %" PRIx64 "\n", aio_req->oid); +aio_req->flags = 0; +aio_req->base_oid = 0; +QLIST_REMOVE(aio_req, aio_siblings); +QLIST_INSERT_HEAD(&s->pending_aio_head, aio_req, aio_siblings); +return true; +} +} + +return false; +} + static void coroutine_fn resend_aioreq(BDRVSheepdogState *s, AIOReq *aio_req) { SheepdogAIOCB *acb = aio_req->aiocb; @@ -1291,29 +1314,19 @@ static void coroutine_fn resend_aioreq(BDRVSheepdogState *s, AIOReq *aio_req) /* check whether this request becomes a CoW one */ if (acb->aiocb_type == AIOCB_WRITE_UDATA && is_data_obj(aio_req->oid)) { int idx = data_oid_to_idx(aio_req->oid); -AIOReq *areq; -if (s->inode.data_vdi_id[idx] == 0) { -create = true; -goto out; -} if (is_data_obj_writable(&s->inode, idx)) { goto out; } -/* link to the pending list if there is another CoW request to - * the same object */ -QLIST_FOREACH(areq, &s->inflight_aio_head, aio_siblings) { -if (areq != aio_req && areq->oid == aio_req->oid) { -DPRINTF("simultaneous CoW to %" PRIx64 "\n", aio_req->oid); -QLIST_REMOVE(aio_req, aio_siblings); -QLIST_INSERT_HEAD(&s->pending_aio_head, aio_req, aio_siblings); -return; -} +if (check_simultaneous_create(s, aio_req)) { +return; } -aio_req->base_oid = vid_to_data_oid(s->inode.data_vdi_id[idx], idx); -aio_req->flags |= SD_FLAG_CMD_COW; +if (s->inode.data_vdi_id[idx]) { +aio_req->base_oid = vid_to_data_oid(s->inode.data_vdi_id[idx], idx); +aio_req->flags |= SD_FLAG_CMD_COW; +} create = true; } out: @@ -1937,27 +1950,14 @@ static int coroutine_fn sd_co_rw_vector(void *p) } aio_req = alloc_aio_req(s, acb, oid, len, offset, flags, old_oid, done); +QLIST_INSERT_HEAD(&s->inflight_aio_head, aio_req, aio_siblings); if (create) { -AIOReq *areq; -QLIST_FOREACH(areq, &s->inflight_aio_head, aio_siblings) { -if (areq->oid == oid) { -/* - * Sheepdog cannot handle simultaneous create - * requests to the same object. So we cannot send - * the request until the previous request - * finishes. 
- */ -aio_req->flags = 0; -aio_req->base_oid = 0; -QLIST_INSERT_HEAD(&s->pending_aio_head, aio_req, - aio_siblings); -goto done; -} +if (check_simultaneous_create(s, aio_req)) { +goto done; } } -QLIST_INSERT_HEAD(&s->inflight_aio_head, aio_req, aio_siblings); add_aio_request(s, aio_req, acb->qiov->iov, acb->qiov->niov, create, acb->aiocb_type); done: -- 1.7.10.4
[Qemu-devel] [PATCH v5 2/8] sheepdog: handle vdi objects in resend_aio_req
The current resend_aio_req() doesn't work when the request is against vdi objects. This fixes the problem. Reviewed-by: Liu Yuan namei.u...@gmail.com Signed-off-by: MORITA Kazutaka morita.kazut...@lab.ntt.co.jp --- block/sheepdog.c | 21 - 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/block/sheepdog.c b/block/sheepdog.c index cb681de..59cad97 100644 --- a/block/sheepdog.c +++ b/block/sheepdog.c @@ -1192,11 +1192,15 @@ static int coroutine_fn resend_aioreq(BDRVSheepdogState *s, AIOReq *aio_req) return ret; } -aio_req->oid = vid_to_data_oid(s->inode.vdi_id, - data_oid_to_idx(aio_req->oid)); +if (is_data_obj(aio_req->oid)) { +aio_req->oid = vid_to_data_oid(s->inode.vdi_id, + data_oid_to_idx(aio_req->oid)); +} else { +aio_req->oid = vid_to_vdi_oid(s->inode.vdi_id); +} /* check whether this request becomes a CoW one */ -if (acb->aiocb_type == AIOCB_WRITE_UDATA) { +if (acb->aiocb_type == AIOCB_WRITE_UDATA && is_data_obj(aio_req->oid)) { int idx = data_oid_to_idx(aio_req->oid); AIOReq *areq; @@ -1224,8 +1228,15 @@ static int coroutine_fn resend_aioreq(BDRVSheepdogState *s, AIOReq *aio_req) create = true; } out: -return add_aio_request(s, aio_req, acb->qiov->iov, acb->qiov->niov, - create, acb->aiocb_type); +if (is_data_obj(aio_req->oid)) { +return add_aio_request(s, aio_req, acb->qiov->iov, acb->qiov->niov, + create, acb->aiocb_type); +} else { +struct iovec iov; +iov.iov_base = &s->inode; +iov.iov_len = sizeof(s->inode); +return add_aio_request(s, aio_req, &iov, 1, false, AIOCB_WRITE_UDATA); +} } /* TODO Convert to fine grained options */ -- 1.7.10.4
[Qemu-devel] [PATCHv2] block/vpc: check that the image has not been truncated
This adds a check that a dynamic VHD file has not been accidentally truncated (e.g. during transfer or upload). Signed-off-by: Peter Lieven p...@kamp.de --- v1->v2: used the errp argument as Eric suggested block/vpc.c |7 +++ 1 file changed, 7 insertions(+) diff --git a/block/vpc.c b/block/vpc.c index b5dca39..627d11c 100644 --- a/block/vpc.c +++ b/block/vpc.c @@ -260,6 +260,13 @@ static int vpc_open(BlockDriverState *bs, QDict *options, int flags, } } +if (s->free_data_block_offset > bdrv_getlength(bs->file)) { +error_setg(errp, "block-vpc: free_data_block_offset points after " + "the end of file. The image has been truncated."); +ret = -EINVAL; +goto fail; +} + s->last_bitmap_offset = (int64_t) -1; #ifdef CACHE -- 1.7.9.5
[Qemu-devel] [RESEND][PATCH] migration: drop MADVISE_DONT_NEED for incoming zero pages
The madvise for zeroed out pages was introduced when every transferred zero page was memset to zero and thus allocated. Since commit 211ea740 we check for zeroness of a target page before we memset it to zero. Additionally we memmap target memory so it is essentially zero initialized (except for e.g. option roms and bios which are loaded into target memory although they shouldn't). It was reported recently that this madvise causes a performance degradation in some situations. As the madvise should only be called rarely and if it's called it is likely on a busy page (it was non-zero and changed to zero during migration) drop it completely. Reported-By: Zhang Haoyu haoyu.zh...@huawei.com Acked-by: Paolo Bonzini pbonz...@redhat.com Signed-off-by: Peter Lieven p...@kamp.de --- arch_init.c |8 1 file changed, 8 deletions(-) diff --git a/arch_init.c b/arch_init.c index 7545d96..e0acbc5 100644 --- a/arch_init.c +++ b/arch_init.c @@ -850,14 +850,6 @@ void ram_handle_compressed(void *host, uint8_t ch, uint64_t size) { if (ch != 0 || !is_zero_range(host, size)) { memset(host, ch, size); -#ifndef _WIN32 -if (ch == 0 && (!kvm_enabled() || kvm_has_sync_mmu())) { -size = size & ~(getpagesize() - 1); -if (size > 0) { -qemu_madvise(host, size, QEMU_MADV_DONTNEED); -} -} -#endif } } -- 1.7.9.5
Re: [Qemu-devel] [PATCH 1/2] vga: allow non-global vmstate
Gerd Hoffmann a écrit : Need a way to opt-out from vga.vram being global vmstate, for secondary vga cards. Add a bool parameter to vga_common_init to support this. Signed-off-by: Gerd Hoffmann kra...@redhat.com --- hw/display/cirrus_vga.c | 4 ++-- hw/display/qxl.c| 2 +- hw/display/vga-isa-mm.c | 2 +- hw/display/vga-isa.c| 2 +- hw/display/vga-pci.c| 2 +- hw/display/vga.c| 4 ++-- hw/display/vga_int.h| 2 +- hw/display/vmware_vga.c | 2 +- 8 files changed, 10 insertions(+), 10 deletions(-) [...] diff --git a/hw/display/vga-pci.c b/hw/display/vga-pci.c index b3a45c8..dee180f 100644 --- a/hw/display/vga-pci.c +++ b/hw/display/vga-pci.c @@ -147,7 +147,7 @@ static int pci_std_vga_initfn(PCIDevice *dev) VGACommonState *s = &d->vga; /* vga + console init */ -vga_common_init(s, OBJECT(dev)); +vga_common_init(s, OBJECT(dev), true); vga_init(s, OBJECT(dev), pci_address_space(dev), pci_address_space_io(dev), true); diff --git a/hw/display/vga.c b/hw/display/vga.c index 7b91d9c..fea30e5 100644 --- a/hw/display/vga.c +++ b/hw/display/vga.c @@ -2257,7 +2257,7 @@ static const GraphicHwOps vga_ops = { .text_update = vga_update_text, }; -void vga_common_init(VGACommonState *s, Object *obj) +void vga_common_init(VGACommonState *s, Object *obj, bool global_vmstate) { int i, j, v, b; @@ -2294,7 +2294,7 @@ void vga_common_init(VGACommonState *s, Object *obj) s->is_vbe_vmstate = 1; memory_region_init_ram(&s->vram, obj, "vga.vram", s->vram_size); -vmstate_register_ram_global(&s->vram); +vmstate_register_ram(&s->vram, global_vmstate ? NULL : DEVICE(obj)); Is it possible to do it depending on the QEMU compatibility version, or on some property, so we don't have to keep this global vmstate forever? With this, I think we can also expect to be able to specify -device VGA twice, and see the BIOS messages on one card or another. Hervé
[Qemu-devel] [PATCHv6 00/17] block: logical block provisioning enhancements
This patch adds the ability for targets to stay sparse during block migration (if the zero_blocks capability is set) and qemu-img convert even if the target does not have has_zero_init = 1. The series was especially developed for iSCSI, but it should also work with other drivers with little or no adjustments. These adjustments should be limited to providing block provisioning information through get_block_info and/or honouring BDRV_REQ_MAY_UNMAP on writing zeroes. v5->v6: - protected iscsi_co_write_zeroes by the existence of the SCSI_SENSE_ASCQ_CAPACITY_DATA_HAS_CHANGED macro. This is ugly but necessary because the semantics of iscsi_writesame16_task silently changed between libiscsi 1.8.0 and 1.9.0. The above macro was the first added after the change. I already contacted Ronnie to introduce an API version macro which has to be bumped on each new function that will be added. Changes to the parameters should not happen at all of course. v4->v5: - new patches 4-6 to move the block provisioning information to the BlockDriverInfo. - kept 2 wrappers to read the information from the BDI and renamed them to make it clearer what they do: bdrv_has_discard_zeroes -> bdrv_unallocated_blocks_are_zero bdrv_has_discard_write_zeroes -> bdrv_can_write_zeroes_with_unmap - added additional information about the 2 flags in the BDI struct in block.h v3->v4: - changed BlockLimits struct to typedef (Stefan, Eric) - renamed bdrv_zeroize to bdrv_make_zero (Stefan) - added comment about the -S flag of qemu-img convert in qemu-img.texi (Eric) - used struct assignment for bs->bl in raw_open (Stefan, Eric) - dropped 3 get_block_status fixes that are independent of this series and already partly merged. v2->v3: - fix merge conflict in block/qcow2_cluster.c - changed return type of bdrv_has_discard_zeroes and bdrv_has_discard_write_zeroes to bool. - moved alignment and limits info to a BlockLimits struct (Paolo). 
- added magic constanst for default maximum in bdrv_co_do_write_zeroes and bdrv_co_discard (Eric). - bdrv_co_do_write_zeroes: allocating the bounce buffer only once (Eric), fixed bounce iov_len in the fall back path. - bdrv_zeroize: added inline docu (Eric) and do not mask flags passed to bdrv_write_zeroes (Eric). - qemu-img: changed the default hint for -S (min_sparse) in the usage help to 4k. not changing the default as it is unclear why this default was set. size suffixes are already supported (Eric). v1-v2: - moved block max_discard and max_write_zeroes to BlockDriverState - added discard_alignment and write_zeroes_alignment to BlockDriverState - added bdrv_has_discard_zeroes() and bdrv_has_discard_write_zeroes() - added logic to bdrv_co_discard and bdrv_co_do_write_zeroes to honour limit and alignment info. - added support for -S 0 in qemu-img convert. Peter Lieven (17): block: make BdrvRequestFlags public block: add flags to bdrv_*_write_zeroes block: introduce BDRV_REQ_MAY_UNMAP request flag block: add logical block provisioning info to BlockDriverInfo block: add wrappers for logical block provisioning information block/iscsi: add .bdrv_get_info block: add BlockLimits structure to BlockDriverState block: honour BlockLimits in bdrv_co_do_write_zeroes block: honour BlockLimits in bdrv_co_discard iscsi: simplify iscsi_co_discard iscsi: set limits in BlockDriverState iscsi: add bdrv_co_write_zeroes block: introduce bdrv_make_zero block/get_block_status: fix BDRV_BLOCK_ZERO for unallocated blocks qemu-img: add support for fully allocated images qemu-img: conditionally zero out target on convert block/raw: copy BlockLimits on raw_open Peter Lieven (17): block: make BdrvRequestFlags public block: add flags to bdrv_*_write_zeroes block: introduce BDRV_REQ_MAY_UNMAP request flag block: add logical block provisioning info to BlockDriverInfo block: add wrappers for logical block provisioning information block/iscsi: add .bdrv_get_info block: add BlockLimits structure 
to BlockDriverState block: honour BlockLimits in bdrv_co_do_write_zeroes block: honour BlockLimits in bdrv_co_discard iscsi: simplify iscsi_co_discard iscsi: set limits in BlockDriverState iscsi: add bdrv_co_write_zeroes block: introduce bdrv_make_zero block/get_block_status: fix BDRV_BLOCK_ZERO for unallocated blocks qemu-img: add support for fully allocated images qemu-img: conditionally zero out target on convert block/raw: copy BlockLimits on raw_open block-migration.c |3 +- block.c | 200 + block/backup.c|3 +- block/iscsi.c | 150 +- block/qcow2-cluster.c |2 +- block/qcow2.c |2 +- block/qed.c |3 +- block/raw_bsd.c |6 +- block/vmdk.c |3 +- include/block/block.h | 35 +++-
[Qemu-devel] [PATCHv6 04/17] block: add logical block provisioning info to BlockDriverInfo
Reviewed-by: Eric Blake ebl...@redhat.com Signed-off-by: Peter Lieven p...@kamp.de --- include/block/block.h | 16 1 file changed, 16 insertions(+) diff --git a/include/block/block.h b/include/block/block.h index 1f30a56..9c76967 100644 --- a/include/block/block.h +++ b/include/block/block.h @@ -18,6 +18,22 @@ typedef struct BlockDriverInfo { /* offset at which the VM state can be saved (0 if not possible) */ int64_t vm_state_offset; bool is_dirty; +/* + * True if unallocated blocks read back as zeroes. This is equivalent + * to the LBPRZ flag in the SCSI logical block provisioning page. + */ +bool unallocated_blocks_are_zero; +/* + * True if the driver can optimize writing zeroes by unmapping + * sectors. This is equivalent to the BLKDISCARDZEROES ioctl in Linux + * with the difference that in qemu a discard is allowed to silently + * fail. Therefore we have to use bdrv_write_zeroes with the + * BDRV_REQ_MAY_UNMAP flag for an optimized zero write with unmapping. + * After this call the driver has to guarantee that the contents read + * back as zero. It is additionally required that the block device is + * opened with BDRV_O_UNMAP flag for this to work. + */ +bool can_write_zeroes_with_unmap; } BlockDriverInfo; typedef struct BlockFragInfo { -- 1.7.9.5
[Qemu-devel] [PATCHv6 01/17] block: make BdrvRequestFlags public
Reviewed-by: Eric Blake ebl...@redhat.com Signed-off-by: Peter Lieven p...@kamp.de --- block.c |5 - include/block/block.h |5 + 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/block.c b/block.c index fd05a80..eb11a07 100644 --- a/block.c +++ b/block.c @@ -51,11 +51,6 @@ #define NOT_DONE 0x7fff /* used while emulated sync operation in progress */ -typedef enum { -BDRV_REQ_COPY_ON_READ = 0x1, -BDRV_REQ_ZERO_WRITE = 0x2, -} BdrvRequestFlags; - static void bdrv_dev_change_media_cb(BlockDriverState *bs, bool load); static BlockDriverAIOCB *bdrv_aio_readv_em(BlockDriverState *bs, int64_t sector_num, QEMUIOVector *qiov, int nb_sectors, diff --git a/include/block/block.h b/include/block/block.h index 3560deb..ba2082c 100644 --- a/include/block/block.h +++ b/include/block/block.h @@ -62,6 +62,11 @@ typedef struct BlockDevOps { void (*resize_cb)(void *opaque); } BlockDevOps; +typedef enum { +BDRV_REQ_COPY_ON_READ = 0x1, +BDRV_REQ_ZERO_WRITE = 0x2, +} BdrvRequestFlags; + #define BDRV_O_RDWR0x0002 #define BDRV_O_SNAPSHOT0x0008 /* open the file read only and save writes in a snapshot */ #define BDRV_O_NOCACHE 0x0020 /* do not use the host page cache */ -- 1.7.9.5
[Qemu-devel] [PATCHv6 03/17] block: introduce BDRV_REQ_MAY_UNMAP request flag
Reviewed-by: Eric Blake ebl...@redhat.com Signed-off-by: Peter Lieven p...@kamp.de --- block-migration.c |3 ++- block.c |4 block/backup.c|2 +- include/block/block.h |7 +++ 4 files changed, 14 insertions(+), 2 deletions(-) diff --git a/block-migration.c b/block-migration.c index 713a8e3..fc4ef93 100644 --- a/block-migration.c +++ b/block-migration.c @@ -780,7 +780,8 @@ static int block_load(QEMUFile *f, void *opaque, int version_id) } if (flags BLK_MIG_FLAG_ZERO_BLOCK) { -ret = bdrv_write_zeroes(bs, addr, nr_sectors, 0); +ret = bdrv_write_zeroes(bs, addr, nr_sectors, +BDRV_REQ_MAY_UNMAP); } else { buf = g_malloc(BLOCK_SIZE); qemu_get_buffer(f, buf, BLOCK_SIZE); diff --git a/block.c b/block.c index 3259429..0d97ce6 100644 --- a/block.c +++ b/block.c @@ -2810,6 +2810,10 @@ int coroutine_fn bdrv_co_write_zeroes(BlockDriverState *bs, { trace_bdrv_co_write_zeroes(bs, sector_num, nb_sectors); +if (!(bs-open_flags BDRV_O_UNMAP)) { +flags = ~BDRV_REQ_MAY_UNMAP; +} + return bdrv_co_do_writev(bs, sector_num, nb_sectors, NULL, BDRV_REQ_ZERO_WRITE | flags); } diff --git a/block/backup.c b/block/backup.c index 830a179..0198514 100644 --- a/block/backup.c +++ b/block/backup.c @@ -139,7 +139,7 @@ static int coroutine_fn backup_do_cow(BlockDriverState *bs, if (buffer_is_zero(iov.iov_base, iov.iov_len)) { ret = bdrv_co_write_zeroes(job-target, start * BACKUP_SECTORS_PER_CLUSTER, - n, 0); + n, BDRV_REQ_MAY_UNMAP); } else { ret = bdrv_co_writev(job-target, start * BACKUP_SECTORS_PER_CLUSTER, n, diff --git a/include/block/block.h b/include/block/block.h index 8ba9f0c..1f30a56 100644 --- a/include/block/block.h +++ b/include/block/block.h @@ -65,6 +65,13 @@ typedef struct BlockDevOps { typedef enum { BDRV_REQ_COPY_ON_READ = 0x1, BDRV_REQ_ZERO_WRITE = 0x2, +/* The BDRV_REQ_MAY_UNMAP flag is used to indicate that the block driver + * is allowed to optimize a write zeroes request by unmapping (discarding) + * blocks if it is guaranteed that the result will read back as + * zeroes. 
The flag is only passed to the driver if the block device is + * opened with BDRV_O_UNMAP. + */ +BDRV_REQ_MAY_UNMAP= 0x4, } BdrvRequestFlags; #define BDRV_O_RDWR0x0002 -- 1.7.9.5
[Qemu-devel] [PATCHv6 08/17] block: honour BlockLimits in bdrv_co_do_write_zeroes
Reviewed-by: Eric Blake ebl...@redhat.com Signed-off-by: Peter Lieven p...@kamp.de --- block.c | 65 +++ 1 file changed, 49 insertions(+), 16 deletions(-) diff --git a/block.c b/block.c index 0601b02..0c0b0ac 100644 --- a/block.c +++ b/block.c @@ -2703,32 +2703,65 @@ int coroutine_fn bdrv_co_copy_on_readv(BlockDriverState *bs, BDRV_REQ_COPY_ON_READ); } +/* if no limit is specified in the BlockLimits use a default + * of 32768 512-byte sectors (16 MiB) per request. + */ +#define MAX_WRITE_ZEROES_DEFAULT 32768 + static int coroutine_fn bdrv_co_do_write_zeroes(BlockDriverState *bs, int64_t sector_num, int nb_sectors, BdrvRequestFlags flags) { BlockDriver *drv = bs-drv; QEMUIOVector qiov; -struct iovec iov; -int ret; +struct iovec iov = {0}; +int ret = 0; -/* TODO Emulate only part of misaligned requests instead of letting block - * drivers return -ENOTSUP and emulate everything */ +int max_write_zeroes = bs-bl.max_write_zeroes ? + bs-bl.max_write_zeroes : MAX_WRITE_ZEROES_DEFAULT; -/* First try the efficient write zeroes operation */ -if (drv-bdrv_co_write_zeroes) { -ret = drv-bdrv_co_write_zeroes(bs, sector_num, nb_sectors, flags); -if (ret != -ENOTSUP) { -return ret; +while (nb_sectors 0 !ret) { +int num = nb_sectors; + +/* align request */ +if (bs-bl.write_zeroes_alignment +num = bs-bl.write_zeroes_alignment +sector_num % bs-bl.write_zeroes_alignment) { +if (num bs-bl.write_zeroes_alignment) { +num = bs-bl.write_zeroes_alignment; +} +num -= sector_num % bs-bl.write_zeroes_alignment; } -} -/* Fall back to bounce buffer if write zeroes is unsupported */ -iov.iov_len = nb_sectors * BDRV_SECTOR_SIZE; -iov.iov_base = qemu_blockalign(bs, iov.iov_len); -memset(iov.iov_base, 0, iov.iov_len); -qemu_iovec_init_external(qiov, iov, 1); +/* limit request size */ +if (num max_write_zeroes) { +num = max_write_zeroes; +} + +ret = -ENOTSUP; +/* First try the efficient write zeroes operation */ +if (drv-bdrv_co_write_zeroes) { +ret = drv-bdrv_co_write_zeroes(bs, sector_num, num, 
flags); +} + +if (ret == -ENOTSUP) { +/* Fall back to bounce buffer if write zeroes is unsupported */ +iov.iov_len = num * BDRV_SECTOR_SIZE; +if (iov.iov_base == NULL) { +/* allocate bounce buffer only once and ensure that it + * is big enough for this and all future requests. + */ +size_t bufsize = num = nb_sectors ? num : max_write_zeroes; +iov.iov_base = qemu_blockalign(bs, bufsize * BDRV_SECTOR_SIZE); +memset(iov.iov_base, 0, bufsize * BDRV_SECTOR_SIZE); +} +qemu_iovec_init_external(qiov, iov, 1); -ret = drv-bdrv_co_writev(bs, sector_num, nb_sectors, qiov); +ret = drv-bdrv_co_writev(bs, sector_num, num, qiov); +} + +sector_num += num; +nb_sectors -= num; +} qemu_vfree(iov.iov_base); return ret; -- 1.7.9.5
[Qemu-devel] [PATCHv6 02/17] block: add flags to bdrv_*_write_zeroes
Reviewed-by: Eric Blake ebl...@redhat.com Signed-off-by: Peter Lieven p...@kamp.de --- block-migration.c |2 +- block.c | 20 +++- block/backup.c|3 ++- block/qcow2-cluster.c |2 +- block/qcow2.c |2 +- block/qed.c |3 ++- block/raw_bsd.c |5 +++-- block/vmdk.c |3 ++- include/block/block.h |4 ++-- include/block/block_int.h |2 +- qemu-io-cmds.c|2 +- 11 files changed, 27 insertions(+), 21 deletions(-) diff --git a/block-migration.c b/block-migration.c index daf9ec1..713a8e3 100644 --- a/block-migration.c +++ b/block-migration.c @@ -780,7 +780,7 @@ static int block_load(QEMUFile *f, void *opaque, int version_id) } if (flags BLK_MIG_FLAG_ZERO_BLOCK) { -ret = bdrv_write_zeroes(bs, addr, nr_sectors); +ret = bdrv_write_zeroes(bs, addr, nr_sectors, 0); } else { buf = g_malloc(BLOCK_SIZE); qemu_get_buffer(f, buf, BLOCK_SIZE); diff --git a/block.c b/block.c index eb11a07..3259429 100644 --- a/block.c +++ b/block.c @@ -79,7 +79,7 @@ static BlockDriverAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs, bool is_write); static void coroutine_fn bdrv_co_do_rw(void *opaque); static int coroutine_fn bdrv_co_do_write_zeroes(BlockDriverState *bs, -int64_t sector_num, int nb_sectors); +int64_t sector_num, int nb_sectors, BdrvRequestFlags flags); static QTAILQ_HEAD(, BlockDriverState) bdrv_states = QTAILQ_HEAD_INITIALIZER(bdrv_states); @@ -2384,10 +2384,11 @@ int bdrv_writev(BlockDriverState *bs, int64_t sector_num, QEMUIOVector *qiov) return bdrv_rwv_co(bs, sector_num, qiov, true, 0); } -int bdrv_write_zeroes(BlockDriverState *bs, int64_t sector_num, int nb_sectors) +int bdrv_write_zeroes(BlockDriverState *bs, int64_t sector_num, + int nb_sectors, BdrvRequestFlags flags) { return bdrv_rw_co(bs, sector_num, NULL, nb_sectors, true, - BDRV_REQ_ZERO_WRITE); + BDRV_REQ_ZERO_WRITE | flags); } int bdrv_pread(BlockDriverState *bs, int64_t offset, @@ -2569,7 +2570,7 @@ static int coroutine_fn bdrv_co_do_copy_on_readv(BlockDriverState *bs, if (drv-bdrv_co_write_zeroes buffer_is_zero(bounce_buffer, 
iov.iov_len)) { ret = bdrv_co_do_write_zeroes(bs, cluster_sector_num, - cluster_nb_sectors); + cluster_nb_sectors, 0); } else { /* This does not change the data on the disk, it is not necessary * to flush even in cache=writethrough mode. @@ -2703,7 +2704,7 @@ int coroutine_fn bdrv_co_copy_on_readv(BlockDriverState *bs, } static int coroutine_fn bdrv_co_do_write_zeroes(BlockDriverState *bs, -int64_t sector_num, int nb_sectors) +int64_t sector_num, int nb_sectors, BdrvRequestFlags flags) { BlockDriver *drv = bs-drv; QEMUIOVector qiov; @@ -2715,7 +2716,7 @@ static int coroutine_fn bdrv_co_do_write_zeroes(BlockDriverState *bs, /* First try the efficient write zeroes operation */ if (drv-bdrv_co_write_zeroes) { -ret = drv-bdrv_co_write_zeroes(bs, sector_num, nb_sectors); +ret = drv-bdrv_co_write_zeroes(bs, sector_num, nb_sectors, flags); if (ret != -ENOTSUP) { return ret; } @@ -2770,7 +2771,7 @@ static int coroutine_fn bdrv_co_do_writev(BlockDriverState *bs, if (ret 0) { /* Do nothing, write notifier decided to fail this request */ } else if (flags BDRV_REQ_ZERO_WRITE) { -ret = bdrv_co_do_write_zeroes(bs, sector_num, nb_sectors); +ret = bdrv_co_do_write_zeroes(bs, sector_num, nb_sectors, flags); } else { ret = drv-bdrv_co_writev(bs, sector_num, nb_sectors, qiov); } @@ -2804,12 +2805,13 @@ int coroutine_fn bdrv_co_writev(BlockDriverState *bs, int64_t sector_num, } int coroutine_fn bdrv_co_write_zeroes(BlockDriverState *bs, - int64_t sector_num, int nb_sectors) + int64_t sector_num, int nb_sectors, + BdrvRequestFlags flags) { trace_bdrv_co_write_zeroes(bs, sector_num, nb_sectors); return bdrv_co_do_writev(bs, sector_num, nb_sectors, NULL, - BDRV_REQ_ZERO_WRITE); + BDRV_REQ_ZERO_WRITE | flags); } /** diff --git a/block/backup.c b/block/backup.c index cad14c9..830a179 100644 --- a/block/backup.c +++ b/block/backup.c @@ -138,7 +138,8 @@ static int coroutine_fn backup_do_cow(BlockDriverState *bs, if (buffer_is_zero(iov.iov_base, iov.iov_len)) { ret = 
bdrv_co_write_zeroes(job-target, - start * BACKUP_SECTORS_PER_CLUSTER, n); +
[Qemu-devel] [PATCHv6 06/17] block/iscsi: add .bdrv_get_info
Signed-off-by: Peter Lieven p...@kamp.de --- block/iscsi.c |9 + 1 file changed, 9 insertions(+) diff --git a/block/iscsi.c b/block/iscsi.c index a2a961e..1dbbcad 100644 --- a/block/iscsi.c +++ b/block/iscsi.c @@ -1506,6 +1506,14 @@ out: return ret; } +static int iscsi_get_info(BlockDriverState *bs, BlockDriverInfo *bdi) +{ +IscsiLun *iscsilun = bs->opaque; +bdi->unallocated_blocks_are_zero = !!iscsilun->lbprz; +bdi->can_write_zeroes_with_unmap = iscsilun->lbprz && iscsilun->lbp.lbpws; +return 0; +} + static QEMUOptionParameter iscsi_create_options[] = { { .name = BLOCK_OPT_SIZE, @@ -1527,6 +1535,7 @@ static BlockDriver bdrv_iscsi = { .create_options = iscsi_create_options, .bdrv_getlength = iscsi_getlength, +.bdrv_get_info = iscsi_get_info, .bdrv_truncate = iscsi_truncate, #if defined(LIBISCSI_FEATURE_IOVECTOR) -- 1.7.9.5
[Qemu-devel] [PATCHv6 11/17] iscsi: set limits in BlockDriverState
Reviewed-by: Eric Blake ebl...@redhat.com Signed-off-by: Peter Lieven p...@kamp.de --- block/iscsi.c | 14 ++ 1 file changed, 14 insertions(+) diff --git a/block/iscsi.c b/block/iscsi.c index 47b9cc9..c0465aa 100644 --- a/block/iscsi.c +++ b/block/iscsi.c @@ -1367,6 +1367,20 @@ static int iscsi_open(BlockDriverState *bs, QDict *options, int flags, sizeof(struct scsi_inquiry_block_limits)); scsi_free_scsi_task(task); task = NULL; + +if (iscsilun-bl.max_unmap 0x) { +bs-bl.max_discard = sector_lun2qemu(iscsilun-bl.max_unmap, + iscsilun); +} +bs-bl.discard_alignment = sector_lun2qemu(iscsilun-bl.opt_unmap_gran, + iscsilun); + +if (iscsilun-bl.max_ws_len 0x) { +bs-bl.max_write_zeroes = sector_lun2qemu(iscsilun-bl.max_ws_len, + iscsilun); +} +bs-bl.write_zeroes_alignment = sector_lun2qemu(iscsilun-bl.opt_unmap_gran, +iscsilun); } #if defined(LIBISCSI_FEATURE_NOP_COUNTER) -- 1.7.9.5
[Qemu-devel] [PATCHv6 05/17] block: add wrappers for logical block provisioning information
This adds 2 wrappers to read the unallocated_blocks_are_zero and can_write_zeroes_with_unmap info from the BDI. The wrappers are required to check for the existence of a backing_hd and if the devices are opened with the correct flags. Reviewed-by: Eric Blake ebl...@redhat.com Signed-off-by: Peter Lieven p...@kamp.de --- block.c | 30 ++ include/block/block.h |2 ++ 2 files changed, 32 insertions(+) diff --git a/block.c b/block.c index 0d97ce6..0601b02 100644 --- a/block.c +++ b/block.c @@ -3094,6 +3094,36 @@ int bdrv_has_zero_init(BlockDriverState *bs) return 0; } +bool bdrv_unallocated_blocks_are_zero(BlockDriverState *bs) +{ +BlockDriverInfo bdi; + +if (bs-backing_hd) { +return false; +} + +if (bdrv_get_info(bs, bdi) == 0) { +return bdi.unallocated_blocks_are_zero; +} + +return false; +} + +bool bdrv_can_write_zeroes_with_unmap(BlockDriverState *bs) +{ +BlockDriverInfo bdi; + +if (bs-backing_hd || !(bs-open_flags BDRV_O_UNMAP)) { +return false; +} + +if (bdrv_get_info(bs, bdi) == 0) { +return bdi.can_write_zeroes_with_unmap; +} + +return false; +} + typedef struct BdrvCoGetBlockStatusData { BlockDriverState *bs; BlockDriverState *base; diff --git a/include/block/block.h b/include/block/block.h index 9c76967..803c5ca 100644 --- a/include/block/block.h +++ b/include/block/block.h @@ -344,6 +344,8 @@ int bdrv_discard(BlockDriverState *bs, int64_t sector_num, int nb_sectors); int bdrv_co_discard(BlockDriverState *bs, int64_t sector_num, int nb_sectors); int bdrv_has_zero_init_1(BlockDriverState *bs); int bdrv_has_zero_init(BlockDriverState *bs); +bool bdrv_unallocated_blocks_are_zero(BlockDriverState *bs); +bool bdrv_can_write_zeroes_with_unmap(BlockDriverState *bs); int64_t bdrv_get_block_status(BlockDriverState *bs, int64_t sector_num, int nb_sectors, int *pnum); int bdrv_is_allocated(BlockDriverState *bs, int64_t sector_num, int nb_sectors, -- 1.7.9.5
[Qemu-devel] [PATCHv6 09/17] block: honour BlockLimits in bdrv_co_discard
Reviewed-by: Eric Blake ebl...@redhat.com Signed-off-by: Peter Lieven p...@kamp.de --- block.c | 37 - 1 file changed, 36 insertions(+), 1 deletion(-) diff --git a/block.c b/block.c index 0c0b0ac..b28dd42 100644 --- a/block.c +++ b/block.c @@ -4234,6 +4234,11 @@ static void coroutine_fn bdrv_discard_co_entry(void *opaque) rwco-ret = bdrv_co_discard(rwco-bs, rwco-sector_num, rwco-nb_sectors); } +/* if no limit is specified in the BlockLimits use a default + * of 32768 512-byte sectors (16 MiB) per request. + */ +#define MAX_DISCARD_DEFAULT 32768 + int coroutine_fn bdrv_co_discard(BlockDriverState *bs, int64_t sector_num, int nb_sectors) { @@ -4255,7 +4260,37 @@ int coroutine_fn bdrv_co_discard(BlockDriverState *bs, int64_t sector_num, } if (bs-drv-bdrv_co_discard) { -return bs-drv-bdrv_co_discard(bs, sector_num, nb_sectors); +int max_discard = bs-bl.max_discard ? + bs-bl.max_discard : MAX_DISCARD_DEFAULT; + +while (nb_sectors 0) { +int ret; +int num = nb_sectors; + +/* align request */ +if (bs-bl.discard_alignment +num = bs-bl.discard_alignment +sector_num % bs-bl.discard_alignment) { +if (num bs-bl.discard_alignment) { +num = bs-bl.discard_alignment; +} +num -= sector_num % bs-bl.discard_alignment; +} + +/* limit request size */ +if (num max_discard) { +num = max_discard; +} + +ret = bs-drv-bdrv_co_discard(bs, sector_num, num); +if (ret) { +return ret; +} + +sector_num += num; +nb_sectors -= num; +} +return 0; } else if (bs-drv-bdrv_aio_discard) { BlockDriverAIOCB *acb; CoroutineIOCompletion co = { -- 1.7.9.5
[Qemu-devel] [PATCHv6 13/17] block: introduce bdrv_make_zero
this patch adds a call to completely zero out a block device. the operation is sped up by checking the block status and only writing zeroes to the device if they currently do not return zeroes. optionally the zero writing can be sped up by setting the flag BDRV_REQ_MAY_UNMAP to emulate the zero write by unmapping if the driver supports it. Signed-off-by: Peter Lieven p...@kamp.de --- block.c | 37 + include/block/block.h |1 + 2 files changed, 38 insertions(+) diff --git a/block.c b/block.c index b28dd42..21a992a 100644 --- a/block.c +++ b/block.c @@ -2391,6 +2391,43 @@ int bdrv_write_zeroes(BlockDriverState *bs, int64_t sector_num, BDRV_REQ_ZERO_WRITE | flags); } +/* + * Completely zero out a block device with the help of bdrv_write_zeroes. + * The operation is sped up by checking the block status and only writing + * zeroes to the device if they currently do not return zeroes. Optional + * flags are passed through to bdrv_write_zeroes (e.g. BDRV_REQ_MAY_UNMAP). + * + * Returns < 0 on error, 0 on success. For error codes see bdrv_write(). 
+ */ +int bdrv_make_zero(BlockDriverState *bs, BdrvRequestFlags flags) +{ +int64_t target_size = bdrv_getlength(bs) / BDRV_SECTOR_SIZE; +int64_t ret, nb_sectors, sector_num = 0; +int n; + +for (;;) { +nb_sectors = target_size - sector_num; +if (nb_sectors <= 0) { +return 0; +} +if (nb_sectors > INT_MAX) { +nb_sectors = INT_MAX; +} +ret = bdrv_get_block_status(bs, sector_num, nb_sectors, &n); +if (ret & BDRV_BLOCK_ZERO) { +sector_num += n; +continue; +} +ret = bdrv_write_zeroes(bs, sector_num, n, flags); +if (ret < 0) { +error_report("error writing zeroes at sector %" PRId64 ": %s", + sector_num, strerror(-ret)); +return ret; +} +sector_num += n; +} +} + int bdrv_pread(BlockDriverState *bs, int64_t offset, void *buf, int count1) { diff --git a/include/block/block.h b/include/block/block.h index 803c5ca..4d9e67c 100644 --- a/include/block/block.h +++ b/include/block/block.h @@ -216,6 +216,7 @@ int bdrv_write(BlockDriverState *bs, int64_t sector_num, const uint8_t *buf, int nb_sectors); int bdrv_write_zeroes(BlockDriverState *bs, int64_t sector_num, int nb_sectors, BdrvRequestFlags flags); +int bdrv_make_zero(BlockDriverState *bs, BdrvRequestFlags flags); int bdrv_writev(BlockDriverState *bs, int64_t sector_num, QEMUIOVector *qiov); int bdrv_pread(BlockDriverState *bs, int64_t offset, void *buf, int count); -- 1.7.9.5
[Qemu-devel] [PATCHv6 07/17] block: add BlockLimits structure to BlockDriverState
this patch adds BlockLimits which introduces discard and write_zeroes limits and alignment information to the BlockDriverState. Signed-off-by: Peter Lieven p...@kamp.de --- include/block/block_int.h | 17 + 1 file changed, 17 insertions(+) diff --git a/include/block/block_int.h b/include/block/block_int.h index 9bbaa29..33be247 100644 --- a/include/block/block_int.h +++ b/include/block/block_int.h @@ -227,6 +227,20 @@ struct BlockDriver { QLIST_ENTRY(BlockDriver) list; }; +typedef struct BlockLimits { +/* maximum number of sectors that can be discarded at once */ +int max_discard; + +/* optimal alignment for discard requests in sectors */ +int64_t discard_alignment; + +/* maximum number of sectors that can be zeroized at once */ +int max_write_zeroes; + +/* optimal alignment for write zeroes requests in sectors */ +int64_t write_zeroes_alignment; +} BlockLimits; + /* * Note: the function bdrv_append() copies and swaps contents of * BlockDriverStates, so if you add new fields to this struct, please @@ -280,6 +294,9 @@ struct BlockDriverState { uint64_t total_time_ns[BDRV_MAX_IOTYPE]; uint64_t wr_highest_sector; +/* I/O Limits */ +BlockLimits bl; + /* Whether the disk can expand beyond total_sectors */ int growable; -- 1.7.9.5
[Qemu-devel] [PATCHv6 10/17] iscsi: simplify iscsi_co_discard
now that bdrv_co_discard can handle limits we do not need the request split logic here anymore. Reviewed-by: Eric Blake ebl...@redhat.com Signed-off-by: Peter Lieven p...@kamp.de --- block/iscsi.c | 67 + 1 file changed, 25 insertions(+), 42 deletions(-) diff --git a/block/iscsi.c b/block/iscsi.c index 1dbbcad..47b9cc9 100644 --- a/block/iscsi.c +++ b/block/iscsi.c @@ -87,7 +87,6 @@ typedef struct IscsiAIOCB { #define NOP_INTERVAL 5000 #define MAX_NOP_FAILURES 3 #define ISCSI_CMD_RETRIES 5 -#define ISCSI_MAX_UNMAP 131072 static void iscsi_bh_cb(void *p) @@ -912,8 +911,6 @@ coroutine_fn iscsi_co_discard(BlockDriverState *bs, int64_t sector_num, IscsiLun *iscsilun = bs-opaque; struct IscsiTask iTask; struct unmap_list list; -uint32_t nb_blocks; -uint32_t max_unmap; if (!is_request_lun_aligned(sector_num, nb_sectors, iscsilun)) { return -EINVAL; @@ -925,52 +922,38 @@ coroutine_fn iscsi_co_discard(BlockDriverState *bs, int64_t sector_num, } list.lba = sector_qemu2lun(sector_num, iscsilun); -nb_blocks = sector_qemu2lun(nb_sectors, iscsilun); +list.num = sector_qemu2lun(nb_sectors, iscsilun); -max_unmap = iscsilun-bl.max_unmap; -if (max_unmap == 0x) { -max_unmap = ISCSI_MAX_UNMAP; -} - -while (nb_blocks 0) { -iscsi_co_init_iscsitask(iscsilun, iTask); -list.num = nb_blocks; -if (list.num max_unmap) { -list.num = max_unmap; -} +iscsi_co_init_iscsitask(iscsilun, iTask); retry: -if (iscsi_unmap_task(iscsilun-iscsi, iscsilun-lun, 0, 0, list, 1, - iscsi_co_generic_cb, iTask) == NULL) { -return -EIO; -} - -while (!iTask.complete) { -iscsi_set_events(iscsilun); -qemu_coroutine_yield(); -} +if (iscsi_unmap_task(iscsilun-iscsi, iscsilun-lun, 0, 0, list, 1, + iscsi_co_generic_cb, iTask) == NULL) { +return -EIO; +} -if (iTask.task != NULL) { -scsi_free_scsi_task(iTask.task); -iTask.task = NULL; -} +while (!iTask.complete) { +iscsi_set_events(iscsilun); +qemu_coroutine_yield(); +} -if (iTask.do_retry) { -goto retry; -} +if (iTask.task != NULL) { +scsi_free_scsi_task(iTask.task); 
+iTask.task = NULL; +} -if (iTask.status == SCSI_STATUS_CHECK_CONDITION) { -/* the target might fail with a check condition if it - is not happy with the alignment of the UNMAP request - we silently fail in this case */ -return 0; -} +if (iTask.do_retry) { +goto retry; +} -if (iTask.status != SCSI_STATUS_GOOD) { -return -EIO; -} +if (iTask.status == SCSI_STATUS_CHECK_CONDITION) { +/* the target might fail with a check condition if it + is not happy with the alignment of the UNMAP request + we silently fail in this case */ +return 0; +} -list.lba += list.num; -nb_blocks -= list.num; +if (iTask.status != SCSI_STATUS_GOOD) { +return -EIO; } return 0; -- 1.7.9.5
[Qemu-devel] [PATCHv6 12/17] iscsi: add bdrv_co_write_zeroes
Signed-off-by: Peter Lieven p...@kamp.de --- block/iscsi.c | 64 + 1 file changed, 64 insertions(+) diff --git a/block/iscsi.c b/block/iscsi.c index c0465aa..014475d 100644 --- a/block/iscsi.c +++ b/block/iscsi.c @@ -56,6 +56,7 @@ typedef struct IscsiLun { uint8_t lbprz; struct scsi_inquiry_logical_block_provisioning lbp; struct scsi_inquiry_block_limits bl; +unsigned char *zeroblock; } IscsiLun; typedef struct IscsiTask { @@ -959,6 +960,65 @@ retry: return 0; } +#if defined(SCSI_SENSE_ASCQ_CAPACITY_DATA_HAS_CHANGED) + +static int +coroutine_fn iscsi_co_write_zeroes(BlockDriverState *bs, int64_t sector_num, + int nb_sectors, BdrvRequestFlags flags) +{ +IscsiLun *iscsilun = bs-opaque; +struct IscsiTask iTask; +uint64_t lba; +uint32_t nb_blocks; + +if (!is_request_lun_aligned(sector_num, nb_sectors, iscsilun)) { +return -EINVAL; +} + +if (!iscsilun-lbp.lbpws) { +/* WRITE SAME is not supported by the target */ +return -ENOTSUP; +} + +lba = sector_qemu2lun(sector_num, iscsilun); +nb_blocks = sector_qemu2lun(nb_sectors, iscsilun); + +if (iscsilun-zeroblock == NULL) { +iscsilun-zeroblock = g_malloc0(iscsilun-block_size); +} + +iscsi_co_init_iscsitask(iscsilun, iTask); +retry: +if (iscsi_writesame16_task(iscsilun-iscsi, iscsilun-lun, lba, + iscsilun-zeroblock, iscsilun-block_size, + nb_blocks, 0, !!(flags BDRV_REQ_MAY_UNMAP), + 0, 0, iscsi_co_generic_cb, iTask) == NULL) { +return -EIO; +} + +while (!iTask.complete) { +iscsi_set_events(iscsilun); +qemu_coroutine_yield(); +} + +if (iTask.task != NULL) { +scsi_free_scsi_task(iTask.task); +iTask.task = NULL; +} + +if (iTask.do_retry) { +goto retry; +} + +if (iTask.status != SCSI_STATUS_GOOD) { +return -EIO; +} + +return 0; +} + +#endif /* SCSI_SENSE_ASCQ_CAPACITY_DATA_HAS_CHANGED */ + static int parse_chap(struct iscsi_context *iscsi, const char *target) { QemuOptsList *list; @@ -1421,6 +1481,7 @@ static void iscsi_close(BlockDriverState *bs) } qemu_aio_set_fd_handler(iscsi_get_fd(iscsi), NULL, NULL, NULL); 
iscsi_destroy_context(iscsi); +g_free(iscsilun-zeroblock); memset(iscsilun, 0, sizeof(IscsiLun)); } @@ -1539,6 +1600,9 @@ static BlockDriver bdrv_iscsi = { .bdrv_co_get_block_status = iscsi_co_get_block_status, #endif .bdrv_co_discard = iscsi_co_discard, +#if defined(SCSI_SENSE_ASCQ_CAPACITY_DATA_HAS_CHANGED) +.bdrv_co_write_zeroes = iscsi_co_write_zeroes, +#endif .bdrv_aio_readv = iscsi_aio_readv, .bdrv_aio_writev = iscsi_aio_writev, -- 1.7.9.5
[Qemu-devel] [PATCHv6 14/17] block/get_block_status: fix BDRV_BLOCK_ZERO for unallocated blocks
this patch does 2 things: a) only do additional call outs if BDRV_BLOCK_ZERO is not already set. b) use the newly introduced bdrv_has_discard_zeroes() to return the zero state of an unallocated block. the used callout to bdrv_has_zero_init() is only valid right after bdrv_create. Reviewed-by: Eric Blake ebl...@redhat.com Signed-off-by: Peter Lieven p...@kamp.de --- block.c |4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/block.c b/block.c index 21a992a..69a2d2b 100644 --- a/block.c +++ b/block.c @@ -3263,8 +3263,8 @@ static int64_t coroutine_fn bdrv_co_get_block_status(BlockDriverState *bs, *pnum, pnum); } -if (!(ret BDRV_BLOCK_DATA)) { -if (bdrv_has_zero_init(bs)) { +if (!(ret BDRV_BLOCK_DATA) !(ret BDRV_BLOCK_ZERO)) { +if (bdrv_unallocated_blocks_are_zero(bs)) { ret |= BDRV_BLOCK_ZERO; } else if (bs-backing_hd) { BlockDriverState *bs2 = bs-backing_hd; -- 1.7.9.5
[Qemu-devel] [PATCHv6 16/17] qemu-img: conditionally zero out target on convert
If the target has_zero_init = 0, but supports efficiently writing zeroes by unmapping we call bdrv_make_zero to avoid fully allocating the target. This currently is designed especially for iscsi. Reviewed-by: Eric Blake ebl...@redhat.com Signed-off-by: Peter Lieven p...@kamp.de --- qemu-img.c | 10 +- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/qemu-img.c b/qemu-img.c index c6eff15..fe0bdb1 100644 --- a/qemu-img.c +++ b/qemu-img.c @@ -1353,7 +1353,7 @@ static int img_convert(int argc, char **argv) } } -flags = BDRV_O_RDWR; +flags = min_sparse ? (BDRV_O_RDWR | BDRV_O_UNMAP) : BDRV_O_RDWR; ret = bdrv_parse_cache_flags(cache, flags); if (ret 0) { error_report(Invalid cache option: %s, cache); @@ -1469,6 +1469,14 @@ static int img_convert(int argc, char **argv) } else { int has_zero_init = min_sparse ? bdrv_has_zero_init(out_bs) : 0; +if (!has_zero_init bdrv_can_write_zeroes_with_unmap(out_bs)) { +ret = bdrv_make_zero(out_bs, BDRV_REQ_MAY_UNMAP); +if (ret 0) { +goto out; +} +has_zero_init = 1; +} + sector_num = 0; // total number of sectors converted so far nb_sectors = total_sectors - sector_num; if (nb_sectors != 0) { -- 1.7.9.5
[Qemu-devel] [PATCHv6 17/17] block/raw: copy BlockLimits on raw_open
Signed-off-by: Peter Lieven p...@kamp.de --- block/raw_bsd.c |1 + 1 file changed, 1 insertion(+) diff --git a/block/raw_bsd.c b/block/raw_bsd.c index b0dd23f..49ac18c 100644 --- a/block/raw_bsd.c +++ b/block/raw_bsd.c @@ -150,6 +150,7 @@ static int raw_open(BlockDriverState *bs, QDict *options, int flags, Error **errp) { bs->sg = bs->file->sg; +bs->bl = bs->file->bl; return 0; } -- 1.7.9.5
[Qemu-devel] [PATCHv6 15/17] qemu-img: add support for fully allocated images
Signed-off-by: Peter Lieven p...@kamp.de --- qemu-img.c|8 +--- qemu-img.texi |5 + 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/qemu-img.c b/qemu-img.c index 926f0a0..c6eff15 100644 --- a/qemu-img.c +++ b/qemu-img.c @@ -100,8 +100,10 @@ static void help(void) '-h' with or without a command shows this help and lists the supported formats\n '-p' show progress of command (only certain commands)\n '-q' use Quiet mode - do not print any output (except errors)\n - '-S' indicates the consecutive number of bytes that must contain only zeros\n - for qemu-img to create a sparse image during conversion\n + '-S' indicates the consecutive number of bytes (defaults to 4k) that must\n + contain only zeros for qemu-img to create a sparse image during\n + conversion. if the number of bytes is 0 sparse files are disabled and\n + images will always be fully allocated\n '--output' takes the format in which the output must be done (human or json)\n '-n' skips the target volume creation (useful if the volume is created\n prior to running qemu-img)\n @@ -1465,7 +1467,7 @@ static int img_convert(int argc, char **argv) /* signal EOF to align */ bdrv_write_compressed(out_bs, 0, NULL, 0); } else { -int has_zero_init = bdrv_has_zero_init(out_bs); +int has_zero_init = min_sparse ? bdrv_has_zero_init(out_bs) : 0; sector_num = 0; // total number of sectors converted so far nb_sectors = total_sectors - sector_num; diff --git a/qemu-img.texi b/qemu-img.texi index 768054e..51a1ee5 100644 --- a/qemu-img.texi +++ b/qemu-img.texi @@ -193,6 +193,11 @@ Image conversion is also useful to get smaller image when using a growable format such as @code{qcow} or @code{cow}: the empty sectors are detected and suppressed from the destination image. +@var{sparse_size} indicates the consecutive number of bytes (defaults to 4k) +that must contain only zeros for qemu-img to create a sparse image during +conversion. 
If the number of bytes is 0 sparse files are disabled and +images will always be fully allocated. + You can use the @var{backing_file} option to force the output image to be created as a copy on write image of the specified base image; the @var{backing_file} should have the same content as the input's base image, -- 1.7.9.5
Re: [Qemu-devel] [PATCH 02/10] sysbus: Set cannot_instantiate_with_device_add_yet
Peter Maydell peter.mayd...@linaro.org writes: On 17 October 2013 14:54, arm...@redhat.com wrote: From: Markus Armbruster arm...@redhat.com device_add plugs devices into suitable bus. For real buses, that actually connects the device. For sysbus, the connections need to be made separately, and device_add can't do that. The device would be left unconncected, and could not possibly work. unconnected Will fix. Many, but not all sysbus devices alreasy set already Will fix. cannot_instantiate_with_device_add_yet in their class init function. Set it in their abstract base's class init function sysbus_device_class_init(), and remove the now redundant assignments from device class init functions. So I think this change is probably OK (but see my comments on patch 1 about what our definition of the flag is supposed to be). But I'd like to see a list of the devices which this patch makes no-user which previously weren't. Then I could eyeball the list and check whether there's anything in it which shouldn't be. I'll include that list in v2. Thanks!
Re: [Qemu-devel] [PATCH 09/10] isa: Clean up use of cannot_instantiate_with_device_add_yet
Peter Maydell peter.mayd...@linaro.org writes: On 17 October 2013 14:55, arm...@redhat.com wrote: From: Markus Armbruster arm...@redhat.com Drop it when there's no obvious reason why device_add could not work. Else keep and document why. * isa-fdc, port92, i8042, m48t59_isa, mc146818rtc, isa-pit, kvm-pit: drop (from the last two by dropping it from their abstract base pit-common) port92 needs its a20_out qemu_irq line wiring up, doesn't it? the pit devices have an output IRQ line that needs wiring up. Good points; I'll have a second look at these two. Thanks!
Re: [Qemu-devel] [PATCHv2] block/vpc: check that the image has not been truncated
On 10/24/2013 08:16 AM, Peter Lieven wrote: this adds a check that a dynamic VHD file has not been accidentally truncated (e.g. during transfer or upload). Signed-off-by: Peter Lieven p...@kamp.de --- v1->v2: used the errp argument as Eric suggested block/vpc.c |7 +++ 1 file changed, 7 insertions(+) Reviewed-by: Eric Blake ebl...@redhat.com -- Eric Blake eblake redhat com+1-919-301-3266 Libvirt virtualization library http://libvirt.org signature.asc Description: OpenPGP digital signature
Re: [Qemu-devel] [PATCHv6 11/17] iscsi: set limits in BlockDriverState
Il 24/10/2013 08:46, Peter Lieven ha scritto: Reviewed-by: Eric Blake ebl...@redhat.com Signed-off-by: Peter Lieven p...@kamp.de --- block/iscsi.c | 14 ++ 1 file changed, 14 insertions(+) diff --git a/block/iscsi.c b/block/iscsi.c index 47b9cc9..c0465aa 100644 --- a/block/iscsi.c +++ b/block/iscsi.c @@ -1367,6 +1367,20 @@ static int iscsi_open(BlockDriverState *bs, QDict *options, int flags, sizeof(struct scsi_inquiry_block_limits)); scsi_free_scsi_task(task); task = NULL; + +if (iscsilun->bl.max_unmap < 0xffffffff) { +bs->bl.max_discard = sector_lun2qemu(iscsilun->bl.max_unmap, + iscsilun); +} +bs->bl.discard_alignment = sector_lun2qemu(iscsilun->bl.opt_unmap_gran, + iscsilun); + +if (iscsilun->bl.max_ws_len < 0xffffffff) { +bs->bl.max_write_zeroes = sector_lun2qemu(iscsilun->bl.max_ws_len, + iscsilun); +} +bs->bl.write_zeroes_alignment = sector_lun2qemu(iscsilun->bl.opt_unmap_gran, +iscsilun); } #if defined(LIBISCSI_FEATURE_NOP_COUNTER) This patch and the previous one need to be swapped, but maintainers can do that. Paolo
Re: [Qemu-devel] [RESEND][PATCH] migration: drop MADVISE_DONT_NEED for incoming zero pages
Peter Lieven p...@kamp.de wrote: The madvise for zeroed out pages was introduced when every transferred zero page was memset to zero and thus allocated. Since commit 211ea740 we check for zeroness of a target page before we memset it to zero. Additionally we memmap target memory so it is essentially zero initialized (except for e.g. option roms and bios which are loaded into target memory although they shouldn't). It was reported recently that this madvise causes a performance degradation in some situations. As the madvise should only be called rarely and if it's called it is likely on a busy page (it was non-zero and changed to zero during migration) drop it completely. Reviewed-by: Juan Quintela quint...@redhat.com I take it. I am on KVM Forum/LinuxCon this week. Will send when back at home. Thanks. Reported-By: Zhang Haoyu haoyu.zh...@huawei.com Acked-by: Paolo Bonzini pbonz...@redhat.com Signed-off-by: Peter Lieven p...@kamp.de --- arch_init.c |8 1 file changed, 8 deletions(-) diff --git a/arch_init.c b/arch_init.c index 7545d96..e0acbc5 100644 --- a/arch_init.c +++ b/arch_init.c @@ -850,14 +850,6 @@ void ram_handle_compressed(void *host, uint8_t ch, uint64_t size) { if (ch != 0 || !is_zero_range(host, size)) { memset(host, ch, size); -#ifndef _WIN32 -if (ch == 0 && (!kvm_enabled() || kvm_has_sync_mmu())) { -size = size & ~(getpagesize() - 1); -if (size > 0) { -qemu_madvise(host, size, QEMU_MADV_DONTNEED); -} -} -#endif } }
Re: [Qemu-devel] [PATCHv6 14/17] block/get_block_status: fix BDRV_BLOCK_ZERO for unallocated blocks
Il 24/10/2013 08:46, Peter Lieven ha scritto: this patch does 2 things: a) only do additional call outs if BDRV_BLOCK_ZERO is not already set. b) use the newly introduced bdrv_has_discard_zeroes() ... whose name became bdrv_unallocated_blocks_are_zero :) No big deal. Paolo to return the zero state of an unallocated block. the used callout to bdrv_has_zero_init() is only valid right after bdrv_create. Reviewed-by: Eric Blake ebl...@redhat.com Signed-off-by: Peter Lieven p...@kamp.de --- block.c |4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/block.c b/block.c index 21a992a..69a2d2b 100644 --- a/block.c +++ b/block.c @@ -3263,8 +3263,8 @@ static int64_t coroutine_fn bdrv_co_get_block_status(BlockDriverState *bs, *pnum, pnum); } -if (!(ret & BDRV_BLOCK_DATA)) { -if (bdrv_has_zero_init(bs)) { +if (!(ret & BDRV_BLOCK_DATA) && !(ret & BDRV_BLOCK_ZERO)) { +if (bdrv_unallocated_blocks_are_zero(bs)) { ret |= BDRV_BLOCK_ZERO; } else if (bs->backing_hd) { BlockDriverState *bs2 = bs->backing_hd;
Re: [Qemu-devel] [PATCHv6 17/17] block/raw: copy BlockLimits on raw_open
Il 24/10/2013 08:46, Peter Lieven ha scritto: Signed-off-by: Peter Lieven p...@kamp.de --- block/raw_bsd.c |1 + 1 file changed, 1 insertion(+) diff --git a/block/raw_bsd.c b/block/raw_bsd.c index b0dd23f..49ac18c 100644 --- a/block/raw_bsd.c +++ b/block/raw_bsd.c @@ -150,6 +150,7 @@ static int raw_open(BlockDriverState *bs, QDict *options, int flags, Error **errp) { bs->sg = bs->file->sg; +bs->bl = bs->file->bl; return 0; } This must be moved before the introduction of BlockLimits in the iscsi driver, or patches that use BlockLimits in block.c will not have any effect. Paolo
Re: [Qemu-devel] [PATCHv6 15/17] qemu-img: add support for fully allocated images
Il 24/10/2013 08:46, Peter Lieven ha scritto: +@var{sparse_size} indicates the consecutive number of bytes (defaults to 4k) +that must contain only zeros for qemu-img to create a sparse image during +conversion. If the number of bytes is 0 sparse files are disabled and +images will always be fully allocated. + If @var{sparse_size} is 0, the source will not be scanned for unallocated or zero sectors, and the destination image will always be fully allocated. Paolo
Re: [Qemu-devel] [PATCHv6 17/17] block/raw: copy BlockLimits on raw_open
On 24.10.2013 11:10, Paolo Bonzini wrote: Il 24/10/2013 08:46, Peter Lieven ha scritto: Signed-off-by: Peter Lieven p...@kamp.de --- block/raw_bsd.c |1 + 1 file changed, 1 insertion(+) diff --git a/block/raw_bsd.c b/block/raw_bsd.c index b0dd23f..49ac18c 100644 --- a/block/raw_bsd.c +++ b/block/raw_bsd.c @@ -150,6 +150,7 @@ static int raw_open(BlockDriverState *bs, QDict *options, int flags, Error **errp) { bs->sg = bs->file->sg; +bs->bl = bs->file->bl; return 0; } This must be moved before the introduction of BlockLimits in the iscsi driver, or patches that use BlockLimits in block.c will not have any effect. You are the first to mention this. I was thinking the whole series will be seen as once so it shouldn't matter. Peter
Re: [Qemu-devel] [PATCHv6 16/17] qemu-img: conditionally zero out target on convert
Il 24/10/2013 08:46, Peter Lieven ha scritto: This currently is designed especially for iscsi. I'm not sure this is the way you want to spin this. :) Perhaps This currently works only for iscsi. It can be extended to raw with BLKDISCARDZEROES for example. Paolo
Re: [Qemu-devel] [RESEND][PATCH 1.7] migration: drop MADVISE_DONT_NEED for incoming zero pages
Il 24/10/2013 08:21, Peter Lieven ha scritto: Additionally we memmap target memory so it is essentially zero initialized (except for e.g. option roms and bios which are loaded into target memory although they shouldn't). It was reported recently that this madvise causes a performance degradation in some situations. As the madvise should only be called rarely and if it's called it is likely on a busy page (it was non-zero and changed to zero during migration) drop it completely. Tagging this patch for 1.7. Paolo
Re: [Qemu-devel] [PATCH 1/2] vga: allow non-global vmstate
diff --git a/hw/display/vga-pci.c b/hw/display/vga-pci.c index b3a45c8..dee180f 100644 --- a/hw/display/vga-pci.c +++ b/hw/display/vga-pci.c @@ -147,7 +147,7 @@ static int pci_std_vga_initfn(PCIDevice *dev) VGACommonState *s = &d->vga; /* vga + console init */ -vga_common_init(s, OBJECT(dev)); +vga_common_init(s, OBJECT(dev), true); -void vga_common_init(VGACommonState *s, Object *obj) +void vga_common_init(VGACommonState *s, Object *obj, bool global_vmstate) -vmstate_register_ram_global(&s->vram); +vmstate_register_ram(&s->vram, global_vmstate ? NULL : DEVICE(obj)); Is it possible to do it depending of the QEMU compatibility version, or of some property, so we don't have to keep this global vmstate forever? Sure. We can easily add a property to stdvga, then make the new vga_common_init parameter depend on the property. With this, I think we can also expect to be able to specify -device VGA twice, and see the BIOS messages on one card or another. It would get us a step closer, but the two vga cards would still compete for the legacy vga io ports at 0x3c0. That's why there is patch #2 of this series, which adds a stdvga variant not allocating legacy ports, so it can be added without port conflicts. cheers, Gerd
Re: [Qemu-devel] [PATCH 05/10] pci-host: Consistently set cannot_instantiate_with_device_add_yet
Peter Maydell peter.mayd...@linaro.org writes: On 17 October 2013 14:54, arm...@redhat.com wrote: From: Markus Armbruster arm...@redhat.com Many PCI host bridges consist of a sysbus device and a PCI device. You need both for the thing to work. Arguably, these bridges should be modelled as a single, composite devices instead of pairs of seemingly independent devices you can only use together, but we're not there, yet. I disagree here -- we should be using the modularity that our device model provides, not arbitrarily squashing things together into single objects just because we've foolishly exposed to the end user direct command line access to create any random object whatsoever even if it doesn't make sense. I'm afraid I didn't express myself clearly. I'm not advocating *squashing* these components together. I'm saying that if A and B can only be used wired together, there should be a C composed of A, B and the necessary wiring, and that C is what actually gets put on the board by configuration. Since the sysbus part can't be instantiated with device_add, yet, permitting it with the PCI part is useless. We shouldn't offer useless options to the user, so let's set cannot_instantiate_with_device_add_yet for them. It doesn't make sense to allow the user to create the on-PCI-bus representation of the host controller anyway even if they could device_add the host controller proper: creating the host controller will always automatically create the on-PCI-bus part. Technically, a device_add i440FX-pcihost doesn't automatically create i440FX *now*. I suspect we're arguing only about what exact kind of crazy device_add of the PCI-facing part of the PCI host bridge is. Assuming we actually agree it's crazy in *today's* state of things, does it matter what kind of crazy it is? If it doesn't matter, perhaps you could give me a hint on how to rephrase the commit message. 
--- a/hw/mips/gt64xxx_pci.c +++ b/hw/mips/gt64xxx_pci.c @@ -1157,6 +1157,11 @@ static void gt64120_pci_class_init(ObjectClass *klass, void *data) k->device_id = PCI_DEVICE_ID_MARVELL_GT6412X; k->revision = 0x10; k->class_id = PCI_CLASS_BRIDGE_HOST; +/* + * PCI-facing part of the host bridge, not usable without the + * host-facing part, which can't be device_add'ed, yet. + */ +k->parent_class.cannot_instantiate_with_device_add_yet = true; Please don't directly access parent_class -- you should be using the proper QOM cast macros to get the DeviceClass pointer. Will fix, thanks!
Re: [Qemu-devel] [PATCHv6 16/17] qemu-img: conditionally zero out target on convert
On 24.10.2013 11:13, Paolo Bonzini wrote: Il 24/10/2013 08:46, Peter Lieven ha scritto: This currently is designed especially for iscsi. I'm not sure this is the way you want to spin this. :) Perhaps This currently works only for iscsi. It can be extended to raw with BLKDISCARDZEROES for example. Thanks for your comments. Will respin. Peter
[Qemu-devel] kvm binary is deprecated - solved!
Hi Stefan , Great thanks - your easy trick works! (after I upgraded Ubuntu 13.04 to 13.10). As for sniffing the traffic between VMs - I have yet one idea and I would appreciate your feedback. The activities at VM that involve modifying data can be divided into the following categories: 1. Talk through network (sending net packets to other hosts) 2. Disk operations 3. Memory accesses In essence memory accesses are always performed BEFORE disk or network operations are executed (and the corresponding drivers are employed). For example, we prepare data in a buffer and send it into a socket. That is, a sniffer in the Linux should be put at a kernel driver that makes physical memory available to user space. Thanks, Alex P.S. I CC my colleague Dr.Reuven Yagel, a member of the team I am working in. On Mon 14 Oct 16:16 2013 Stefan Hajnoczi wrote: On Mon, Oct 14, 2013 at 12:36 PM, Alexander Binun bi...@cs.bgu.ac.il wrote: The workaround offered in bug trackers is: change the path associated with the emulation tag in the xml definition file. Change it to qemu-system-x86_64. Well, I am familiar with XML definition files for VMs: they are used manually when defining VMs in virsh (virsh define xmldef.xml and so on). There is the emulation tag there, pointing to the path to the emulator. virt-manager (used by me) creates such a file also (putting in into /etc/libvirt/qemu). But so far I do not have valid XML definition files. So I intend to try the following ways: --- find an example definition file and create a VM manually (through virsh) --- use qemu kvm compiled from the Git sources referred to by you. An easy trick: # mv /usr/bin/kvm /usr/bin/kvm.orig # ln -s /usr/bin/qemu-system-x86_64 /usr/bin/kvm Hopefully libvirt will be happier with the actual qemu-system-x86_64 binary. If this doesn't work you can move /usr/bin/kvm.orig back and try the other methods. Stefan
Re: [Qemu-devel] [PATCH 01/10] qdev: Replace no_user by cannot_instantiate_with_device_add_yet
Peter Maydell peter.mayd...@linaro.org writes: On 17 October 2013 14:54, arm...@redhat.com wrote: From: Markus Armbruster arm...@redhat.com In an ideal world, machines can be built by wiring devices together with configuration, not code. Unfortunately, that's not the world we live in right now. We still have quite a few devices that need to be wired up by code. If you try to device_add such a device, it'll fail in sometimes mysterious ways. If you're lucky, you get an unmysterious immediate crash. +/* + * Shall we hide this device model from -device / device_add? + * All devices should support instantiation with device_add, and + * this flag should not exist. But we're not there, yet. Some + * devices fail to instantiate with cryptic error messages. + * Others instantiate, but don't work. Exposing users to such + * behavior would be cruel; this flag serves to protect them. It + * should never be set without a comment explaining why it is set. + * TODO remove once we're there + */ +bool cannot_instantiate_with_device_add_yet; So reading this I'm still not entirely sure what the scope of this flag is intended to be. I can think of two possibilities: (1) the minimal definition: this device would actually crash or cause QEMU to break if you created it with device_add (2) a larger definition, which includes also devices which are completely useless if created with device_add because there's no way for the user to wire them up properly. I think most sysbus devices are going to be in (2) but not (1), because they should be fine to create and initialize, but they'll just be sitting completely pointlessly totally disconnected from the machine model. Definition (1) is a harder boundary and more straightforward to check against, but definition (2) is arguably a bit more useful for the end user. I agree, and I'd like us to adopt definition (2). I tried to express this when I wrote instantiate, but don't work. Care to suggest clearer language for this comment? 
Regarding (2) being less straightforward to check against: I think we should try hard to make our cannot_instantiate_with_device_add_yet use correct (any device we mark that way is actually useless with device_add), but I view completeness (all the devices that are actually useless with -device are marked) as not quite that important.
Re: [Qemu-devel] [PATCH] qemu-iotests: Test for loading VM state from qcow2
Am 23.10.2013 um 20:26 hat Max Reitz geschrieben: Add a test for saving a VM state from a qcow2 image and loading it back (with having restarted qemu in between); this should work without any problems. Signed-off-by: Max Reitz mre...@redhat.com --- Follow-up to (depends on): - qcow2: Restore total_sectors value in save_vmstate - qcow2: Unset zero_beyond_eof in save_vmstate --- tests/qemu-iotests/068 | 65 tests/qemu-iotests/group | 1 + 068.out is missing. Kevin
Re: [Qemu-devel] kvm binary is deprecated - solved!
On Thu, Oct 24, 2013 at 10:23 AM, Alexander Binun bi...@cs.bgu.ac.il wrote: As for sniffing the traffic between VMs - I have yet one idea and I would appreciate your feedback. [...] That is, a sniffer in the Linux should be put at a kernel driver that makes physical memory available to user space. I'm not sure what you are trying to do. Can you describe your goal? Depending on what you are trying to observe, there may already be sniffing or tracing mechanisms available. Stefan
Re: [Qemu-devel] [PATCH] qcow2: Restore total_sectors value in save_vmstate
Am 23.10.2013 um 19:03 hat Max Reitz geschrieben: On 2013-10-21 22:36, Eric Blake wrote: On 10/20/2013 07:28 PM, Max Reitz wrote: Since df2a6f29a5, bdrv_co_do_writev increases the total_sectors value of a growable block devices on writes after the current end. This leads to the virtual disk apparently growing in qcow2_save_vmstate, which in turn affects the disk size captured by the internal snapshot taken directly afterwards through e.g. the HMP savevm command. Such a grown snapshot cannot be loaded after reopening the qcow2 image, since its disk size differs from the actual virtual disk size (writing a VM state does not actually increase the virtual disk size). Fix this by restoring total_sectors at the end of qcow2_save_vmstate. Signed-off-by: Max Reitz mre...@redhat.com --- block/qcow2.c | 5 + 1 file changed, 5 insertions(+) @@ -1946,6 +1947,10 @@ static int qcow2_save_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, bs-growable = 1; ret = bdrv_pwritev(bs, qcow2_vm_state_offset(s) + pos, qiov); bs-growable = growable; +// bdrv_co_do_writev will have increased the total_sectors value to include +// the VM state - the VM state is however not an actual part of the block +// device, therefore, we need to restore the old value. +bs-total_sectors = total_sectors; It looks like // comments aren't forbidden, but also uncommon; I don't know if /**/ would be better. At any rate: Ah, right, sorry, I forgot. Thanks, fixed up the command and applied to the block branch. Kevin
Re: [Qemu-devel] [PATCH] qcow2: Unset zero_beyond_eof in save_vmstate
Am 21.10.2013 um 22:37 hat Eric Blake geschrieben: On 10/20/2013 08:52 PM, Max Reitz wrote: Saving the VM state is done using bdrv_pwrite. This function may perform a read-modify-write, which in this case results in data being read from beyond the end of the virtual disk. Since we are actually trying to access an area which is not a part of the virtual disk, zero_beyond_eof has to be set to false before performing the partial write, otherwise the VM state may become corrupted. Signed-off-by: Max Reitz mre...@redhat.com --- Follow-up to (depends on): - qcow2: Restore total_sectors value in save_vmstate Reviewed-by: Eric Blake ebl...@redhat.com Thanks, applied to the block branch. Kevin
[Qemu-devel] observing VM actions
I am trying to observe the memory/disk/network accesses done by a VM. The resulting log can be used to decide whether a VM initiates a malicious action (because , say, it runs a malicious software). On Thu 24 Oct 11:49 2013 Stefan Hajnoczi wrote: On Thu, Oct 24, 2013 at 10:23 AM, Alexander Binun bi...@cs.bgu.ac.il wrote: As for sniffing the traffic between VMs - I have yet one idea and I would appreciate your feedback. [...] That is, a sniffer in the Linux should be put at a kernel driver that makes physical memory available to user space. I'm not sure what you are trying to do. Can you describe your goal? Depending on what you are trying to observe, there may already be sniffing or tracing mechanisms available. Stefan
Re: [Qemu-devel] [PATCH] qcow2: Flush image after creation
Am 23.10.2013 um 21:40 hat Max Reitz geschrieben: Opening the qcow2 image with BDRV_O_NO_FLUSH prevents any flushes during the image creation. This means that the image has not yet been flushed to disk when qemu-img create exits. This flush is delayed until the next operation on the image involving opening it without BDRV_O_NO_FLUSH and closing (or directly flushing) it. For large images and/or images with a small cluster size and preallocated metadata, this flush may take a significant amount of time and may occur unexpectedly. Reopening the image without BDRV_O_NO_FLUSH right before the end of qcow2_create2() results in preponing the potentially costly flush into the image creation, which is expected to take some time (whereas successive image operations may be not). Signed-off-by: Max Reitz mre...@redhat.com --- block/qcow2.c | 10 +- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/block/qcow2.c b/block/qcow2.c index c1abaff..8b98c3a 100644 --- a/block/qcow2.c +++ b/block/qcow2.c @@ -1584,7 +1584,15 @@ static int qcow2_create2(const char *filename, int64_t total_size, } } -ret = 0; I would prefer to keep the explicit ret = 0 there (just like the unnecessary last 'goto out:', it just makes things more obvious and consistent) +bdrv_close(bs); + +/* Reopen the image without BDRV_O_NO_FLUSH to flush it before returning */ +ret = bdrv_open(bs, filename, NULL, +BDRV_O_RDWR | BDRV_O_CACHE_WB, drv, local_err); +if (error_is_set(local_err)) { +error_propagate(errp, local_err); So a goto here wouldn't hurt either. Note how the unnecessary goto in the block before allowed you to just add your new code without modifying existing parts. +} + out: bdrv_unref(bs); return ret; Kevin
Re: [Qemu-devel] [PATCHv2] qemu-img: add special exit code if bdrv_check is not supported
Am 24.10.2013 um 09:00 hat Eric Blake geschrieben: On 10/24/2013 07:53 AM, Peter Lieven wrote: currently it is not possible to distinguish by exitcode if there has been an error or if bdrv_check is not supported by the image format. Change the exitcode from 1 to 63 for the latter case. Signed-off-by: Peter Lieven p...@kamp.de --- v1-v2: As Eric suggested changed the exitcode from 255 to 63. qemu-img.c |2 +- 1 file changed, 1 insertion(+), 1 deletion(-) Should the man page document this exit status? Then again, it doesn't document ANY exit status, so that could be a separate patch. Reviewed-by: Eric Blake ebl...@redhat.com Thanks, applied to the block branch. Kevin
Re: [Qemu-devel] [PATCH] qcow2: Flush image after creation
On 10/23/2013 08:40 PM, Max Reitz wrote: Opening the qcow2 image with BDRV_O_NO_FLUSH prevents any flushes during the image creation. This means that the image has not yet been flushed to disk when qemu-img create exits. This flush is delayed until the next operation on the image involving opening it without BDRV_O_NO_FLUSH and closing (or directly flushing) it. For large images and/or images with a small cluster size and preallocated metadata, this flush may take a significant amount of time and may occur unexpectedly. Reopening the image without BDRV_O_NO_FLUSH right before the end of qcow2_create2() results in preponing the potentially costly flush into s/preponing/hoisting/ the image creation, which is expected to take some time (whereas successive image operations may be not). Signed-off-by: Max Reitz mre...@redhat.com --- block/qcow2.c | 10 +- 1 file changed, 9 insertions(+), 1 deletion(-) Reviewed-by: Eric Blake ebl...@redhat.com -ret = 0; +bdrv_close(bs); + +/* Reopen the image without BDRV_O_NO_FLUSH to flush it before returning */ +ret = bdrv_open(bs, filename, NULL, +BDRV_O_RDWR | BDRV_O_CACHE_WB, drv, local_err); I would probably have indented as: ret = bdrv_open(bs, filename, NULL, BDRV_O_RDWR | BDRV_O_CACHE_WB, drv, local_err); but it's trivial enough that I'm also fine with your choice. -- Eric Blake eblake redhat com+1-919-301-3266 Libvirt virtualization library http://libvirt.org signature.asc Description: OpenPGP digital signature
Re: [Qemu-devel] [PATCHv6 17/17] block/raw: copy BlockLimits on raw_open
Il 24/10/2013 10:12, Peter Lieven ha scritto: This must be moved before the introduction of BlockLimits in the iscsi driver, or patches that use BlockLimits in block.c will not have any effect. You are the first to mention this. I was thinking the whole series will be seen as once so it shouldn't matter. In general, series should keep old functionality at all stages. This helps when someone reports a regression, because we can ask them to bisect and not have them burdened by problems in the middle of a series. (It would also help you debugging things, if this series turned out to have a bug). After patch 10 of this series, an iSCSI array will stop receiving split requests for large discards. This may introduce spurious failures. I made the same remark on patch 11, but that patch alone is not enough to restore this; you need this one too for patch 11 to have any effect. So the correct order is patch 17 first, then patch 11, then patch 10. In other word, remove code only after it has become dead. Paolo
Re: [Qemu-devel] [PATCHv2] block/vpc: check that the image has not been truncated
Am 24.10.2013 um 09:16 hat Peter Lieven geschrieben: this adds a check that a dynamic VHD file has not been accidentally truncated (e.g. during transfer or upload). Signed-off-by: Peter Lieven p...@kamp.de Thanks, applied to the block branch. Kevin
[Qemu-devel] [PATCHv7 00/17] block: logical block provisioning enhancements
this patch adds the ability for targets to stay sparse during block migration (if the zero_blocks capability is set) and qemu-img convert even if the target does not have has_zero_init = 1. the series was especially developed for iSCSI, but it should also work with other drivers with little or no adjustments. these adjustments should be limited to providing block provisioning information through get_block_info and/or honouring BDRV_REQ_MAY_UNMAP on writing zeroes. v6->v7: - switched position of iscsi: set limits in BlockDriverState and iscsi: simplify iscsi_co_discard. (Paolo) - fixed commit message of block/get_block_status: fix BDRV_BLOCK_ZERO for unallocated blocks. (Paolo) - moved block/raw: copy BlockLimits on raw_open right after block: add BlockLimits structure to BlockDriverState. (Paolo) - Reworded description for -S 0 in qemu-img: add support for fully allocated images as suggested by Paolo. - Reworded commit message of: qemu-img: conditionally zero out target on convert. regarding iscsi (Paolo) v5->v6: - protected iscsi_co_write_zeroes by the existence of the SCSI_SENSE_ASCQ_CAPACITY_DATA_HAS_CHANGED macro. This is ugly but necessary because the semantic of iscsi_writesame16_task silently changed between libiscsi 1.8.0 and 1.9.0. The above macro was the first added after the change. I already contacted Ronnie to introduce an API version macro which has to be bumped on each new function that will be added. Changes to the parameters should not happen at all of course. v4->v5: - new patches 4-6 to move the block provisioning information to the BlockDriverInfo. 
- kept 2 wrappers to read the information from the BDI and renamed them to make more clear what they do: bdrv_has_discard_zeroes -> bdrv_unallocated_blocks_are_zero bdrv_has_discard_write_zeroes -> bdrv_can_write_zeroes_with_unmap - added additional information about the 2 flags in the BDI struct in block.h v3->v4: - changed BlockLimits struct to typedef (Stefan, Eric) - renamed bdrv_zeroize to bdrv_make_zero (Stefan) - added comment about the -S flag of qemu-img convert in qemu-img.texi (Eric) - used struct assignment for bs->bl in raw_open (Stefan, Eric) - dropped 3 get_block_status fixes that are independent of this series and already partly merged. v2->v3: - fix merge conflict in block/qcow2_cluster.c - changed return type of bdrv_has_discard_zeroes and bdrv_has_discard_write_zeroes to bool. - moved alignment and limits info to a BlockLimits struct (Paolo). - added magic constants for default maximum in bdrv_co_do_write_zeroes and bdrv_co_discard (Eric). - bdrv_co_do_write_zeroes: allocating the bounce buffer only once (Eric), fixed bounce iov_len in the fall back path. - bdrv_zeroize: added inline docu (Eric) and do not mask flags passed to bdrv_write_zeroes (Eric). - qemu-img: changed the default hint for -S (min_sparse) in the usage help to 4k. not changing the default as it is unclear why this default was set. size suffixes are already supported (Eric). v1->v2: - moved block max_discard and max_write_zeroes to BlockDriverState - added discard_alignment and write_zeroes_alignment to BlockDriverState - added bdrv_has_discard_zeroes() and bdrv_has_discard_write_zeroes() - added logic to bdrv_co_discard and bdrv_co_do_write_zeroes to honour limit and alignment info. - added support for -S 0 in qemu-img convert. 
Peter Lieven (17): block: make BdrvRequestFlags public block: add flags to bdrv_*_write_zeroes block: introduce BDRV_REQ_MAY_UNMAP request flag block: add logical block provisioning info to BlockDriverInfo block: add wrappers for logical block provisioning information block/iscsi: add .bdrv_get_info block: add BlockLimits structure to BlockDriverState block/raw: copy BlockLimits on raw_open block: honour BlockLimits in bdrv_co_do_write_zeroes block: honour BlockLimits in bdrv_co_discard iscsi: set limits in BlockDriverState iscsi: simplify iscsi_co_discard iscsi: add bdrv_co_write_zeroes block: introduce bdrv_make_zero block/get_block_status: fix BDRV_BLOCK_ZERO for unallocated blocks qemu-img: add support for fully allocated images qemu-img: conditionally zero out target on convert block-migration.c |3 +- block.c | 200 + block/backup.c|3 +- block/iscsi.c | 150 +- block/qcow2-cluster.c |2 +- block/qcow2.c |2 +- block/qed.c |3 +- block/raw_bsd.c |6 +- block/vmdk.c |3 +- include/block/block.h | 35 +++- include/block/block_int.h | 19 - qemu-img.c| 20 - qemu-img.texi |6 ++ qemu-io-cmds.c|2 +- 14 files changed, 366 insertions(+), 88 deletions(-) -- 1.7.9.5
[Qemu-devel] [PATCHv7 06/17] block/iscsi: add .bdrv_get_info
Signed-off-by: Peter Lieven p...@kamp.de --- block/iscsi.c |9 + 1 file changed, 9 insertions(+) diff --git a/block/iscsi.c b/block/iscsi.c index a2a961e..1dbbcad 100644 --- a/block/iscsi.c +++ b/block/iscsi.c @@ -1506,6 +1506,14 @@ out: return ret; } +static int iscsi_get_info(BlockDriverState *bs, BlockDriverInfo *bdi) +{ +IscsiLun *iscsilun = bs-opaque; +bdi-unallocated_blocks_are_zero = !!iscsilun-lbprz; +bdi-can_write_zeroes_with_unmap = iscsilun-lbprz iscsilun-lbp.lbpws; +return 0; +} + static QEMUOptionParameter iscsi_create_options[] = { { .name = BLOCK_OPT_SIZE, @@ -1527,6 +1535,7 @@ static BlockDriver bdrv_iscsi = { .create_options = iscsi_create_options, .bdrv_getlength = iscsi_getlength, +.bdrv_get_info = iscsi_get_info, .bdrv_truncate = iscsi_truncate, #if defined(LIBISCSI_FEATURE_IOVECTOR) -- 1.7.9.5
[Qemu-devel] [PATCHv7 05/17] block: add wrappers for logical block provisioning information
This adds 2 wrappers to read the unallocated_blocks_are_zero and can_write_zeroes_with_unmap info from the BDI. The wrappers are required to check for the existence of a backing_hd and if the devices are opened with the correct flags. Reviewed-by: Eric Blake ebl...@redhat.com Signed-off-by: Peter Lieven p...@kamp.de --- block.c | 30 ++ include/block/block.h |2 ++ 2 files changed, 32 insertions(+) diff --git a/block.c b/block.c index 0d97ce6..0601b02 100644 --- a/block.c +++ b/block.c @@ -3094,6 +3094,36 @@ int bdrv_has_zero_init(BlockDriverState *bs) return 0; } +bool bdrv_unallocated_blocks_are_zero(BlockDriverState *bs) +{ +BlockDriverInfo bdi; + +if (bs-backing_hd) { +return false; +} + +if (bdrv_get_info(bs, bdi) == 0) { +return bdi.unallocated_blocks_are_zero; +} + +return false; +} + +bool bdrv_can_write_zeroes_with_unmap(BlockDriverState *bs) +{ +BlockDriverInfo bdi; + +if (bs-backing_hd || !(bs-open_flags BDRV_O_UNMAP)) { +return false; +} + +if (bdrv_get_info(bs, bdi) == 0) { +return bdi.can_write_zeroes_with_unmap; +} + +return false; +} + typedef struct BdrvCoGetBlockStatusData { BlockDriverState *bs; BlockDriverState *base; diff --git a/include/block/block.h b/include/block/block.h index 9c76967..803c5ca 100644 --- a/include/block/block.h +++ b/include/block/block.h @@ -344,6 +344,8 @@ int bdrv_discard(BlockDriverState *bs, int64_t sector_num, int nb_sectors); int bdrv_co_discard(BlockDriverState *bs, int64_t sector_num, int nb_sectors); int bdrv_has_zero_init_1(BlockDriverState *bs); int bdrv_has_zero_init(BlockDriverState *bs); +bool bdrv_unallocated_blocks_are_zero(BlockDriverState *bs); +bool bdrv_can_write_zeroes_with_unmap(BlockDriverState *bs); int64_t bdrv_get_block_status(BlockDriverState *bs, int64_t sector_num, int nb_sectors, int *pnum); int bdrv_is_allocated(BlockDriverState *bs, int64_t sector_num, int nb_sectors, -- 1.7.9.5
[Qemu-devel] [PATCHv7 07/17] block: add BlockLimits structure to BlockDriverState
this patch adds BlockLimits which introduces discard and write_zeroes limits and alignment information to the BlockDriverState. Signed-off-by: Peter Lieven p...@kamp.de --- include/block/block_int.h | 17 + 1 file changed, 17 insertions(+) diff --git a/include/block/block_int.h b/include/block/block_int.h index 9bbaa29..33be247 100644 --- a/include/block/block_int.h +++ b/include/block/block_int.h @@ -227,6 +227,20 @@ struct BlockDriver { QLIST_ENTRY(BlockDriver) list; }; +typedef struct BlockLimits { +/* maximum number of sectors that can be discarded at once */ +int max_discard; + +/* optimal alignment for discard requests in sectors */ +int64_t discard_alignment; + +/* maximum number of sectors that can be zeroized at once */ +int max_write_zeroes; + +/* optimal alignment for write zeroes requests in sectors */ +int64_t write_zeroes_alignment; +} BlockLimits; + /* * Note: the function bdrv_append() copies and swaps contents of * BlockDriverStates, so if you add new fields to this struct, please @@ -280,6 +294,9 @@ struct BlockDriverState { uint64_t total_time_ns[BDRV_MAX_IOTYPE]; uint64_t wr_highest_sector; +/* I/O Limits */ +BlockLimits bl; + /* Whether the disk can expand beyond total_sectors */ int growable; -- 1.7.9.5
Re: [Qemu-devel] [PATCHv2] block/vpc: check that the image has not been truncated
On 24.10.2013 12:06, Kevin Wolf wrote: Am 24.10.2013 um 09:16 hat Peter Lieven geschrieben: this adds a check that a dynamic VHD file has not been accidentally truncated (e.g. during transfer or upload). Signed-off-by: Peter Lieven p...@kamp.de Thanks, applied to the block branch. Can you have a look at "block/vpc: fix virtual size for images created with disk2vhd" as well, please? Peter
[Qemu-devel] [PATCHv7 14/17] block: introduce bdrv_make_zero
this patch adds a call to completely zero out a block device. the operation is sped up by checking the block status and only writing zeroes to the device if they currently do not return zeroes. optionally the zero writing can be sped up by setting the flag BDRV_REQ_MAY_UNMAP to emulate the zero write by unmapping if the driver supports it. Signed-off-by: Peter Lieven p...@kamp.de --- block.c | 37 + include/block/block.h |1 + 2 files changed, 38 insertions(+) diff --git a/block.c b/block.c index b28dd42..21a992a 100644 --- a/block.c +++ b/block.c @@ -2391,6 +2391,43 @@ int bdrv_write_zeroes(BlockDriverState *bs, int64_t sector_num, BDRV_REQ_ZERO_WRITE | flags); } +/* + * Completely zero out a block device with the help of bdrv_write_zeroes. + * The operation is sped up by checking the block status and only writing + * zeroes to the device if they currently do not return zeroes. Optional + * flags are passed through to bdrv_write_zeroes (e.g. BDRV_REQ_MAY_UNMAP). + * + * Returns < 0 on error, 0 on success. For error codes see bdrv_write(). 
+ */ +int bdrv_make_zero(BlockDriverState *bs, BdrvRequestFlags flags) +{ +int64_t target_size = bdrv_getlength(bs) / BDRV_SECTOR_SIZE; +int64_t ret, nb_sectors, sector_num = 0; +int n; + +for (;;) { +nb_sectors = target_size - sector_num; +if (nb_sectors = 0) { +return 0; +} +if (nb_sectors INT_MAX) { +nb_sectors = INT_MAX; +} +ret = bdrv_get_block_status(bs, sector_num, nb_sectors, n); +if (ret BDRV_BLOCK_ZERO) { +sector_num += n; +continue; +} +ret = bdrv_write_zeroes(bs, sector_num, n, flags); +if (ret 0) { +error_report(error writing zeroes at sector % PRId64 : %s, + sector_num, strerror(-ret)); +return ret; +} +sector_num += n; +} +} + int bdrv_pread(BlockDriverState *bs, int64_t offset, void *buf, int count1) { diff --git a/include/block/block.h b/include/block/block.h index 803c5ca..4d9e67c 100644 --- a/include/block/block.h +++ b/include/block/block.h @@ -216,6 +216,7 @@ int bdrv_write(BlockDriverState *bs, int64_t sector_num, const uint8_t *buf, int nb_sectors); int bdrv_write_zeroes(BlockDriverState *bs, int64_t sector_num, int nb_sectors, BdrvRequestFlags flags); +int bdrv_make_zero(BlockDriverState *bs, BdrvRequestFlags flags); int bdrv_writev(BlockDriverState *bs, int64_t sector_num, QEMUIOVector *qiov); int bdrv_pread(BlockDriverState *bs, int64_t offset, void *buf, int count); -- 1.7.9.5
[Qemu-devel] [PATCHv7 11/17] iscsi: set limits in BlockDriverState
Reviewed-by: Eric Blake ebl...@redhat.com Signed-off-by: Peter Lieven p...@kamp.de --- block/iscsi.c | 14 ++ 1 file changed, 14 insertions(+) diff --git a/block/iscsi.c b/block/iscsi.c index 1dbbcad..16d8052 100644 --- a/block/iscsi.c +++ b/block/iscsi.c @@ -1384,6 +1384,20 @@ static int iscsi_open(BlockDriverState *bs, QDict *options, int flags, sizeof(struct scsi_inquiry_block_limits)); scsi_free_scsi_task(task); task = NULL; + +if (iscsilun-bl.max_unmap 0x) { +bs-bl.max_discard = sector_lun2qemu(iscsilun-bl.max_unmap, + iscsilun); +} +bs-bl.discard_alignment = sector_lun2qemu(iscsilun-bl.opt_unmap_gran, + iscsilun); + +if (iscsilun-bl.max_ws_len 0x) { +bs-bl.max_write_zeroes = sector_lun2qemu(iscsilun-bl.max_ws_len, + iscsilun); +} +bs-bl.write_zeroes_alignment = sector_lun2qemu(iscsilun-bl.opt_unmap_gran, +iscsilun); } #if defined(LIBISCSI_FEATURE_NOP_COUNTER) -- 1.7.9.5
[Qemu-devel] [PATCHv7 13/17] iscsi: add bdrv_co_write_zeroes
Signed-off-by: Peter Lieven p...@kamp.de --- block/iscsi.c | 64 + 1 file changed, 64 insertions(+) diff --git a/block/iscsi.c b/block/iscsi.c index c0465aa..014475d 100644 --- a/block/iscsi.c +++ b/block/iscsi.c @@ -56,6 +56,7 @@ typedef struct IscsiLun { uint8_t lbprz; struct scsi_inquiry_logical_block_provisioning lbp; struct scsi_inquiry_block_limits bl; +unsigned char *zeroblock; } IscsiLun; typedef struct IscsiTask { @@ -959,6 +960,65 @@ retry: return 0; } +#if defined(SCSI_SENSE_ASCQ_CAPACITY_DATA_HAS_CHANGED) + +static int +coroutine_fn iscsi_co_write_zeroes(BlockDriverState *bs, int64_t sector_num, + int nb_sectors, BdrvRequestFlags flags) +{ +IscsiLun *iscsilun = bs-opaque; +struct IscsiTask iTask; +uint64_t lba; +uint32_t nb_blocks; + +if (!is_request_lun_aligned(sector_num, nb_sectors, iscsilun)) { +return -EINVAL; +} + +if (!iscsilun-lbp.lbpws) { +/* WRITE SAME is not supported by the target */ +return -ENOTSUP; +} + +lba = sector_qemu2lun(sector_num, iscsilun); +nb_blocks = sector_qemu2lun(nb_sectors, iscsilun); + +if (iscsilun-zeroblock == NULL) { +iscsilun-zeroblock = g_malloc0(iscsilun-block_size); +} + +iscsi_co_init_iscsitask(iscsilun, iTask); +retry: +if (iscsi_writesame16_task(iscsilun-iscsi, iscsilun-lun, lba, + iscsilun-zeroblock, iscsilun-block_size, + nb_blocks, 0, !!(flags BDRV_REQ_MAY_UNMAP), + 0, 0, iscsi_co_generic_cb, iTask) == NULL) { +return -EIO; +} + +while (!iTask.complete) { +iscsi_set_events(iscsilun); +qemu_coroutine_yield(); +} + +if (iTask.task != NULL) { +scsi_free_scsi_task(iTask.task); +iTask.task = NULL; +} + +if (iTask.do_retry) { +goto retry; +} + +if (iTask.status != SCSI_STATUS_GOOD) { +return -EIO; +} + +return 0; +} + +#endif /* SCSI_SENSE_ASCQ_CAPACITY_DATA_HAS_CHANGED */ + static int parse_chap(struct iscsi_context *iscsi, const char *target) { QemuOptsList *list; @@ -1421,6 +1481,7 @@ static void iscsi_close(BlockDriverState *bs) } qemu_aio_set_fd_handler(iscsi_get_fd(iscsi), NULL, NULL, NULL); 
iscsi_destroy_context(iscsi); +g_free(iscsilun-zeroblock); memset(iscsilun, 0, sizeof(IscsiLun)); } @@ -1539,6 +1600,9 @@ static BlockDriver bdrv_iscsi = { .bdrv_co_get_block_status = iscsi_co_get_block_status, #endif .bdrv_co_discard = iscsi_co_discard, +#if defined(SCSI_SENSE_ASCQ_CAPACITY_DATA_HAS_CHANGED) +.bdrv_co_write_zeroes = iscsi_co_write_zeroes, +#endif .bdrv_aio_readv = iscsi_aio_readv, .bdrv_aio_writev = iscsi_aio_writev, -- 1.7.9.5
[Qemu-devel] [PATCHv7 09/17] block: honour BlockLimits in bdrv_co_do_write_zeroes
Reviewed-by: Eric Blake ebl...@redhat.com Signed-off-by: Peter Lieven p...@kamp.de --- block.c | 65 +++ 1 file changed, 49 insertions(+), 16 deletions(-) diff --git a/block.c b/block.c index 0601b02..0c0b0ac 100644 --- a/block.c +++ b/block.c @@ -2703,32 +2703,65 @@ int coroutine_fn bdrv_co_copy_on_readv(BlockDriverState *bs, BDRV_REQ_COPY_ON_READ); } +/* if no limit is specified in the BlockLimits use a default + * of 32768 512-byte sectors (16 MiB) per request. + */ +#define MAX_WRITE_ZEROES_DEFAULT 32768 + static int coroutine_fn bdrv_co_do_write_zeroes(BlockDriverState *bs, int64_t sector_num, int nb_sectors, BdrvRequestFlags flags) { BlockDriver *drv = bs-drv; QEMUIOVector qiov; -struct iovec iov; -int ret; +struct iovec iov = {0}; +int ret = 0; -/* TODO Emulate only part of misaligned requests instead of letting block - * drivers return -ENOTSUP and emulate everything */ +int max_write_zeroes = bs-bl.max_write_zeroes ? + bs-bl.max_write_zeroes : MAX_WRITE_ZEROES_DEFAULT; -/* First try the efficient write zeroes operation */ -if (drv-bdrv_co_write_zeroes) { -ret = drv-bdrv_co_write_zeroes(bs, sector_num, nb_sectors, flags); -if (ret != -ENOTSUP) { -return ret; +while (nb_sectors 0 !ret) { +int num = nb_sectors; + +/* align request */ +if (bs-bl.write_zeroes_alignment +num = bs-bl.write_zeroes_alignment +sector_num % bs-bl.write_zeroes_alignment) { +if (num bs-bl.write_zeroes_alignment) { +num = bs-bl.write_zeroes_alignment; +} +num -= sector_num % bs-bl.write_zeroes_alignment; } -} -/* Fall back to bounce buffer if write zeroes is unsupported */ -iov.iov_len = nb_sectors * BDRV_SECTOR_SIZE; -iov.iov_base = qemu_blockalign(bs, iov.iov_len); -memset(iov.iov_base, 0, iov.iov_len); -qemu_iovec_init_external(qiov, iov, 1); +/* limit request size */ +if (num max_write_zeroes) { +num = max_write_zeroes; +} + +ret = -ENOTSUP; +/* First try the efficient write zeroes operation */ +if (drv-bdrv_co_write_zeroes) { +ret = drv-bdrv_co_write_zeroes(bs, sector_num, num, 
flags); +} + +if (ret == -ENOTSUP) { +/* Fall back to bounce buffer if write zeroes is unsupported */ +iov.iov_len = num * BDRV_SECTOR_SIZE; +if (iov.iov_base == NULL) { +/* allocate bounce buffer only once and ensure that it + * is big enough for this and all future requests. + */ +size_t bufsize = num = nb_sectors ? num : max_write_zeroes; +iov.iov_base = qemu_blockalign(bs, bufsize * BDRV_SECTOR_SIZE); +memset(iov.iov_base, 0, bufsize * BDRV_SECTOR_SIZE); +} +qemu_iovec_init_external(qiov, iov, 1); -ret = drv-bdrv_co_writev(bs, sector_num, nb_sectors, qiov); +ret = drv-bdrv_co_writev(bs, sector_num, num, qiov); +} + +sector_num += num; +nb_sectors -= num; +} qemu_vfree(iov.iov_base); return ret; -- 1.7.9.5
[Qemu-devel] [PATCHv7 12/17] iscsi: simplify iscsi_co_discard
now that bdrv_co_discard can handle limits we do not need the request split logic here anymore. Reviewed-by: Eric Blake ebl...@redhat.com Signed-off-by: Peter Lieven p...@kamp.de --- block/iscsi.c | 67 + 1 file changed, 25 insertions(+), 42 deletions(-) diff --git a/block/iscsi.c b/block/iscsi.c index 16d8052..c0465aa 100644 --- a/block/iscsi.c +++ b/block/iscsi.c @@ -87,7 +87,6 @@ typedef struct IscsiAIOCB { #define NOP_INTERVAL 5000 #define MAX_NOP_FAILURES 3 #define ISCSI_CMD_RETRIES 5 -#define ISCSI_MAX_UNMAP 131072 static void iscsi_bh_cb(void *p) @@ -912,8 +911,6 @@ coroutine_fn iscsi_co_discard(BlockDriverState *bs, int64_t sector_num, IscsiLun *iscsilun = bs-opaque; struct IscsiTask iTask; struct unmap_list list; -uint32_t nb_blocks; -uint32_t max_unmap; if (!is_request_lun_aligned(sector_num, nb_sectors, iscsilun)) { return -EINVAL; @@ -925,52 +922,38 @@ coroutine_fn iscsi_co_discard(BlockDriverState *bs, int64_t sector_num, } list.lba = sector_qemu2lun(sector_num, iscsilun); -nb_blocks = sector_qemu2lun(nb_sectors, iscsilun); +list.num = sector_qemu2lun(nb_sectors, iscsilun); -max_unmap = iscsilun-bl.max_unmap; -if (max_unmap == 0x) { -max_unmap = ISCSI_MAX_UNMAP; -} - -while (nb_blocks 0) { -iscsi_co_init_iscsitask(iscsilun, iTask); -list.num = nb_blocks; -if (list.num max_unmap) { -list.num = max_unmap; -} +iscsi_co_init_iscsitask(iscsilun, iTask); retry: -if (iscsi_unmap_task(iscsilun-iscsi, iscsilun-lun, 0, 0, list, 1, - iscsi_co_generic_cb, iTask) == NULL) { -return -EIO; -} - -while (!iTask.complete) { -iscsi_set_events(iscsilun); -qemu_coroutine_yield(); -} +if (iscsi_unmap_task(iscsilun-iscsi, iscsilun-lun, 0, 0, list, 1, + iscsi_co_generic_cb, iTask) == NULL) { +return -EIO; +} -if (iTask.task != NULL) { -scsi_free_scsi_task(iTask.task); -iTask.task = NULL; -} +while (!iTask.complete) { +iscsi_set_events(iscsilun); +qemu_coroutine_yield(); +} -if (iTask.do_retry) { -goto retry; -} +if (iTask.task != NULL) { +scsi_free_scsi_task(iTask.task); 
+iTask.task = NULL; +} -if (iTask.status == SCSI_STATUS_CHECK_CONDITION) { -/* the target might fail with a check condition if it - is not happy with the alignment of the UNMAP request - we silently fail in this case */ -return 0; -} +if (iTask.do_retry) { +goto retry; +} -if (iTask.status != SCSI_STATUS_GOOD) { -return -EIO; -} +if (iTask.status == SCSI_STATUS_CHECK_CONDITION) { +/* the target might fail with a check condition if it + is not happy with the alignment of the UNMAP request + we silently fail in this case */ +return 0; +} -list.lba += list.num; -nb_blocks -= list.num; +if (iTask.status != SCSI_STATUS_GOOD) { +return -EIO; } return 0; -- 1.7.9.5
[Qemu-devel] [PATCHv7 16/17] qemu-img: add support for fully allocated images
Signed-off-by: Peter Lieven p...@kamp.de --- qemu-img.c| 10 +++--- qemu-img.texi |6 ++ 2 files changed, 13 insertions(+), 3 deletions(-) diff --git a/qemu-img.c b/qemu-img.c index 926f0a0..7f08364 100644 --- a/qemu-img.c +++ b/qemu-img.c @@ -100,8 +100,12 @@ static void help(void) '-h' with or without a command shows this help and lists the supported formats\n '-p' show progress of command (only certain commands)\n '-q' use Quiet mode - do not print any output (except errors)\n - '-S' indicates the consecutive number of bytes that must contain only zeros\n - for qemu-img to create a sparse image during conversion\n + '-S' indicates the consecutive number of bytes (defaults to 4k) that must\n + contain only zeros for qemu-img to create a sparse image during\n + conversion. If the number of bytes is 0, the source will not be scanned for\n + unallocated or zero sectors, and the destination image will always be\n + fully allocated\n + images will always be fully allocated\n '--output' takes the format in which the output must be done (human or json)\n '-n' skips the target volume creation (useful if the volume is created\n prior to running qemu-img)\n @@ -1465,7 +1469,7 @@ static int img_convert(int argc, char **argv) /* signal EOF to align */ bdrv_write_compressed(out_bs, 0, NULL, 0); } else { -int has_zero_init = bdrv_has_zero_init(out_bs); +int has_zero_init = min_sparse ? bdrv_has_zero_init(out_bs) : 0; sector_num = 0; // total number of sectors converted so far nb_sectors = total_sectors - sector_num; diff --git a/qemu-img.texi b/qemu-img.texi index 768054e..da36975 100644 --- a/qemu-img.texi +++ b/qemu-img.texi @@ -193,6 +193,12 @@ Image conversion is also useful to get smaller image when using a growable format such as @code{qcow} or @code{cow}: the empty sectors are detected and suppressed from the destination image. 
+@var{sparse_size} indicates the consecutive number of bytes (defaults to 4k) +that must contain only zeros for qemu-img to create a sparse image during +conversion. If @var{sparse_size} is 0, the source will not be scanned for +unallocated or zero sectors, and the destination image will always be +fully allocated. + You can use the @var{backing_file} option to force the output image to be created as a copy on write image of the specified base image; the @var{backing_file} should have the same content as the input's base image, -- 1.7.9.5
[Qemu-devel] [PATCHv7 10/17] block: honour BlockLimits in bdrv_co_discard
Reviewed-by: Eric Blake ebl...@redhat.com Signed-off-by: Peter Lieven p...@kamp.de --- block.c | 37 - 1 file changed, 36 insertions(+), 1 deletion(-) diff --git a/block.c b/block.c index 0c0b0ac..b28dd42 100644 --- a/block.c +++ b/block.c @@ -4234,6 +4234,11 @@ static void coroutine_fn bdrv_discard_co_entry(void *opaque) rwco-ret = bdrv_co_discard(rwco-bs, rwco-sector_num, rwco-nb_sectors); } +/* if no limit is specified in the BlockLimits use a default + * of 32768 512-byte sectors (16 MiB) per request. + */ +#define MAX_DISCARD_DEFAULT 32768 + int coroutine_fn bdrv_co_discard(BlockDriverState *bs, int64_t sector_num, int nb_sectors) { @@ -4255,7 +4260,37 @@ int coroutine_fn bdrv_co_discard(BlockDriverState *bs, int64_t sector_num, } if (bs-drv-bdrv_co_discard) { -return bs-drv-bdrv_co_discard(bs, sector_num, nb_sectors); +int max_discard = bs-bl.max_discard ? + bs-bl.max_discard : MAX_DISCARD_DEFAULT; + +while (nb_sectors 0) { +int ret; +int num = nb_sectors; + +/* align request */ +if (bs-bl.discard_alignment +num = bs-bl.discard_alignment +sector_num % bs-bl.discard_alignment) { +if (num bs-bl.discard_alignment) { +num = bs-bl.discard_alignment; +} +num -= sector_num % bs-bl.discard_alignment; +} + +/* limit request size */ +if (num max_discard) { +num = max_discard; +} + +ret = bs-drv-bdrv_co_discard(bs, sector_num, num); +if (ret) { +return ret; +} + +sector_num += num; +nb_sectors -= num; +} +return 0; } else if (bs-drv-bdrv_aio_discard) { BlockDriverAIOCB *acb; CoroutineIOCompletion co = { -- 1.7.9.5
[Qemu-devel] [PATCHv7 15/17] block/get_block_status: fix BDRV_BLOCK_ZERO for unallocated blocks
this patch does 2 things: a) only do additional call outs if BDRV_BLOCK_ZERO is not already set. b) use the newly introduced bdrv_unallocated_blocks_are_zero() to return the zero state of an unallocated block. the used callout to bdrv_has_zero_init() is only valid right after bdrv_create. Reviewed-by: Eric Blake ebl...@redhat.com Signed-off-by: Peter Lieven p...@kamp.de --- block.c |4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/block.c b/block.c index 21a992a..69a2d2b 100644 --- a/block.c +++ b/block.c @@ -3263,8 +3263,8 @@ static int64_t coroutine_fn bdrv_co_get_block_status(BlockDriverState *bs, *pnum, pnum); } -if (!(ret BDRV_BLOCK_DATA)) { -if (bdrv_has_zero_init(bs)) { +if (!(ret BDRV_BLOCK_DATA) !(ret BDRV_BLOCK_ZERO)) { +if (bdrv_unallocated_blocks_are_zero(bs)) { ret |= BDRV_BLOCK_ZERO; } else if (bs-backing_hd) { BlockDriverState *bs2 = bs-backing_hd; -- 1.7.9.5
Re: [Qemu-devel] [PATCH] MAINTAINERS: add block driver sub-maintainers
On 23.10.2013 08:23, Paolo Bonzini wrote: Il 21/10/2013 14:26, Stefan Hajnoczi ha scritto: +iSCSI +M: Ronnie Sahlberg ronniesahlb...@gmail.com +M: Paolo Bonzini pbonz...@redhat.com +S: Supported +F: block/iscsi.c As I have worked a lot on the iSCSI driver recently I would like to receive patches as well if possible. Peter
[Qemu-devel] [PATCHv7 17/17] qemu-img: conditionally zero out target on convert
If the target has_zero_init = 0, but supports efficiently writing zeroes by unmapping we call bdrv_make_zero to avoid fully allocating the target. This currently works only for iscsi. It can be extended to raw with BLKDISCARDZEROES for example. Reviewed-by: Eric Blake ebl...@redhat.com Signed-off-by: Peter Lieven p...@kamp.de --- qemu-img.c | 10 +- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/qemu-img.c b/qemu-img.c index 7f08364..bec6da3 100644 --- a/qemu-img.c +++ b/qemu-img.c @@ -1355,7 +1355,7 @@ static int img_convert(int argc, char **argv) } } -flags = BDRV_O_RDWR; +flags = min_sparse ? (BDRV_O_RDWR | BDRV_O_UNMAP) : BDRV_O_RDWR; ret = bdrv_parse_cache_flags(cache, flags); if (ret 0) { error_report(Invalid cache option: %s, cache); @@ -1471,6 +1471,14 @@ static int img_convert(int argc, char **argv) } else { int has_zero_init = min_sparse ? bdrv_has_zero_init(out_bs) : 0; +if (!has_zero_init bdrv_can_write_zeroes_with_unmap(out_bs)) { +ret = bdrv_make_zero(out_bs, BDRV_REQ_MAY_UNMAP); +if (ret 0) { +goto out; +} +has_zero_init = 1; +} + sector_num = 0; // total number of sectors converted so far nb_sectors = total_sectors - sector_num; if (nb_sectors != 0) { -- 1.7.9.5
Re: [Qemu-devel] [PATCH v2] block: support dropping active in bdrv_drop_intermediate
On Tue, Oct 15, 2013 at 03:25:00PM +0800, Fam Zheng wrote: There is only one failure point: bdrv_change_backing_file in this function, so we can drop the qlist and try to change the backing file before deleting anything. This way bdrv_drop_intermediate is simplified while keeping the operation transactional. A bonus is dropping an active BDS is supported too by swapping the base and top. Although no caller uses this yet, the comment is updated to reflect the change. Signed-off-by: Fam Zheng f...@redhat.com --- v2: check for active, top and base being in a backing chain. (Jeff) This does check for that, but it doesn't catch all errors. It will verify: [base] - [active] And verifies: [top] - [active] (when active is != top) However, it does not verify that the following is true: [base] - [top] (e.g., it will pass on [top] - [base] - [active]) Rather than add another call to bdrv_find_overlay to verify the last case, would just adding the bdrv_swap() and a check for active == top to the existing function do what you need for the active layer support? Signed-off-by: Fam Zheng f...@redhat.com --- block.c| 103 - block/commit.c | 1 + 2 files changed, 37 insertions(+), 67 deletions(-) diff --git a/block.c b/block.c index fd05a80..9ead554 100644 --- a/block.c +++ b/block.c @@ -2130,18 +2130,11 @@ BlockDriverState *bdrv_find_overlay(BlockDriverState *active, return overlay; } -typedef struct BlkIntermediateStates { -BlockDriverState *bs; -QSIMPLEQ_ENTRY(BlkIntermediateStates) entry; -} BlkIntermediateStates; - - /* - * Drops images above 'base' up to and including 'top', and sets the image - * above 'top' to have base as its backing file. - * - * Requires that the overlay to 'top' is opened r/w, so that the backing file - * information in 'bs' can be properly updated. + * Drops images above 'base' up to and including 'top', and sets new 'base' + * as backing_hd of top_overlay (the image orignally has 'top' as backing + * file). 
top_overlay may be NULL if 'top' is active, no such update needed. + * Requires that the top_overlay to 'top' is opened r/w. * * E.g., this will convert the following chain: * bottom - base - intermediate - top - active @@ -2158,86 +2151,62 @@ typedef struct BlkIntermediateStates { * * base - active * - * Error conditions: - * if active == top, that is considered an error + * It also allows active==top, in which case it converts: + * + * base - intermediate - active (also top) + * + * to + * + * base == active == top, i.e. only base remains: *top == *base when return. * */ int bdrv_drop_intermediate(BlockDriverState *active, BlockDriverState *top, BlockDriverState *base) { -BlockDriverState *intermediate; +BlockDriverState *pbs; +BlockDriverState *overlay = NULL; BlockDriverState *base_bs = NULL; -BlockDriverState *new_top_bs = NULL; -BlkIntermediateStates *intermediate_state, *next; -int ret = -EIO; - -QSIMPLEQ_HEAD(states_to_delete, BlkIntermediateStates) states_to_delete; -QSIMPLEQ_INIT(states_to_delete); +int ret = -EINVAL; if (!top-drv || !base-drv) { goto exit; } -new_top_bs = bdrv_find_overlay(active, top); - -if (new_top_bs == NULL) { -/* we could not find the image above 'top', this is an error */ -goto exit; -} - -/* special case of new_top_bs-backing_hd already pointing to base - nothing - * to do, no intermediate images */ -if (new_top_bs-backing_hd == base) { -ret = 0; +if (!bdrv_find_overlay(active, base)) { goto exit; } -intermediate = top; - -/* now we will go down through the list, and add each BDS we find - * into our deletion queue, until we hit the 'base' - */ -while (intermediate) { -intermediate_state = g_malloc0(sizeof(BlkIntermediateStates)); -intermediate_state-bs = intermediate; -QSIMPLEQ_INSERT_TAIL(states_to_delete, intermediate_state, entry); - -if (intermediate-backing_hd == base) { -base_bs = intermediate-backing_hd; -break; +if (active != top) { +/* If there's an overlay, its backing_hd points to top's BDS now, + * the top image is 
dropped but this BDS structure is kept and swapped + * with base, this way we keep the pointers valid after dropping top */ +overlay = bdrv_find_overlay(active, top); +if (!overlay) { +goto exit; +} +ret = bdrv_change_backing_file(overlay, base-filename, + base-drv ? +base-drv-format_name : ); +if (ret) { +goto exit; } -intermediate = intermediate-backing_hd; -
[Qemu-devel] [PATCHv7 01/17] block: make BdrvRequestFlags public
Reviewed-by: Eric Blake ebl...@redhat.com Signed-off-by: Peter Lieven p...@kamp.de --- block.c |5 - include/block/block.h |5 + 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/block.c b/block.c index fd05a80..eb11a07 100644 --- a/block.c +++ b/block.c @@ -51,11 +51,6 @@ #define NOT_DONE 0x7fff /* used while emulated sync operation in progress */ -typedef enum { -BDRV_REQ_COPY_ON_READ = 0x1, -BDRV_REQ_ZERO_WRITE = 0x2, -} BdrvRequestFlags; - static void bdrv_dev_change_media_cb(BlockDriverState *bs, bool load); static BlockDriverAIOCB *bdrv_aio_readv_em(BlockDriverState *bs, int64_t sector_num, QEMUIOVector *qiov, int nb_sectors, diff --git a/include/block/block.h b/include/block/block.h index 3560deb..ba2082c 100644 --- a/include/block/block.h +++ b/include/block/block.h @@ -62,6 +62,11 @@ typedef struct BlockDevOps { void (*resize_cb)(void *opaque); } BlockDevOps; +typedef enum { +BDRV_REQ_COPY_ON_READ = 0x1, +BDRV_REQ_ZERO_WRITE = 0x2, +} BdrvRequestFlags; + #define BDRV_O_RDWR0x0002 #define BDRV_O_SNAPSHOT0x0008 /* open the file read only and save writes in a snapshot */ #define BDRV_O_NOCACHE 0x0020 /* do not use the host page cache */ -- 1.7.9.5
Re: [Qemu-devel] [PATCH v2] configure: create fsdev/ directory
Il 21/10/2013 09:35, Michael Tokarev ha scritto: In some cases when building with parallelism (make -jN), build fails because the directory where output files are supposed to be does not exist. In particular, when make decides to build virtfs-proxy-helper.1 before other files in fsdev/, build will fail with the following error: perl -Ww -- BUILDDIR/scripts/texi2pod.pl BUILDDIR/fsdev/virtfs-proxy-helper.texi fsdev/virtfs-proxy-helper.pod pod2man --utf8 --section=1 --center= --release= fsdev/virtfs-proxy-helper.pod fsdev/virtfs-proxy-helper.1 opening fsdev/virtfs-proxy-helper.pod: No such file or directory Signed-off-by: Michael Tokarev m...@tls.msk.ru --- configure |1 + 1 file changed, 1 insertion(+) diff --git a/configure b/configure index 57ee62a..61bb27c 100755 --- a/configure +++ b/configure @@ -4647,6 +4647,7 @@ fi # build tree in object directory in case the source is not in the current directory DIRS=tests tests/tcg tests/tcg/cris tests/tcg/lm32 tests/libqos tests/qapi-schema tests/tcg/xtensa tests/qemu-iotests +DIRS=$DIRS fsdev DIRS=$DIRS pc-bios/optionrom pc-bios/spapr-rtas pc-bios/s390-ccw DIRS=$DIRS roms/seabios roms/vgabios DIRS=$DIRS qapi-generated Acked-by: Paolo Bonzini pbonz...@redhat.com Please queue it through trivial. Paolo
[Qemu-devel] [PATCHv7 08/17] block/raw: copy BlockLimits on raw_open
Signed-off-by: Peter Lieven p...@kamp.de --- block/raw_bsd.c |1 + 1 file changed, 1 insertion(+) diff --git a/block/raw_bsd.c b/block/raw_bsd.c index b0dd23f..49ac18c 100644 --- a/block/raw_bsd.c +++ b/block/raw_bsd.c @@ -150,6 +150,7 @@ static int raw_open(BlockDriverState *bs, QDict *options, int flags, Error **errp) { bs-sg = bs-file-sg; +bs-bl = bs-file-bl; return 0; } -- 1.7.9.5
[Qemu-devel] [PATCHv7 04/17] block: add logical block provisioning info to BlockDriverInfo
Reviewed-by: Eric Blake ebl...@redhat.com Signed-off-by: Peter Lieven p...@kamp.de --- include/block/block.h | 16 1 file changed, 16 insertions(+) diff --git a/include/block/block.h b/include/block/block.h index 1f30a56..9c76967 100644 --- a/include/block/block.h +++ b/include/block/block.h @@ -18,6 +18,22 @@ typedef struct BlockDriverInfo { /* offset at which the VM state can be saved (0 if not possible) */ int64_t vm_state_offset; bool is_dirty; +/* + * True if unallocated blocks read back as zeroes. This is equivalent + * to the LBPRZ flag in the SCSI logical block provisioning page. + */ +bool unallocated_blocks_are_zero; +/* + * True if the driver can optimize writing zeroes by unmapping + * sectors. This is equivalent to the BLKDISCARDZEROES ioctl in Linux + * with the difference that in qemu a discard is allowed to silently + * fail. Therefore we have to use bdrv_write_zeroes with the + * BDRV_REQ_MAY_UNMAP flag for an optimized zero write with unmapping. + * After this call the driver has to guarantee that the contents read + * back as zero. It is additionally required that the block device is + * opened with BDRV_O_UNMAP flag for this to work. + */ +bool can_write_zeroes_with_unmap; } BlockDriverInfo; typedef struct BlockFragInfo { -- 1.7.9.5
[Qemu-devel] [PATCHv7 03/17] block: introduce BDRV_REQ_MAY_UNMAP request flag
Reviewed-by: Eric Blake ebl...@redhat.com Signed-off-by: Peter Lieven p...@kamp.de --- block-migration.c |3 ++- block.c |4 block/backup.c|2 +- include/block/block.h |7 +++ 4 files changed, 14 insertions(+), 2 deletions(-) diff --git a/block-migration.c b/block-migration.c index 713a8e3..fc4ef93 100644 --- a/block-migration.c +++ b/block-migration.c @@ -780,7 +780,8 @@ static int block_load(QEMUFile *f, void *opaque, int version_id) } if (flags BLK_MIG_FLAG_ZERO_BLOCK) { -ret = bdrv_write_zeroes(bs, addr, nr_sectors, 0); +ret = bdrv_write_zeroes(bs, addr, nr_sectors, +BDRV_REQ_MAY_UNMAP); } else { buf = g_malloc(BLOCK_SIZE); qemu_get_buffer(f, buf, BLOCK_SIZE); diff --git a/block.c b/block.c index 3259429..0d97ce6 100644 --- a/block.c +++ b/block.c @@ -2810,6 +2810,10 @@ int coroutine_fn bdrv_co_write_zeroes(BlockDriverState *bs, { trace_bdrv_co_write_zeroes(bs, sector_num, nb_sectors); +if (!(bs-open_flags BDRV_O_UNMAP)) { +flags = ~BDRV_REQ_MAY_UNMAP; +} + return bdrv_co_do_writev(bs, sector_num, nb_sectors, NULL, BDRV_REQ_ZERO_WRITE | flags); } diff --git a/block/backup.c b/block/backup.c index 830a179..0198514 100644 --- a/block/backup.c +++ b/block/backup.c @@ -139,7 +139,7 @@ static int coroutine_fn backup_do_cow(BlockDriverState *bs, if (buffer_is_zero(iov.iov_base, iov.iov_len)) { ret = bdrv_co_write_zeroes(job-target, start * BACKUP_SECTORS_PER_CLUSTER, - n, 0); + n, BDRV_REQ_MAY_UNMAP); } else { ret = bdrv_co_writev(job-target, start * BACKUP_SECTORS_PER_CLUSTER, n, diff --git a/include/block/block.h b/include/block/block.h index 8ba9f0c..1f30a56 100644 --- a/include/block/block.h +++ b/include/block/block.h @@ -65,6 +65,13 @@ typedef struct BlockDevOps { typedef enum { BDRV_REQ_COPY_ON_READ = 0x1, BDRV_REQ_ZERO_WRITE = 0x2, +/* The BDRV_REQ_MAY_UNMAP flag is used to indicate that the block driver + * is allowed to optimize a write zeroes request by unmapping (discarding) + * blocks if it is guaranteed that the result will read back as + * zeroes. 
The flag is only passed to the driver if the block device is + * opened with BDRV_O_UNMAP. + */ +BDRV_REQ_MAY_UNMAP= 0x4, } BdrvRequestFlags; #define BDRV_O_RDWR0x0002 -- 1.7.9.5
Re: [Qemu-devel] qemu 1.6.1
Il 23/10/2013 21:26, Stefan Weil ha scritto: Am 23.10.2013 11:00, schrieb Paolo Bonzini: Il 23/10/2013 08:39, Michael W. Bombardieri ha scritto: Hi, My newly built qemu/win32 binary (v1.6.1) crashes in qemu-system-i386 and qemu-system-x86_64 when booting from an install CD. C:\Program Files\qemuqemu-system-x86_64 -boot d -vnc 0.0.0.0:20 -cdrom NetBSD-6.1.2-amd64.iso Assertion failed: qemu_in_coroutine(), file qemu-coroutine-lock.c, line 99 This application has requested the Runtime to terminate it in an unusual way. Please contact the application's support team for more information. I noticed that qemu-system-sparc still booted OpenBSD/sparc 5.3 install CD correctly. No further info at this stage. Any ideas? It's a known problem that not everyone can reproduce. Please compile with --disable-coroutine-pool on the configure command line. Paolo This patch also helps (at least for me, tested native and on Linux / Wine): http://repo.or.cz/w/qemu/ar7.git/commit/c777d5d62a729fd8b19847aaa0aad3d7a1f73f47 It looks like a compiler problem related to thread local storage (variable current). Ugh. I recently got several bug reports from a Windows user and included patches to fix them in my personal tree http://repo.or.cz/w/qemu/ar7.git. The binaries on qemu.weilnetz.de are based on that tree. Does something like CoroutineWin32 *from = DO_UPCAST(CoroutineWin32, base, current); also work? Then we can just remove from_. Paolo
[Qemu-devel] [PATCHv7 02/17] block: add flags to bdrv_*_write_zeroes
Reviewed-by: Eric Blake ebl...@redhat.com Signed-off-by: Peter Lieven p...@kamp.de --- block-migration.c |2 +- block.c | 20 +++- block/backup.c|3 ++- block/qcow2-cluster.c |2 +- block/qcow2.c |2 +- block/qed.c |3 ++- block/raw_bsd.c |5 +++-- block/vmdk.c |3 ++- include/block/block.h |4 ++-- include/block/block_int.h |2 +- qemu-io-cmds.c|2 +- 11 files changed, 27 insertions(+), 21 deletions(-) diff --git a/block-migration.c b/block-migration.c index daf9ec1..713a8e3 100644 --- a/block-migration.c +++ b/block-migration.c @@ -780,7 +780,7 @@ static int block_load(QEMUFile *f, void *opaque, int version_id) } if (flags BLK_MIG_FLAG_ZERO_BLOCK) { -ret = bdrv_write_zeroes(bs, addr, nr_sectors); +ret = bdrv_write_zeroes(bs, addr, nr_sectors, 0); } else { buf = g_malloc(BLOCK_SIZE); qemu_get_buffer(f, buf, BLOCK_SIZE); diff --git a/block.c b/block.c index eb11a07..3259429 100644 --- a/block.c +++ b/block.c @@ -79,7 +79,7 @@ static BlockDriverAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs, bool is_write); static void coroutine_fn bdrv_co_do_rw(void *opaque); static int coroutine_fn bdrv_co_do_write_zeroes(BlockDriverState *bs, -int64_t sector_num, int nb_sectors); +int64_t sector_num, int nb_sectors, BdrvRequestFlags flags); static QTAILQ_HEAD(, BlockDriverState) bdrv_states = QTAILQ_HEAD_INITIALIZER(bdrv_states); @@ -2384,10 +2384,11 @@ int bdrv_writev(BlockDriverState *bs, int64_t sector_num, QEMUIOVector *qiov) return bdrv_rwv_co(bs, sector_num, qiov, true, 0); } -int bdrv_write_zeroes(BlockDriverState *bs, int64_t sector_num, int nb_sectors) +int bdrv_write_zeroes(BlockDriverState *bs, int64_t sector_num, + int nb_sectors, BdrvRequestFlags flags) { return bdrv_rw_co(bs, sector_num, NULL, nb_sectors, true, - BDRV_REQ_ZERO_WRITE); + BDRV_REQ_ZERO_WRITE | flags); } int bdrv_pread(BlockDriverState *bs, int64_t offset, @@ -2569,7 +2570,7 @@ static int coroutine_fn bdrv_co_do_copy_on_readv(BlockDriverState *bs, if (drv-bdrv_co_write_zeroes buffer_is_zero(bounce_buffer, 
iov.iov_len)) { ret = bdrv_co_do_write_zeroes(bs, cluster_sector_num, - cluster_nb_sectors); + cluster_nb_sectors, 0); } else { /* This does not change the data on the disk, it is not necessary * to flush even in cache=writethrough mode. @@ -2703,7 +2704,7 @@ int coroutine_fn bdrv_co_copy_on_readv(BlockDriverState *bs, } static int coroutine_fn bdrv_co_do_write_zeroes(BlockDriverState *bs, -int64_t sector_num, int nb_sectors) +int64_t sector_num, int nb_sectors, BdrvRequestFlags flags) { BlockDriver *drv = bs-drv; QEMUIOVector qiov; @@ -2715,7 +2716,7 @@ static int coroutine_fn bdrv_co_do_write_zeroes(BlockDriverState *bs, /* First try the efficient write zeroes operation */ if (drv-bdrv_co_write_zeroes) { -ret = drv-bdrv_co_write_zeroes(bs, sector_num, nb_sectors); +ret = drv-bdrv_co_write_zeroes(bs, sector_num, nb_sectors, flags); if (ret != -ENOTSUP) { return ret; } @@ -2770,7 +2771,7 @@ static int coroutine_fn bdrv_co_do_writev(BlockDriverState *bs, if (ret 0) { /* Do nothing, write notifier decided to fail this request */ } else if (flags BDRV_REQ_ZERO_WRITE) { -ret = bdrv_co_do_write_zeroes(bs, sector_num, nb_sectors); +ret = bdrv_co_do_write_zeroes(bs, sector_num, nb_sectors, flags); } else { ret = drv-bdrv_co_writev(bs, sector_num, nb_sectors, qiov); } @@ -2804,12 +2805,13 @@ int coroutine_fn bdrv_co_writev(BlockDriverState *bs, int64_t sector_num, } int coroutine_fn bdrv_co_write_zeroes(BlockDriverState *bs, - int64_t sector_num, int nb_sectors) + int64_t sector_num, int nb_sectors, + BdrvRequestFlags flags) { trace_bdrv_co_write_zeroes(bs, sector_num, nb_sectors); return bdrv_co_do_writev(bs, sector_num, nb_sectors, NULL, - BDRV_REQ_ZERO_WRITE); + BDRV_REQ_ZERO_WRITE | flags); } /** diff --git a/block/backup.c b/block/backup.c index cad14c9..830a179 100644 --- a/block/backup.c +++ b/block/backup.c @@ -138,7 +138,8 @@ static int coroutine_fn backup_do_cow(BlockDriverState *bs, if (buffer_is_zero(iov.iov_base, iov.iov_len)) { ret = 
bdrv_co_write_zeroes(job-target, - start * BACKUP_SECTORS_PER_CLUSTER, n); +
Re: [Qemu-devel] [PATCH v3 0/4] Curling: KVM Fault Tolerance
On Wed, Oct 23, 2013 at 1:08 AM, Jules junqing.w...@cs2c.com.cn wrote: On Tue, Oct 15, 2013 at 03:26:19PM +0800, Jules Wang wrote: v2 - v3: * add documentation of new option in qapi-schema. * long option name: ft - fault-tolerant v1 - v2: * cmdline: migrate curling:tcp:address:port - migrate -f tcp:address:port * sender: use QEMU_VM_FILE_MAGIC_FT as the header of the migration to indicate this is a ft migration. * receiver: look for the signature: QEMU_VM_EOF_MAGIC + QEMU_VM_FILE_MAGIC_FT(64bit total) which indicates the end of one migration. -- Jules Wang (4): Curling: add doc Curling: cmdline interface. Curling: the sender Curling: the receiver First of all, thanks for your superb and spot-on comments. It would be helpful to clarify the status of Curling in the cover letter email so reviewers know what to expect. OK, but I'm not quite clear about how to clarify the status, would you pls give me an example? That status would be an explanation of what is current included in the patch, which functionality already works, and what you still plan to implement before the series can be merged. This series does not address I/O or failover. I guess you are aware of the missing topics that I mentioned, here are my thoughts on them: I/O needs to be held back until the destination host has acknowledged receiving the last full migration state. The outside world cannot witness state changes in the guest until the migration state has been successfully transferred to the destination host. Otherwise the guest may appear to act incorrectly when resuming execution from the last snapshot. The time period used by the FT sender thread determines how much latency is added to I/O requests. Yes, there is the latency. That is inevitable. I guess you mean the following situation: If a msg 'hello' is sent to the chat room server just a few seconds before the failover happens, there is a possibility that the msg will be sent to the others twice or be lost. Am I right? 
Yes, and this is a fundamental requirement for FT. I/O is not idempotent. This means it is not possible to repeat the same operation twice and get the same result. Other fault tolerance solutions include a mechanism to hold back I/O until the checkpoint has been committed by the other host. This way no I/O is repeated and applications will not break during failover. For example, imagine a compare and swap operation. If the VM sends out a compare and swap command to a remote server and fails, then your current patches may send the command again on the other host. The problem is that the command will not succeed the second time and therefore the application fails with an error. Failover functionality is missing from these patches. We cannot simply start executing on the destination host when the migration connection ends. If the guest disk image is located on shared storage then split-brain occurs when a network error terminates the migration connection - will both hosts begin accessing the shared disk? YES I have a simple way to handle that. In one word, the third point --gateway. Both the sender and the receiver check the connectivity to the gateway every X seconds. Let's use A and B stand for whether the sender and the receiver are connected to the gateway respectively. When the connection between the sender and the receiver is down. A B is false. If A is false, the vm instance at the sender will be stopped. If B is false, the vm instance at the receiver will not be started. a.A false B false: 0 vm run b.A false B true: 1 vm run c.A true B false: 1 vm run d.A true B true : 1 vm run (normal case) It becomes complicated when we consider the state transitions in these four states. I suggest adding this feature to libvirt instead of qemu. I agree that the details of the failover (aka quorum and fencing) should be implemented as policies outside QEMU, if possible. 
Also, there were two presentations about fault tolerance at KVM Forum 2013 a few days ago: https://docs.google.com/file/d/0BzyAwvVlQckebVBrNXdlaTdWVUk/edit https://docs.google.com/file/d/0BzyAwvVlQckeczNUZHRod28yVXc/edit Stefan
Re: [Qemu-devel] [PATCH resend] sdl: Reverse support for video mode setting
This patch has been confirmed by the reporter himself as link below, https://bugs.launchpad.net/qemu/+bug/1216368 It has been on the mailing list for a while, could it be merged? PING... On 09/04/2013 05:07 PM, Lei Li wrote: Currently, If the setting of video mode failed, qemu will exit. It should go back to the previous setting if the new screen resolution failed. This patch fixes LP#1216368, add support to revert to existing surface for the failure of video mode setting. Reported-by: Sascha Krissler sas...@srlabs.de Signed-off-by: Lei Li li...@linux.vnet.ibm.com --- ui/sdl.c | 23 +++ 1 files changed, 19 insertions(+), 4 deletions(-) diff --git a/ui/sdl.c b/ui/sdl.c index 39a42d6..9d8583c 100644 --- a/ui/sdl.c +++ b/ui/sdl.c @@ -86,6 +86,7 @@ static void sdl_update(DisplayChangeListener *dcl, static void do_sdl_resize(int width, int height, int bpp) { int flags; +SDL_Surface *tmp_screen; //printf(resizing to %d %d\n, w, h); @@ -98,12 +99,26 @@ static void do_sdl_resize(int width, int height, int bpp) if (gui_noframe) flags |= SDL_NOFRAME; -real_screen = SDL_SetVideoMode(width, height, bpp, flags); +tmp_screen = SDL_SetVideoMode(width, height, bpp, flags); if (!real_screen) { - fprintf(stderr, Could not open SDL display (%dx%dx%d): %s\n, width, - height, bpp, SDL_GetError()); -exit(1); +if (!tmp_screen) { +fprintf(stderr, Could not open SDL display (%dx%dx%d): %s\n, +width, height, bpp, SDL_GetError()); +exit(1); +} +} else { +/* + * Revert to the previous video mode if the change of resizing or + * resolution failed. + */ +if (!tmp_screen) { +fprintf(stderr, Failed to set SDL display (%dx%dx%d): %s\n, +width, height, bpp, SDL_GetError()); +return; +} } + +real_screen = tmp_screen; } static void sdl_switch(DisplayChangeListener *dcl, -- Lei
Re: [Qemu-devel] [Xen-devel] Hvmloader: Modify ACPI to only supply _EJ0 methods for PCIslots that support hotplug by runtime patching
Il 24/10/2013 14:17, Gonglei (Arei) ha scritto: -Original Message- From: Jan Beulich [mailto:jbeul...@suse.com] Sent: Tuesday, October 22, 2013 4:06 PM To: Gonglei (Arei) Cc: anthony.per...@citrix.com; Ian Campbell; Stefano Stabellini; Gaowei (UVP); Hanweidong (Randy); Huangweidong (Hardware); Luonengjun; Yanqiangjun; xen-de...@lists.xen.org; Fabio Fantoni; qemu-devel@nongnu.org; Markus Armbruster Subject: RE: [Qemu-devel] [Xen-devel] Hvmloader: Modify ACPI to only supply _EJ0 methods for PCIslots that support hotplug by runtime patching On 22.10.13 at 06:08, Gonglei (Arei) arei.gong...@huawei.com wrote: Hi, guys. The new patch has been modified based on the principles you suggested, thank you so much. Last time I test the patch based on the codes of 4.3.0. This time, I found that the system based on the codes of trunk causes the VM reboot again and again, which I have not found out the reason. So i can not test the patch based on the codes of trunk (details in EJ0_ACPI_PCI_Hotplug.patch).. I'm afraid we will need you to figure out that problem first, and then do the verification on -unstable. Even if the code shouldn't be that different from 4.3, we still don't want to apply completely untested stuff. Hi, Jan. We found that the reason that we used a wrong seabios PATH, and the hvmloader can't load the bios.bin. So the VM restart again and again after we start it. That's our fault. Now I test the patch based on the codes of trunk, which works well. The patch has been modified after your suggestion. The patch works well with upstream qemu and doesn't affect the system with traditional qemu. 
--- a/tools/firmware/hvmloader/ovmf.c +++ b/tools/firmware/hvmloader/ovmf.c @@ -79,7 +79,11 @@ static void ovmf_acpi_build_tables(void) .dsdt_anycpu = dsdt_anycpu, .dsdt_anycpu_len = dsdt_anycpu_len, .dsdt_15cpu = NULL, -.dsdt_15cpu_len = 0 +.dsdt_15cpu_len = 0, +.aml_ej0_name = NULL, +.aml_adr_dword = NULL, +.aml_ej0_name_len = 0, +.aml_adr_dword_len = 0, I don't see why you're adding these. Insurance purposes is that just initialize the struct. Signed-off-by: Gaowei gao.gao...@huawei.com Signed-off-by: gonglei arei.gong...@huawei.com Tested-by: Fabio Fantoni fabio.fant...@m2r.biz Tested on xen unstable with qemu 1.6.1, no problem found for now. Only one question: this patch remove hotplug only from essentials pci device, right? On windows 7 hotplug continues to show: virtio-serial driver, xen pci device driver and hd audio. Thanks for any reply. --- tools/firmware/hvmloader/acpi/Makefile | 37 ++- tools/firmware/hvmloader/acpi/acpi2_0.h| 4 + tools/firmware/hvmloader/acpi/build.c | 21 +- tools/firmware/hvmloader/acpi/dsdt.asl | 1 + tools/firmware/hvmloader/acpi/mk_dsdt.c| 2 + tools/firmware/hvmloader/ovmf.c| 6 +- tools/firmware/hvmloader/rombios.c | 4 + tools/firmware/hvmloader/seabios.c | 8 + tools/firmware/hvmloader/tools/acpi_extract.py | 308 + .../hvmloader/tools/acpi_extract_preprocess.py | 41 +++ 10 files changed, 419 insertions(+), 13 deletions(-) create mode 100644 tools/firmware/hvmloader/tools/acpi_extract.py create mode 100644 tools/firmware/hvmloader/tools/acpi_extract_preprocess.py diff --git a/tools/firmware/hvmloader/acpi/Makefile b/tools/firmware/hvmloader/acpi/Makefile index 2c50851..b96e058 100644 --- a/tools/firmware/hvmloader/acpi/Makefile +++ b/tools/firmware/hvmloader/acpi/Makefile @@ -24,30 +24,45 @@ OBJS = $(patsubst %.c,%.o,$(C_SRC)) CFLAGS += $(CFLAGS_xeninclude) vpath iasl $(PATH) + +.DELETE_ON_ERROR: $(filter dsdt_%.c,$(C_SRC)) + all: acpi.a ssdt_s3.h ssdt_s4.h ssdt_pm.h ssdt_tpm.h: %.h: %.asl iasl iasl -vs -p $* -tc $ - sed -e 
's/AmlCode/$*/g' $*.hex $@ + sed -e 's/AmlCode/$*/g' $*.hex $@.tmp + $(call move-if-changed,$@.tmp $@) rm -f $*.hex $*.aml mk_dsdt: mk_dsdt.c $(HOSTCC) $(HOSTCFLAGS) $(CFLAGS_xeninclude) -o $@ mk_dsdt.c dsdt_anycpu_qemu_xen.asl: dsdt.asl mk_dsdt - awk 'NR 1 {print s} {s=$$0}' $ $@ - ./mk_dsdt --dm-version qemu-xen $@ + awk 'NR 1 {print s} {s=$$0}' $ $@.tmp + sed -i 's/AmlCode/dsdt_anycpu_qemu_xen/g' $@.tmp + ./mk_dsdt --dm-version qemu-xen $@.tmp + sed -i 's/aml_ej0_name/dsdt_anycpu_qemu_xen_aml_ej0_name/g' $@.tmp + sed -i 's/aml_adr_dword/dsdt_anycpu_qemu_xen_aml_adr_dword/g' $@.tmp + $(call move-if-changed,$@.tmp $@) # NB. awk invocation is a portable alternative to 'head -n -1' dsdt_%cpu.asl: dsdt.asl mk_dsdt - awk 'NR 1 {print s} {s=$$0}' $ $@ - ./mk_dsdt --maxcpu $* $@ + awk 'NR 1 {print s} {s=$$0}' $ $@.tmp + sed -i 's/AmlCode/dsdt_$*cpu/g' $@.tmp + ./mk_dsdt --maxcpu $* $@.tmp + $(call
[Qemu-devel] [PULL for-1.7 0/7] usb fixes
Hi, Here comes a collection of bugfixes for xhci and usb-host, mostly related to usb3 streams. please pull, Gerd The following changes since commit fc8ead74674b7129e8f31c2595c76658e5622197: Merge remote-tracking branch 'qemu-kvm/uq/master' into staging (2013-10-18 10:03:24 -0700) are available in the git repository at: git://git.kraxel.org/qemu usb.91 for you to fetch changes up to c90daa1c109348099088c1cc954c1e9f3392ae03: usb-hcd-xhci: Update endpoint context dequeue pointer for streams too (2013-10-22 16:28:49 +0200) Hans de Goede (7): usb-host-libusb: Fix reset handling usb-host-libusb: Configuration 0 may be a valid configuration usb-host-libusb: Detach kernel drivers earlier usb-hcd-xhci: Remove unused sstreamsm member from XHCIStreamContext usb-hcd-xhci: Remove unused cancelled member from XHCITransfer usb-hcd-xhci: Report completion of active transfer with CC_STOPPED on ep stop usb-hcd-xhci: Update endpoint context dequeue pointer for streams too hw/usb/hcd-xhci.c| 50 ++ hw/usb/host-libusb.c | 26 +- 2 files changed, 39 insertions(+), 37 deletions(-)
[Qemu-devel] [PATCH 7/7] usb-hcd-xhci: Update endpoint context dequeue pointer for streams too
From: Hans de Goede hdego...@redhat.com With streams the endpoint context dequeue pointer should point to the dequeue value for the currently active stream. At least Linux guests expect it to point to value set by an set_ep_dequeue upon completion of the set_ep_dequeue (before kicking the ep). Otherwise the Linux kernel will complain (and things won't work): xhci_hcd :00:05.0: Mismatch between completed Set TR Deq Ptr command xHCI internal state. xhci_hcd :00:05.0: ep deq seg = 8800366f0880, deq ptr = 8800366ec010 Signed-off-by: Hans de Goede hdego...@redhat.com Signed-off-by: Gerd Hoffmann kra...@redhat.com --- hw/usb/hcd-xhci.c | 10 -- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/hw/usb/hcd-xhci.c b/hw/usb/hcd-xhci.c index 0131151..fa27299 100644 --- a/hw/usb/hcd-xhci.c +++ b/hw/usb/hcd-xhci.c @@ -1187,6 +1187,7 @@ static XHCIStreamContext *xhci_find_stream(XHCIEPContext *epctx, static void xhci_set_ep_state(XHCIState *xhci, XHCIEPContext *epctx, XHCIStreamContext *sctx, uint32_t state) { +XHCIRing *ring = NULL; uint32_t ctx[5]; uint32_t ctx2[2]; @@ -1197,6 +1198,7 @@ static void xhci_set_ep_state(XHCIState *xhci, XHCIEPContext *epctx, /* update ring dequeue ptr */ if (epctx-nr_pstreams) { if (sctx != NULL) { +ring = sctx-ring; xhci_dma_read_u32s(xhci, sctx-pctx, ctx2, sizeof(ctx2)); ctx2[0] = 0xe; ctx2[0] |= sctx-ring.dequeue | sctx-ring.ccs; @@ -1204,8 +1206,12 @@ static void xhci_set_ep_state(XHCIState *xhci, XHCIEPContext *epctx, xhci_dma_write_u32s(xhci, sctx-pctx, ctx2, sizeof(ctx2)); } } else { -ctx[2] = epctx-ring.dequeue | epctx-ring.ccs; -ctx[3] = (epctx-ring.dequeue 16) 16; +ring = epctx-ring; +} +if (ring) { +ctx[2] = ring-dequeue | ring-ccs; +ctx[3] = (ring-dequeue 16) 16; + DPRINTF(xhci: set epctx: DMA_ADDR_FMT state=%d dequeue=%08x%08x\n, epctx-pctx, state, ctx[3], ctx[2]); } -- 1.8.3.1
[Qemu-devel] [PATCH 3/7] usb-host-libusb: Detach kernel drivers earlier
From: Hans de Goede hdego...@redhat.com If we detach the kernel drivers on the first set_config, then they will be still attached when the device gets its initial reset. Causing the drivers to re-initialize the device after the reset, dirtying the device state. Signed-off-by: Hans de Goede hdego...@redhat.com Signed-off-by: Gerd Hoffmann kra...@redhat.com --- hw/usb/host-libusb.c | 7 +-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/hw/usb/host-libusb.c b/hw/usb/host-libusb.c index 35bae55..fd320cd 100644 --- a/hw/usb/host-libusb.c +++ b/hw/usb/host-libusb.c @@ -137,6 +137,7 @@ static QTAILQ_HEAD(, USBHostDevice) hostdevs = static void usb_host_auto_check(void *unused); static void usb_host_release_interfaces(USBHostDevice *s); static void usb_host_nodev(USBHostDevice *s); +static void usb_host_detach_kernel(USBHostDevice *s); static void usb_host_attach_kernel(USBHostDevice *s); /* */ @@ -787,10 +788,13 @@ static int usb_host_open(USBHostDevice *s, libusb_device *dev) goto fail; } -libusb_get_device_descriptor(dev, s-ddesc); s-dev = dev; s-bus_num = bus_num; s-addr= addr; + +usb_host_detach_kernel(s); + +libusb_get_device_descriptor(dev, s-ddesc); usb_host_get_port(s-dev, s-port, sizeof(s-port)); usb_ep_init(udev); @@ -1051,7 +1055,6 @@ static void usb_host_set_config(USBHostDevice *s, int config, USBPacket *p) trace_usb_host_set_config(s-bus_num, s-addr, config); usb_host_release_interfaces(s); -usb_host_detach_kernel(s); rc = libusb_set_configuration(s-dh, config); if (rc != 0) { usb_host_libusb_error(libusb_set_configuration, rc); -- 1.8.3.1
[Qemu-devel] [PATCH 4/7] usb-hcd-xhci: Remove unused sstreamsm member from XHCIStreamContext
From: Hans de Goede hdego...@redhat.com Signed-off-by: Hans de Goede hdego...@redhat.com Signed-off-by: Gerd Hoffmann kra...@redhat.com --- hw/usb/hcd-xhci.c | 9 - 1 file changed, 9 deletions(-) diff --git a/hw/usb/hcd-xhci.c b/hw/usb/hcd-xhci.c index 469c24d..e078c50 100644 --- a/hw/usb/hcd-xhci.c +++ b/hw/usb/hcd-xhci.c @@ -374,7 +374,6 @@ struct XHCIStreamContext { dma_addr_t pctx; unsigned int sct; XHCIRing ring; -XHCIStreamContext *sstreams; }; struct XHCIEPContext { @@ -1133,7 +1132,6 @@ static void xhci_reset_streams(XHCIEPContext *epctx) for (i = 0; i epctx-nr_pstreams; i++) { epctx-pstreams[i].sct = -1; -g_free(epctx-pstreams[i].sstreams); } } @@ -1146,15 +1144,8 @@ static void xhci_alloc_streams(XHCIEPContext *epctx, dma_addr_t base) static void xhci_free_streams(XHCIEPContext *epctx) { -int i; - assert(epctx-pstreams != NULL); -if (!epctx-lsa) { -for (i = 0; i epctx-nr_pstreams; i++) { -g_free(epctx-pstreams[i].sstreams); -} -} g_free(epctx-pstreams); epctx-pstreams = NULL; epctx-nr_pstreams = 0; -- 1.8.3.1
[Qemu-devel] [PATCH 6/7] usb-hcd-xhci: Report completion of active transfer with CC_STOPPED on ep stop
From: Hans de Goede hdego...@redhat.com As we should per the XHCI spec 4.6.9 Stop Endpoint. Signed-off-by: Hans de Goede hdego...@redhat.com Signed-off-by: Gerd Hoffmann kra...@redhat.com --- hw/usb/hcd-xhci.c | 26 ++ 1 file changed, 18 insertions(+), 8 deletions(-) diff --git a/hw/usb/hcd-xhci.c b/hw/usb/hcd-xhci.c index 7cf89ce..0131151 100644 --- a/hw/usb/hcd-xhci.c +++ b/hw/usb/hcd-xhci.c @@ -505,6 +505,7 @@ static void xhci_kick_ep(XHCIState *xhci, unsigned int slotid, unsigned int epid, unsigned int streamid); static TRBCCode xhci_disable_ep(XHCIState *xhci, unsigned int slotid, unsigned int epid); +static void xhci_xfer_report(XHCITransfer *xfer); static void xhci_event(XHCIState *xhci, XHCIEvent *event, int v); static void xhci_write_event(XHCIState *xhci, XHCIEvent *event, int v); static USBEndpoint *xhci_epid_to_usbep(XHCIState *xhci, @@ -1302,10 +1303,15 @@ static TRBCCode xhci_enable_ep(XHCIState *xhci, unsigned int slotid, return CC_SUCCESS; } -static int xhci_ep_nuke_one_xfer(XHCITransfer *t) +static int xhci_ep_nuke_one_xfer(XHCITransfer *t, TRBCCode report) { int killed = 0; +if (report (t-running_async || t-running_retry)) { +t-status = report; +xhci_xfer_report(t); +} + if (t-running_async) { usb_cancel_packet(t-packet); t-running_async = 0; @@ -1318,6 +1324,7 @@ static int xhci_ep_nuke_one_xfer(XHCITransfer *t) timer_del(epctx-kick_timer); } t-running_retry = 0; +killed = 1; } if (t-trbs) { g_free(t-trbs); @@ -1330,7 +1337,7 @@ static int xhci_ep_nuke_one_xfer(XHCITransfer *t) } static int xhci_ep_nuke_xfers(XHCIState *xhci, unsigned int slotid, - unsigned int epid) + unsigned int epid, TRBCCode report) { XHCISlot *slot; XHCIEPContext *epctx; @@ -1351,7 +1358,10 @@ static int xhci_ep_nuke_xfers(XHCIState *xhci, unsigned int slotid, xferi = epctx-next_xfer; for (i = 0; i TD_QUEUE; i++) { -killed += xhci_ep_nuke_one_xfer(epctx-transfers[xferi]); +killed += xhci_ep_nuke_one_xfer(epctx-transfers[xferi], report); +if (killed) { +report = 0; /* Only 
report once */ +} epctx-transfers[xferi].packet.ep = NULL; xferi = (xferi + 1) % TD_QUEUE; } @@ -1381,7 +1391,7 @@ static TRBCCode xhci_disable_ep(XHCIState *xhci, unsigned int slotid, return CC_SUCCESS; } -xhci_ep_nuke_xfers(xhci, slotid, epid); +xhci_ep_nuke_xfers(xhci, slotid, epid, 0); epctx = slot-eps[epid-1]; @@ -1423,7 +1433,7 @@ static TRBCCode xhci_stop_ep(XHCIState *xhci, unsigned int slotid, return CC_EP_NOT_ENABLED_ERROR; } -if (xhci_ep_nuke_xfers(xhci, slotid, epid) 0) { +if (xhci_ep_nuke_xfers(xhci, slotid, epid, CC_STOPPED) 0) { fprintf(stderr, xhci: FIXME: endpoint stopped w/ xfers running, data might be lost\n); } @@ -1468,7 +1478,7 @@ static TRBCCode xhci_reset_ep(XHCIState *xhci, unsigned int slotid, return CC_CONTEXT_STATE_ERROR; } -if (xhci_ep_nuke_xfers(xhci, slotid, epid) 0) { +if (xhci_ep_nuke_xfers(xhci, slotid, epid, 0) 0) { fprintf(stderr, xhci: FIXME: endpoint reset w/ xfers running, data might be lost\n); } @@ -2461,7 +2471,7 @@ static void xhci_detach_slot(XHCIState *xhci, USBPort *uport) for (ep = 0; ep 31; ep++) { if (xhci-slots[slot].eps[ep]) { -xhci_ep_nuke_xfers(xhci, slot+1, ep+1); +xhci_ep_nuke_xfers(xhci, slot + 1, ep + 1, 0); } } xhci-slots[slot].uport = NULL; @@ -3276,7 +3286,7 @@ static void xhci_complete(USBPort *port, USBPacket *packet) XHCITransfer *xfer = container_of(packet, XHCITransfer, packet); if (packet-status == USB_RET_REMOVE_FROM_QUEUE) { -xhci_ep_nuke_one_xfer(xfer); +xhci_ep_nuke_one_xfer(xfer, 0); return; } xhci_complete_packet(xfer); -- 1.8.3.1
[Qemu-devel] [PATCH 1/7] usb-host-libusb: Fix reset handling
From: Hans de Goede hdego...@redhat.com The guest will issue an initial device reset when the device is attached, but since the current usb-host-libusb code only actually does the reset when udev->configuration != 0, and on attach the device is not yet configured, the reset gets ignored. This means that the device gets passed to the guest in an unknown state, which is not good. The udev->configuration check is there because of the release / claim interfaces done around the libusb_reset_device call, but these are not necessary. If interfaces are claimed when libusb_reset_device gets called, libusb will release + reclaim them itself. The usb_host_ep_update call is also not necessary. If the reset succeeds, the original config and interface alt settings will be restored. Lastly, if the reset fails, that means the device has either disconnected or morphed into another device and has been completely re-enumerated, so it is treated by the host as a new device and our handle is invalid, so on reset failure we need to call usb_host_nodev(). Signed-off-by: Hans de Goede hdego...@redhat.com Signed-off-by: Gerd Hoffmann kra...@redhat.com --- hw/usb/host-libusb.c | 10 -- 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/hw/usb/host-libusb.c b/hw/usb/host-libusb.c index 128955d..428c7c5 100644 --- a/hw/usb/host-libusb.c +++ b/hw/usb/host-libusb.c @@ -1256,16 +1256,14 @@ static void usb_host_flush_ep_queue(USBDevice *dev, USBEndpoint *ep) static void usb_host_handle_reset(USBDevice *udev) { USBHostDevice *s = USB_HOST_DEVICE(udev); +int rc; trace_usb_host_reset(s->bus_num, s->addr); -if (udev->configuration == 0) { -return; +rc = libusb_reset_device(s->dh); +if (rc != 0) { +usb_host_nodev(s); } -usb_host_release_interfaces(s); -libusb_reset_device(s->dh); -usb_host_claim_interfaces(s, 0); -usb_host_ep_update(s); } /* -- 1.8.3.1
[Qemu-devel] [PATCH 2/7] usb-host-libusb: Configuration 0 may be a valid configuration
From: Hans de Goede hdego...@redhat.com Quoting from: linux/Documentation/ABI/stable/sysfs-bus-usb: Note that some devices, in violation of the USB spec, have a configuration with a value equal to 0. Writing 0 to bConfigurationValue for these devices will install that configuration, rather then unconfigure the device. So don't compare the configuration value against 0 to check for unconfigured devices, instead check for a LIBUSB_ERROR_NOT_FOUND return from libusb_get_active_config_descriptor(). Signed-off-by: Hans de Goede hdego...@redhat.com Signed-off-by: Gerd Hoffmann kra...@redhat.com --- hw/usb/host-libusb.c | 9 - 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/hw/usb/host-libusb.c b/hw/usb/host-libusb.c index 428c7c5..35bae55 100644 --- a/hw/usb/host-libusb.c +++ b/hw/usb/host-libusb.c @@ -992,15 +992,14 @@ static int usb_host_claim_interfaces(USBHostDevice *s, int configuration) udev-ninterfaces = 0; udev-configuration = 0; -if (configuration == 0) { -/* address state - ignore */ -return USB_RET_SUCCESS; -} - usb_host_detach_kernel(s); rc = libusb_get_active_config_descriptor(s-dev, conf); if (rc != 0) { +if (rc == LIBUSB_ERROR_NOT_FOUND) { +/* address state - ignore */ +return USB_RET_SUCCESS; +} return USB_RET_STALL; } -- 1.8.3.1
[Qemu-devel] [PATCH 5/7] usb-hcd-xhci: Remove unused cancelled member from XHCITransfer
From: Hans de Goede hdego...@redhat.com Since qemu's USB model is geared towards emulated devices, cancellation is instantaneous, so there is no need to wait for cancellation to complete. As such there is no wait-for-cancellation code, and the cancelled bool as well as the bogus comment about it can be removed. Signed-off-by: Hans de Goede hdego...@redhat.com Signed-off-by: Gerd Hoffmann kra...@redhat.com --- hw/usb/hcd-xhci.c | 5 - 1 file changed, 5 deletions(-) diff --git a/hw/usb/hcd-xhci.c b/hw/usb/hcd-xhci.c index e078c50..7cf89ce 100644 --- a/hw/usb/hcd-xhci.c +++ b/hw/usb/hcd-xhci.c @@ -346,7 +346,6 @@ typedef struct XHCITransfer { QEMUSGList sgl; bool running_async; bool running_retry; -bool cancelled; bool complete; bool int_req; unsigned int iso_pkts; @@ -1310,8 +1309,6 @@ static int xhci_ep_nuke_one_xfer(XHCITransfer *t) if (t->running_async) { usb_cancel_packet(&t->packet); t->running_async = 0; -t->cancelled = 1; -DPRINTF("xhci: cancelling transfer, waiting for it to complete\n"); killed = 1; } if (t->running_retry) { @@ -1728,14 +1725,12 @@ static int xhci_complete_packet(XHCITransfer *xfer) xfer->running_async = 1; xfer->running_retry = 0; xfer->complete = 0; -xfer->cancelled = 0; return 0; } else if (xfer->packet.status == USB_RET_NAK) { trace_usb_xhci_xfer_nak(xfer); xfer->running_async = 0; xfer->running_retry = 1; xfer->complete = 0; -xfer->cancelled = 0; return 0; } else { xfer->running_async = 0; -- 1.8.3.1
Re: [Qemu-devel] [Xen-devel] Hvmloader: Modify ACPI to only supply _EJ0 methods for PCIslots that support hotplug by runtime patching
-Original Message- From: Fabio Fantoni [mailto:fabio.fant...@m2r.biz] Sent: Thursday, October 24, 2013 8:58 PM To: Gonglei (Arei); Jan Beulich Cc: anthony.per...@citrix.com; Ian Campbell; Stefano Stabellini; Gaowei (UVP); Hanweidong (Randy); Huangweidong (Hardware); Luonengjun; Yanqiangjun; xen-de...@lists.xen.org; qemu-devel@nongnu.org; Markus Armbruster Subject: Re: [Qemu-devel] [Xen-devel] Hvmloader: Modify ACPI to only supply _EJ0 methods for PCIslots that support hotplug by runtime patching Il 24/10/2013 14:17, Gonglei (Arei) ha scritto: -Original Message- From: Jan Beulich [mailto:jbeul...@suse.com] Sent: Tuesday, October 22, 2013 4:06 PM To: Gonglei (Arei) Cc: anthony.per...@citrix.com; Ian Campbell; Stefano Stabellini; Gaowei (UVP); Hanweidong (Randy); Huangweidong (Hardware); Luonengjun; Yanqiangjun; xen-de...@lists.xen.org; Fabio Fantoni; qemu-devel@nongnu.org; Markus Armbruster Subject: RE: [Qemu-devel] [Xen-devel] Hvmloader: Modify ACPI to only supply _EJ0 methods for PCIslots that support hotplug by runtime patching On 22.10.13 at 06:08, Gonglei (Arei) arei.gong...@huawei.com wrote: Hi, guys. The new patch has been modified based on the principles you suggested, thank you so much. Last time I test the patch based on the codes of 4.3.0. This time, I found that the system based on the codes of trunk causes the VM reboot again and again, which I have not found out the reason. So i can not test the patch based on the codes of trunk (details in EJ0_ACPI_PCI_Hotplug.patch).. I'm afraid we will need you to figure out that problem first, and then do the verification on -unstable. Even if the code shouldn't be that different from 4.3, we still don't want to apply completely untested stuff. Hi, Jan. We found that the reason that we used a wrong seabios PATH, and the hvmloader can't load the bios.bin. So the VM restart again and again after we start it. That's our fault. Now I test the patch based on the codes of trunk, which works well. 
The patch has been modified after your suggestion. The patch works well with upstream qemu and doesn't affect the system with traditional qemu. --- a/tools/firmware/hvmloader/ovmf.c +++ b/tools/firmware/hvmloader/ovmf.c @@ -79,7 +79,11 @@ static void ovmf_acpi_build_tables(void) .dsdt_anycpu = dsdt_anycpu, .dsdt_anycpu_len = dsdt_anycpu_len, .dsdt_15cpu = NULL, -.dsdt_15cpu_len = 0 +.dsdt_15cpu_len = 0, +.aml_ej0_name = NULL, +.aml_adr_dword = NULL, +.aml_ej0_name_len = 0, +.aml_adr_dword_len = 0, I don't see why you're adding these. It is only for insurance purposes: it just initializes the struct explicitly. Signed-off-by: Gaowei gao.gao...@huawei.com Signed-off-by: gonglei arei.gong...@huawei.com Tested-by: Fabio Fantoni fabio.fant...@m2r.biz Tested on xen unstable with qemu 1.6.1, no problem found for now. Only one question: this patch removes hotplug only from essential pci devices, right? On windows 7 hotplug continues to show: virtio-serial driver, xen pci device driver and hd audio. It depends on the hotplug property of the pci devices' class emulated by upstream qemu. If you set k->no_hotplug = 1 in the class_init function for those pci devices, they will not be shown in the Windows guest any more. Thanks for any reply. Best regards, -Gonglei
[Qemu-devel] [PATCH 1/1] audio: honor QEMU_AUDIO_TIMER_PERIOD instead of waking up every *nano* second
From: Hans de Goede hdego...@redhat.com Now that we no longer have MIN_REARM_TIMER_NS a bug in the audio subsys has clearly shown itself by trying to make a timer fire every nanosecond. Note we have a similar problem in 1.6, 1.5 and older but there MIN_REARM_TIMER_NS limits the wakeups caused by audio being active to 4000 times / second. This still causes a host cpu load of 50 % for simply playing audio, whereas with this patch git master is at 13%, so we should backport this to 1.5 and 1.6 too. Note this will not apply to 1.5 and 1.6 as is. Cc: qemu-sta...@nongnu.org Signed-off-by: Hans de Goede hdego...@redhat.com Signed-off-by: Gerd Hoffmann kra...@redhat.com --- audio/audio.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/audio/audio.c b/audio/audio.c index af4cdf6..b3db679 100644 --- a/audio/audio.c +++ b/audio/audio.c @@ -1124,7 +1124,8 @@ static int audio_is_timer_needed (void) static void audio_reset_timer (AudioState *s) { if (audio_is_timer_needed ()) { -timer_mod (s->ts, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + 1); +timer_mod (s->ts, +qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + conf.period.ticks); } else { timer_del (s->ts); -- 1.8.3.1
[Qemu-devel] [PULL for-1.7 0/1] audio bugfix
Hi, Single audio bugfix for 1.7. please pull, Gerd The following changes since commit fc8ead74674b7129e8f31c2595c76658e5622197: Merge remote-tracking branch 'qemu-kvm/uq/master' into staging (2013-10-18 10:03:24 -0700) are available in the git repository at: git://git.kraxel.org/qemu audio.2 for you to fetch changes up to b4350deed67b95651896ddb60cf9f765093a4848: audio: honor QEMU_AUDIO_TIMER_PERIOD instead of waking up every *nano* second (2013-10-23 10:37:27 +0200) Hans de Goede (1): audio: honor QEMU_AUDIO_TIMER_PERIOD instead of waking up every *nano* second audio/audio.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-)
Re: [Qemu-devel] [PATCH 01/17] rename is_active to is_block_active
Il 22/10/2013 04:25, Lei Li ha scritto: is_active is used to identify block migration, rename to is_block_active to make it more clear. No, is_active is used to identify whether a set of SaveVMHandlers is active. The default is true, so only block migration is using it. But we could use it in the future for other features (probably using migration capabilities instead of a flag as is the case for block). Paolo Signed-off-by: Lei Li li...@linux.vnet.ibm.com --- block-migration.c |2 +- include/migration/vmstate.h |2 +- savevm.c| 16 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/block-migration.c b/block-migration.c index daf9ec1..b637695 100644 --- a/block-migration.c +++ b/block-migration.c @@ -834,7 +834,7 @@ SaveVMHandlers savevm_block_handlers = { .save_live_pending = block_save_pending, .load_state = block_load, .cancel = block_migration_cancel, -.is_active = block_is_active, +.is_block_active = block_is_active, }; void blk_mig_init(void) diff --git a/include/migration/vmstate.h b/include/migration/vmstate.h index 9d09e60..c634d65 100644 --- a/include/migration/vmstate.h +++ b/include/migration/vmstate.h @@ -42,7 +42,7 @@ typedef struct SaveVMHandlers { int (*save_live_complete)(QEMUFile *f, void *opaque); /* This runs both outside and inside the iothread lock. */ -bool (*is_active)(void *opaque); +bool (*is_block_active)(void *opaque); /* This runs outside the iothread lock in the migration case, and * within the lock in the savevm case. 
The callback had better only diff --git a/savevm.c b/savevm.c index 2f631d4..56b8643 100644 --- a/savevm.c +++ b/savevm.c @@ -1867,8 +1867,8 @@ void qemu_savevm_state_begin(QEMUFile *f, if (!se->ops || !se->ops->save_live_setup) { continue; } -if (se->ops && se->ops->is_active) { -if (!se->ops->is_active(se->opaque)) { +if (se->ops && se->ops->is_block_active) { +if (!se->ops->is_block_active(se->opaque)) { continue; } } @@ -1907,8 +1907,8 @@ int qemu_savevm_state_iterate(QEMUFile *f) if (!se->ops || !se->ops->save_live_iterate) { continue; } -if (se->ops && se->ops->is_active) { -if (!se->ops->is_active(se->opaque)) { +if (se->ops && se->ops->is_block_active) { +if (!se->ops->is_block_active(se->opaque)) { continue; } } @@ -1948,8 +1948,8 @@ void qemu_savevm_state_complete(QEMUFile *f) if (!se->ops || !se->ops->save_live_complete) { continue; } -if (se->ops && se->ops->is_active) { -if (!se->ops->is_active(se->opaque)) { +if (se->ops && se->ops->is_block_active) { +if (!se->ops->is_block_active(se->opaque)) { continue; } } @@ -2002,8 +2002,8 @@ uint64_t qemu_savevm_state_pending(QEMUFile *f, uint64_t max_size) if (!se->ops || !se->ops->save_live_pending) { continue; } -if (se->ops && se->ops->is_active) { -if (!se->ops->is_active(se->opaque)) { +if (se->ops && se->ops->is_block_active) { +if (!se->ops->is_block_active(se->opaque)) { continue; } }
Re: [Qemu-devel] [PATCH 02/17] QAPI: introduce migration capability unix_page_flipping
Il 22/10/2013 04:25, Lei Li ha scritto: +# @unix-page-flipping: If enabled, QEMU will support localhost migration. This +# feature allows live upgrade of a running QEMU instance by doing localhost +# migration with page flipping. It requires the source and destination +# are both on localhost. Disabled by default. (since 1.7) +# If enabled, QEMU can optimize migration when the destination is a QEMU process that runs on the same host as the source (as is the case for live upgrade). If the migration transport is a Unix socket, QEMU will flip RAM pages directly to the destination, so that memory is only allocated twice for the source and destination processes. Disabled by default. (since 1.8) Paolo
Re: [Qemu-devel] [PATCH 03/17] migration: add migrate_unix_page_flipping()
Il 22/10/2013 04:25, Lei Li ha scritto: Add migrate_unix_page_flipping() to check if MIGRATION_CAPABILITY_UNIX_PAGE_FLIPPING is enabled. Signed-off-by: Lei Li li...@linux.vnet.ibm.com --- include/migration/migration.h |3 +++ migration.c |9 + 2 files changed, 12 insertions(+), 0 deletions(-) diff --git a/include/migration/migration.h b/include/migration/migration.h index 140e6b4..7e5d01a 100644 --- a/include/migration/migration.h +++ b/include/migration/migration.h @@ -131,10 +131,13 @@ void migrate_add_blocker(Error *reason); void migrate_del_blocker(Error *reason); bool migrate_rdma_pin_all(void); + bool migrate_zero_blocks(void); bool migrate_auto_converge(void); +bool migrate_unix_page_flipping(void); + int xbzrle_encode_buffer(uint8_t *old_buf, uint8_t *new_buf, int slen, uint8_t *dst, int dlen); int xbzrle_decode_buffer(uint8_t *src, int slen, uint8_t *dst, int dlen); diff --git a/migration.c b/migration.c index 2b1ab20..4ac466b 100644 --- a/migration.c +++ b/migration.c @@ -541,6 +541,15 @@ int64_t migrate_xbzrle_cache_size(void) return s->xbzrle_cache_size; } +bool migrate_unix_page_flipping(void) +{ +MigrationState *s; + +s = migrate_get_current(); + +return s->enabled_capabilities[MIGRATION_CAPABILITY_UNIX_PAGE_FLIPPING]; +} + /* migration thread support */ static void *migration_thread(void *opaque) Reviewed-by: Paolo Bonzini pbonz...@redhat.com
Re: [Qemu-devel] [PATCH 04/17] qmp-command.hx: add missing docs for migration capabilities
Il 22/10/2013 04:25, Lei Li ha scritto: Signed-off-by: Lei Li li...@linux.vnet.ibm.com --- qmp-commands.hx |8 1 files changed, 8 insertions(+), 0 deletions(-) diff --git a/qmp-commands.hx b/qmp-commands.hx index fba15cd..650a3a8 100644 --- a/qmp-commands.hx +++ b/qmp-commands.hx @@ -2898,6 +2898,10 @@ migrate-set-capabilities Enable/Disable migration capabilities - xbzrle: XBZRLE support +- x-rdma-pin-all: RDMA support Pin all pages during RDMA support. +- zero-blocks: zero-blocks support Compress zero blocks during block migration. +- auto-converge: Auto converge support Block VCPU to help convergence of migration +- unix-page-flipping: Page flipping support Page flipping for live QEMU upgrade Arguments: @@ -2922,6 +2926,10 @@ Query current migration capabilities - capabilities: migration capabilities state - xbzrle : XBZRLE state (json-bool) + - x-rdma-pin-all: RDMA state (json-bool) + - zero-blocks: zero-blocks state (json-bool) + - auto-converge: Auto converge state (json-bool) + - unix-page-flipping: Page flipping state (json-bool) Arguments: Please separate page flipping in a separate patch and send it for 1.7. Once you do that, patches 2/3/4 can be merged. Paolo
[Qemu-devel] [Bug 1243968] [NEW] VMware ESXi on QEmu Kernel Panic
Public bug reported: I attempted to install ESXi 5.5 (the free version) into a QEmu 1.6.1 VM. The guest OS does have the svm capabilities, but it appears VMware is trying to do some kind of hypercall that crashes the guest. There is more information here: https://communities.vmware.com/message/2297382 It seems to me that this stubbed feature should just be disabled if it is unusable. Or at the very least I should be able to disable it at run- time with a command-line argument. Is there some way to disable all the hypervisor features that makes it very obvious to a guest os that it is running inside a VM? It would be great if I could install a software and it would actually work (even if it's slow with those features disabled). FYI, my guest OS capabilities are: # cat /proc/cpuinfo processor : 0 vendor_id : AuthenticAMD cpu family : 6 model : 2 model name : QEMU Virtual CPU version 1.5.3 stepping: 3 microcode : 0x165 cpu MHz : 1999.999 cache size : 512 KB fpu : yes fpu_exception : yes cpuid level : 4 wp : yes flags : fpu de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 syscall nx lm nopl pni cx16 popcnt hypervisor lahf_lm svm abm sse4a bogomips: 3999.99 TLB size: 1024 4K pages clflush size: 64 cache_alignment : 64 address sizes : 40 bits physical, 48 bits virtual power management: ** Affects: qemu Importance: Undecided Status: Confirmed ** Tags: esxi hypercall vmware ** Changed in: qemu Status: New = Confirmed -- You received this bug notification because you are a member of qemu- devel-ml, which is subscribed to QEMU. https://bugs.launchpad.net/bugs/1243968 Title: VMware ESXi on QEmu Kernel Panic Status in QEMU: Confirmed Bug description: I attempted to install ESXi 5.5 (the free version) into a QEmu 1.6.1 VM. The guest OS does have the svm capabilities, but it appears VMware is trying to do some kind of hypercall that crashes the guest. 
There is more information here: https://communities.vmware.com/message/2297382 It seems to me that this stubbed feature should just be disabled if it is unusable. Or at the very least I should be able to disable it at run-time with a command-line argument. Is there some way to disable all the hypervisor features that makes it very obvious to a guest os that it is running inside a VM? It would be great if I could install a software and it would actually work (even if it's slow with those features disabled). FYI, my guest OS capabilities are: # cat /proc/cpuinfo processor : 0 vendor_id : AuthenticAMD cpu family : 6 model : 2 model name : QEMU Virtual CPU version 1.5.3 stepping: 3 microcode : 0x165 cpu MHz : 1999.999 cache size : 512 KB fpu : yes fpu_exception : yes cpuid level : 4 wp : yes flags : fpu de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 syscall nx lm nopl pni cx16 popcnt hypervisor lahf_lm svm abm sse4a bogomips: 3999.99 TLB size: 1024 4K pages clflush size: 64 cache_alignment : 64 address sizes : 40 bits physical, 48 bits virtual power management: To manage notifications about this bug go to: https://bugs.launchpad.net/qemu/+bug/1243968/+subscriptions