[Qemu-devel] [PATCHv2] qemu-img: add special exit code if bdrv_check is not supported

2013-10-24 Thread Peter Lieven
Currently it is not possible to distinguish by exit code whether there
has been an error or whether bdrv_check is not supported by the image
format. Change the exit code from 1 to 63 for the latter case.

Signed-off-by: Peter Lieven p...@kamp.de
---
v1->v2: As Eric suggested, changed the exit code from 255 to 63.

 qemu-img.c |2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/qemu-img.c b/qemu-img.c
index 926f0a0..bf3fb4f 100644
--- a/qemu-img.c
+++ b/qemu-img.c
@@ -607,7 +607,7 @@ static int img_check(int argc, char **argv)
 if (output_format == OFORMAT_HUMAN) {
 error_report(This image format does not support checks);
 }
-ret = 1;
+ret = 63;
 goto fail;
 }
 
-- 
1.7.9.5




Re: [Qemu-devel] [PATCHv2] qemu-img: add special exit code if bdrv_check is not supported

2013-10-24 Thread Eric Blake
On 10/24/2013 07:53 AM, Peter Lieven wrote:
 currently it is not possible to distinguish by exitcode if there
 has been an error or if bdrv_check is not supported by the image
 format. Change the exitcode from 1 to 63 for the latter case.
 
 Signed-off-by: Peter Lieven p...@kamp.de
 ---
 v1-v2: As Eric suggested changed the exitcode from 255 to 63.
 
  qemu-img.c |2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)

Should the man page document this exit status?  Then again, it doesn't
document ANY exit status, so that could be a separate patch.

Reviewed-by: Eric Blake ebl...@redhat.com

 
 diff --git a/qemu-img.c b/qemu-img.c
 index 926f0a0..bf3fb4f 100644
 --- a/qemu-img.c
 +++ b/qemu-img.c
 @@ -607,7 +607,7 @@ static int img_check(int argc, char **argv)
  if (output_format == OFORMAT_HUMAN) {
  error_report(This image format does not support checks);
  }
 -ret = 1;
 +ret = 63;
  goto fail;
  }
  
 

-- 
Eric Blake   eblake redhat com+1-919-301-3266
Libvirt virtualization library http://libvirt.org



signature.asc
Description: OpenPGP digital signature


[Qemu-devel] [PATCH v5 6/8] sheepdog: make add_aio_request and send_aioreq void functions

2013-10-24 Thread MORITA Kazutaka
These functions no longer return errors.  We can make them void
functions and simplify the code.

Reviewed-by: Liu Yuan namei.u...@gmail.com
Signed-off-by: MORITA Kazutaka morita.kazut...@lab.ntt.co.jp
---
 block/sheepdog.c |   66 ++
 1 file changed, 17 insertions(+), 49 deletions(-)

diff --git a/block/sheepdog.c b/block/sheepdog.c
index 3e98291..5846ac4 100644
--- a/block/sheepdog.c
+++ b/block/sheepdog.c
@@ -606,10 +606,10 @@ static int do_req(int sockfd, SheepdogReq *hdr, void 
*data,
 return srco.ret;
 }
 
-static int coroutine_fn add_aio_request(BDRVSheepdogState *s, AIOReq *aio_req,
+static void coroutine_fn add_aio_request(BDRVSheepdogState *s, AIOReq *aio_req,
struct iovec *iov, int niov, bool create,
enum AIOCBState aiocb_type);
-static int coroutine_fn resend_aioreq(BDRVSheepdogState *s, AIOReq *aio_req);
+static void coroutine_fn resend_aioreq(BDRVSheepdogState *s, AIOReq *aio_req);
 static int reload_inode(BDRVSheepdogState *s, uint32_t snapid, const char 
*tag);
 static int get_sheep_fd(BDRVSheepdogState *s);
 static void co_write_request(void *opaque);
@@ -635,22 +635,14 @@ static void coroutine_fn 
send_pending_req(BDRVSheepdogState *s, uint64_t oid)
 {
 AIOReq *aio_req;
 SheepdogAIOCB *acb;
-int ret;
 
 while ((aio_req = find_pending_req(s, oid)) != NULL) {
 acb = aio_req-aiocb;
 /* move aio_req from pending list to inflight one */
 QLIST_REMOVE(aio_req, aio_siblings);
 QLIST_INSERT_HEAD(s-inflight_aio_head, aio_req, aio_siblings);
-ret = add_aio_request(s, aio_req, acb-qiov-iov,
-  acb-qiov-niov, false, acb-aiocb_type);
-if (ret  0) {
-error_report(add_aio_request is failed);
-free_aio_req(s, aio_req);
-if (!acb-nr_pending) {
-sd_finish_aiocb(acb);
-}
-}
+add_aio_request(s, aio_req, acb-qiov-iov, acb-qiov-niov, false,
+acb-aiocb_type);
 }
 }
 
@@ -813,11 +805,8 @@ static void coroutine_fn aio_read_response(void *opaque)
 } else {
 aio_req-oid = vid_to_vdi_oid(s-inode.vdi_id);
 }
-ret = resend_aioreq(s, aio_req);
-if (ret == SD_RES_SUCCESS) {
-goto out;
-}
-/* fall through */
+resend_aioreq(s, aio_req);
+goto out;
 default:
 acb-ret = -EIO;
 error_report(%s, sd_strerror(rsp.result));
@@ -1066,7 +1055,7 @@ out:
 return ret;
 }
 
-static int coroutine_fn add_aio_request(BDRVSheepdogState *s, AIOReq *aio_req,
+static void coroutine_fn add_aio_request(BDRVSheepdogState *s, AIOReq *aio_req,
struct iovec *iov, int niov, bool create,
enum AIOCBState aiocb_type)
 {
@@ -1144,8 +1133,6 @@ out:
 qemu_aio_set_fd_handler(s-fd, co_read_response, NULL, s);
 s-co_send = NULL;
 qemu_co_mutex_unlock(s-lock);
-
-return 0;
 }
 
 static int read_write_object(int fd, char *buf, uint64_t oid, int copies,
@@ -1248,7 +1235,7 @@ out:
 return ret;
 }
 
-static int coroutine_fn resend_aioreq(BDRVSheepdogState *s, AIOReq *aio_req)
+static void coroutine_fn resend_aioreq(BDRVSheepdogState *s, AIOReq *aio_req)
 {
 SheepdogAIOCB *acb = aio_req-aiocb;
 bool create = false;
@@ -1273,7 +1260,7 @@ static int coroutine_fn resend_aioreq(BDRVSheepdogState 
*s, AIOReq *aio_req)
 DPRINTF(simultaneous CoW to % PRIx64 \n, aio_req-oid);
 QLIST_REMOVE(aio_req, aio_siblings);
 QLIST_INSERT_HEAD(s-pending_aio_head, aio_req, aio_siblings);
-return SD_RES_SUCCESS;
+return;
 }
 }
 
@@ -1283,13 +1270,13 @@ static int coroutine_fn resend_aioreq(BDRVSheepdogState 
*s, AIOReq *aio_req)
 }
 out:
 if (is_data_obj(aio_req-oid)) {
-return add_aio_request(s, aio_req, acb-qiov-iov, acb-qiov-niov,
-   create, acb-aiocb_type);
+add_aio_request(s, aio_req, acb-qiov-iov, acb-qiov-niov, create,
+acb-aiocb_type);
 } else {
 struct iovec iov;
 iov.iov_base = s-inode;
 iov.iov_len = sizeof(s-inode);
-return add_aio_request(s, aio_req, iov, 1, false, AIOCB_WRITE_UDATA);
+add_aio_request(s, aio_req, iov, 1, false, AIOCB_WRITE_UDATA);
 }
 }
 
@@ -1689,7 +1676,6 @@ static int sd_truncate(BlockDriverState *bs, int64_t 
offset)
  */
 static void coroutine_fn sd_write_done(SheepdogAIOCB *acb)
 {
-int ret;
 BDRVSheepdogState *s = acb-common.bs-opaque;
 struct iovec iov;
 AIOReq *aio_req;
@@ -1711,18 +1697,13 @@ static void coroutine_fn sd_write_done(SheepdogAIOCB 
*acb)
 aio_req = alloc_aio_req(s, acb, vid_to_vdi_oid(s-inode.vdi_id),
 data_len, offset, 0, 0, offset);
 

[Qemu-devel] [PATCH v5 4/8] coroutine: add co_aio_sleep_ns() to allow sleep in block drivers

2013-10-24 Thread MORITA Kazutaka
This helper function behaves similarly to co_sleep_ns(), but the
sleeping coroutine will be resumed when using qemu_aio_wait().

Signed-off-by: MORITA Kazutaka morita.kazut...@lab.ntt.co.jp
---
 include/block/coroutine.h |9 +
 qemu-coroutine-sleep.c|   14 ++
 2 files changed, 23 insertions(+)

diff --git a/include/block/coroutine.h b/include/block/coroutine.h
index 4232569..4d5c0cf 100644
--- a/include/block/coroutine.h
+++ b/include/block/coroutine.h
@@ -216,6 +216,15 @@ void qemu_co_rwlock_unlock(CoRwlock *lock);
 void coroutine_fn co_sleep_ns(QEMUClockType type, int64_t ns);
 
 /**
+ * Yield the coroutine for a given duration
+ *
+ * Behaves similarly to co_sleep_ns(), but the sleeping coroutine will be
+ * resumed when using qemu_aio_wait().
+ */
+void coroutine_fn co_aio_sleep_ns(AioContext *ctx, QEMUClockType type,
+  int64_t ns);
+
+/**
  * Yield until a file descriptor becomes readable
  *
  * Note that this function clobbers the handlers for the file descriptor.
diff --git a/qemu-coroutine-sleep.c b/qemu-coroutine-sleep.c
index f6db978..ad78fba 100644
--- a/qemu-coroutine-sleep.c
+++ b/qemu-coroutine-sleep.c
@@ -13,6 +13,7 @@
 
 #include block/coroutine.h
 #include qemu/timer.h
+#include block/aio.h
 
 typedef struct CoSleepCB {
 QEMUTimer *ts;
@@ -37,3 +38,16 @@ void coroutine_fn co_sleep_ns(QEMUClockType type, int64_t ns)
 timer_del(sleep_cb.ts);
 timer_free(sleep_cb.ts);
 }
+
+void coroutine_fn co_aio_sleep_ns(AioContext *ctx, QEMUClockType type,
+  int64_t ns)
+{
+CoSleepCB sleep_cb = {
+.co = qemu_coroutine_self(),
+};
+sleep_cb.ts = aio_timer_new(ctx, type, SCALE_NS, co_sleep_cb, sleep_cb);
+timer_mod(sleep_cb.ts, qemu_clock_get_ns(type) + ns);
+qemu_coroutine_yield();
+timer_del(sleep_cb.ts);
+timer_free(sleep_cb.ts);
+}
-- 
1.7.10.4




[Qemu-devel] [PATCH v5 5/8] sheepdog: try to reconnect to sheepdog after network error

2013-10-24 Thread MORITA Kazutaka
This introduces a failed request queue and links all the inflight
requests to the list after network error happens.  After QEMU
reconnects to the sheepdog server successfully, the sheepdog block
driver will retry all the requests in the failed queue.

Signed-off-by: MORITA Kazutaka morita.kazut...@lab.ntt.co.jp
---
 block/sheepdog.c |   80 --
 1 file changed, 66 insertions(+), 14 deletions(-)

diff --git a/block/sheepdog.c b/block/sheepdog.c
index 5569e54..3e98291 100644
--- a/block/sheepdog.c
+++ b/block/sheepdog.c
@@ -299,6 +299,8 @@ struct SheepdogAIOCB {
 };
 
 typedef struct BDRVSheepdogState {
+BlockDriverState *bs;
+
 SheepdogInode inode;
 
 uint32_t min_dirty_data_idx;
@@ -318,8 +320,11 @@ typedef struct BDRVSheepdogState {
 Coroutine *co_recv;
 
 uint32_t aioreq_seq_num;
+
+/* Every aio request must be linked to either of these queues. */
 QLIST_HEAD(inflight_aio_head, AIOReq) inflight_aio_head;
 QLIST_HEAD(pending_aio_head, AIOReq) pending_aio_head;
+QLIST_HEAD(failed_aio_head, AIOReq) failed_aio_head;
 } BDRVSheepdogState;
 
 static const char * sd_strerror(int err)
@@ -606,6 +611,8 @@ static int coroutine_fn add_aio_request(BDRVSheepdogState 
*s, AIOReq *aio_req,
enum AIOCBState aiocb_type);
 static int coroutine_fn resend_aioreq(BDRVSheepdogState *s, AIOReq *aio_req);
 static int reload_inode(BDRVSheepdogState *s, uint32_t snapid, const char 
*tag);
+static int get_sheep_fd(BDRVSheepdogState *s);
+static void co_write_request(void *opaque);
 
 static AIOReq *find_pending_req(BDRVSheepdogState *s, uint64_t oid)
 {
@@ -647,6 +654,51 @@ static void coroutine_fn 
send_pending_req(BDRVSheepdogState *s, uint64_t oid)
 }
 }
 
+static coroutine_fn void reconnect_to_sdog(void *opaque)
+{
+BDRVSheepdogState *s = opaque;
+AIOReq *aio_req, *next;
+
+qemu_aio_set_fd_handler(s-fd, NULL, NULL, NULL);
+close(s-fd);
+s-fd = -1;
+
+/* Wait for outstanding write requests to be completed. */
+while (s-co_send != NULL) {
+co_write_request(opaque);
+}
+
+/* Try to reconnect the sheepdog server every one second. */
+while (s-fd  0) {
+s-fd = get_sheep_fd(s);
+if (s-fd  0) {
+DPRINTF(Wait for connection to be established\n);
+co_aio_sleep_ns(bdrv_get_aio_context(s-bs), QEMU_CLOCK_REALTIME,
+10ULL);
+}
+};
+
+/*
+ * Now we have to resend all the request in the inflight queue.  However,
+ * resend_aioreq() can yield and newly created requests can be added to the
+ * inflight queue before the coroutine is resumed.  To avoid mixing them, 
we
+ * have to move all the inflight requests to the failed queue before
+ * resend_aioreq() is called.
+ */
+QLIST_FOREACH_SAFE(aio_req, s-inflight_aio_head, aio_siblings, next) {
+QLIST_REMOVE(aio_req, aio_siblings);
+QLIST_INSERT_HEAD(s-failed_aio_head, aio_req, aio_siblings);
+}
+
+/* Resend all the failed aio requests. */
+while (!QLIST_EMPTY(s-failed_aio_head)) {
+aio_req = QLIST_FIRST(s-failed_aio_head);
+QLIST_REMOVE(aio_req, aio_siblings);
+QLIST_INSERT_HEAD(s-inflight_aio_head, aio_req, aio_siblings);
+resend_aioreq(s, aio_req);
+}
+}
+
 /*
  * Receive responses of the I/O requests.
  *
@@ -663,15 +715,11 @@ static void coroutine_fn aio_read_response(void *opaque)
 SheepdogAIOCB *acb;
 uint64_t idx;
 
-if (QLIST_EMPTY(s-inflight_aio_head)) {
-goto out;
-}
-
 /* read a header */
 ret = qemu_co_recv(fd, rsp, sizeof(rsp));
 if (ret != sizeof(rsp)) {
 error_report(failed to get the header, %s, strerror(errno));
-goto out;
+goto err;
 }
 
 /* find the right aio_req from the inflight aio list */
@@ -682,7 +730,7 @@ static void coroutine_fn aio_read_response(void *opaque)
 }
 if (!aio_req) {
 error_report(cannot find aio_req %x, rsp.id);
-goto out;
+goto err;
 }
 
 acb = aio_req-aiocb;
@@ -722,7 +770,7 @@ static void coroutine_fn aio_read_response(void *opaque)
 aio_req-iov_offset, rsp.data_length);
 if (ret != rsp.data_length) {
 error_report(failed to get the data, %s, strerror(errno));
-goto out;
+goto err;
 }
 break;
 case AIOCB_FLUSH_CACHE:
@@ -756,10 +804,9 @@ static void coroutine_fn aio_read_response(void *opaque)
 if (s-inode.vdi_id == oid_to_vid(aio_req-oid)) {
 ret = reload_inode(s, 0, );
 if (ret  0) {
-goto out;
+goto err;
 }
 }
-
 if (is_data_obj(aio_req-oid)) {
 aio_req-oid = vid_to_data_oid(s-inode.vdi_id,
data_oid_to_idx(aio_req-oid));
@@ -787,6 +834,10 @@ static void 

[Qemu-devel] [PATCH v5 1/8] sheepdog: check return values of qemu_co_recv/send correctly

2013-10-24 Thread MORITA Kazutaka
If qemu_co_recv/send doesn't return the specified length, it means
that an error happened.

Reviewed-by: Liu Yuan namei.u...@gmail.com
Signed-off-by: MORITA Kazutaka morita.kazut...@lab.ntt.co.jp
---
 block/sheepdog.c |   16 
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/block/sheepdog.c b/block/sheepdog.c
index 5f81c93..cb681de 100644
--- a/block/sheepdog.c
+++ b/block/sheepdog.c
@@ -489,13 +489,13 @@ static coroutine_fn int send_co_req(int sockfd, 
SheepdogReq *hdr, void *data,
 int ret;
 
 ret = qemu_co_send(sockfd, hdr, sizeof(*hdr));
-if (ret  sizeof(*hdr)) {
+if (ret != sizeof(*hdr)) {
 error_report(failed to send a req, %s, strerror(errno));
 return ret;
 }
 
 ret = qemu_co_send(sockfd, data, *wlen);
-if (ret  *wlen) {
+if (ret != *wlen) {
 error_report(failed to send a req, %s, strerror(errno));
 }
 
@@ -541,7 +541,7 @@ static coroutine_fn void do_co_req(void *opaque)
 qemu_aio_set_fd_handler(sockfd, restart_co_req, NULL, co);
 
 ret = qemu_co_recv(sockfd, hdr, sizeof(*hdr));
-if (ret  sizeof(*hdr)) {
+if (ret != sizeof(*hdr)) {
 error_report(failed to get a rsp, %s, strerror(errno));
 ret = -errno;
 goto out;
@@ -553,7 +553,7 @@ static coroutine_fn void do_co_req(void *opaque)
 
 if (*rlen) {
 ret = qemu_co_recv(sockfd, data, *rlen);
-if (ret  *rlen) {
+if (ret != *rlen) {
 error_report(failed to get the data, %s, strerror(errno));
 ret = -errno;
 goto out;
@@ -664,7 +664,7 @@ static void coroutine_fn aio_read_response(void *opaque)
 
 /* read a header */
 ret = qemu_co_recv(fd, rsp, sizeof(rsp));
-if (ret  0) {
+if (ret != sizeof(rsp)) {
 error_report(failed to get the header, %s, strerror(errno));
 goto out;
 }
@@ -715,7 +715,7 @@ static void coroutine_fn aio_read_response(void *opaque)
 case AIOCB_READ_UDATA:
 ret = qemu_co_recvv(fd, acb-qiov-iov, acb-qiov-niov,
 aio_req-iov_offset, rsp.data_length);
-if (ret  0) {
+if (ret != rsp.data_length) {
 error_report(failed to get the data, %s, strerror(errno));
 goto out;
 }
@@ -1059,7 +1059,7 @@ static int coroutine_fn add_aio_request(BDRVSheepdogState 
*s, AIOReq *aio_req,
 
 /* send a header */
 ret = qemu_co_send(s-fd, hdr, sizeof(hdr));
-if (ret  0) {
+if (ret != sizeof(hdr)) {
 qemu_co_mutex_unlock(s-lock);
 error_report(failed to send a req, %s, strerror(errno));
 return -errno;
@@ -1067,7 +1067,7 @@ static int coroutine_fn add_aio_request(BDRVSheepdogState 
*s, AIOReq *aio_req,
 
 if (wlen) {
 ret = qemu_co_sendv(s-fd, iov, niov, aio_req-iov_offset, wlen);
-if (ret  0) {
+if (ret != wlen) {
 qemu_co_mutex_unlock(s-lock);
 error_report(failed to send a data, %s, strerror(errno));
 return -errno;
-- 
1.7.10.4




[Qemu-devel] [PATCH v5 3/8] sheepdog: reload inode outside of resend_aioreq

2013-10-24 Thread MORITA Kazutaka
This prepares for using resend_aioreq() after reconnecting to the
sheepdog server.

Reviewed-by: Liu Yuan namei.u...@gmail.com
Signed-off-by: MORITA Kazutaka morita.kazut...@lab.ntt.co.jp
---
 block/sheepdog.c |   33 +++--
 1 file changed, 19 insertions(+), 14 deletions(-)

diff --git a/block/sheepdog.c b/block/sheepdog.c
index 59cad97..5569e54 100644
--- a/block/sheepdog.c
+++ b/block/sheepdog.c
@@ -222,6 +222,11 @@ static inline uint64_t data_oid_to_idx(uint64_t oid)
 return oid  (MAX_DATA_OBJS - 1);
 }
 
+static inline uint32_t oid_to_vid(uint64_t oid)
+{
+return (oid  ~VDI_BIT)  VDI_SPACE_SHIFT;
+}
+
 static inline uint64_t vid_to_vdi_oid(uint32_t vid)
 {
 return VDI_BIT | ((uint64_t)vid  VDI_SPACE_SHIFT);
@@ -600,7 +605,7 @@ static int coroutine_fn add_aio_request(BDRVSheepdogState 
*s, AIOReq *aio_req,
struct iovec *iov, int niov, bool create,
enum AIOCBState aiocb_type);
 static int coroutine_fn resend_aioreq(BDRVSheepdogState *s, AIOReq *aio_req);
-
+static int reload_inode(BDRVSheepdogState *s, uint32_t snapid, const char 
*tag);
 
 static AIOReq *find_pending_req(BDRVSheepdogState *s, uint64_t oid)
 {
@@ -748,6 +753,19 @@ static void coroutine_fn aio_read_response(void *opaque)
 case SD_RES_SUCCESS:
 break;
 case SD_RES_READONLY:
+if (s-inode.vdi_id == oid_to_vid(aio_req-oid)) {
+ret = reload_inode(s, 0, );
+if (ret  0) {
+goto out;
+}
+}
+
+if (is_data_obj(aio_req-oid)) {
+aio_req-oid = vid_to_data_oid(s-inode.vdi_id,
+   data_oid_to_idx(aio_req-oid));
+} else {
+aio_req-oid = vid_to_vdi_oid(s-inode.vdi_id);
+}
 ret = resend_aioreq(s, aio_req);
 if (ret == SD_RES_SUCCESS) {
 goto out;
@@ -1185,19 +1203,6 @@ static int coroutine_fn resend_aioreq(BDRVSheepdogState 
*s, AIOReq *aio_req)
 {
 SheepdogAIOCB *acb = aio_req-aiocb;
 bool create = false;
-int ret;
-
-ret = reload_inode(s, 0, );
-if (ret  0) {
-return ret;
-}
-
-if (is_data_obj(aio_req-oid)) {
-aio_req-oid = vid_to_data_oid(s-inode.vdi_id,
-   data_oid_to_idx(aio_req-oid));
-} else {
-aio_req-oid = vid_to_vdi_oid(s-inode.vdi_id);
-}
 
 /* check whether this request becomes a CoW one */
 if (acb-aiocb_type == AIOCB_WRITE_UDATA  is_data_obj(aio_req-oid)) {
-- 
1.7.10.4




[Qemu-devel] [PATCH v5 0/8] sheepdog: reconnect server after connection failure

2013-10-24 Thread MORITA Kazutaka
Currently, if a sheepdog server exits, all the connecting VMs need to
be restarted.  This series implements a feature to reconnect the
server, and enables us to do online sheepdog upgrade and avoid
restarting VMs when sheepdog servers crash unexpectedly.

v5:
 - Use AioContext timer for co_aio_sleep_ns().

v4:
 - Added comment to explain why we need a failed queue.
 - Fixed a return value of sd_acb_cancelable().

v3:
 - Check return values of qemu_co_recv/send more strictly.
 - Move inflight requests to the failed list after reconnection
   completes.  This is necessary to resend I/Os while connection is
   lost.
 - Check simultaneous create in resend_aioreq().

v2:
 - Dropped nonblocking connect patches.

MORITA Kazutaka (8):
  sheepdog: check return values of qemu_co_recv/send correctly
  sheepdog: handle vdi objects in resend_aio_req
  sheepdog: reload inode outside of resend_aioreq
  coroutine: add co_aio_sleep_ns() to allow sleep in block drivers
  sheepdog: try to reconnect to sheepdog after network error
  sheepdog: make add_aio_request and send_aioreq void functions
  sheepdog: cancel aio requests if possible
  sheepdog: check simultaneous create in resend_aioreq

 block/sheepdog.c  |  322 -
 include/block/coroutine.h |9 ++
 qemu-coroutine-sleep.c|   14 ++
 3 files changed, 226 insertions(+), 119 deletions(-)

-- 
1.7.10.4




[Qemu-devel] [PATCH v5 7/8] sheepdog: cancel aio requests if possible

2013-10-24 Thread MORITA Kazutaka
This patch tries to cancel aio requests in pending queue and failed
queue.  When the sheepdog driver cannot cancel the requests, it waits
for them to be completed.

Reviewed-by: Liu Yuan namei.u...@gmail.com
Signed-off-by: MORITA Kazutaka morita.kazut...@lab.ntt.co.jp
---
 block/sheepdog.c |   70 +-
 1 file changed, 59 insertions(+), 11 deletions(-)

diff --git a/block/sheepdog.c b/block/sheepdog.c
index 5846ac4..cb3a22d 100644
--- a/block/sheepdog.c
+++ b/block/sheepdog.c
@@ -294,7 +294,8 @@ struct SheepdogAIOCB {
 Coroutine *coroutine;
 void (*aio_done_func)(SheepdogAIOCB *);
 
-bool canceled;
+bool cancelable;
+bool *finished;
 int nr_pending;
 };
 
@@ -413,6 +414,7 @@ static inline void free_aio_req(BDRVSheepdogState *s, 
AIOReq *aio_req)
 {
 SheepdogAIOCB *acb = aio_req-aiocb;
 
+acb-cancelable = false;
 QLIST_REMOVE(aio_req, aio_siblings);
 g_free(aio_req);
 
@@ -421,23 +423,68 @@ static inline void free_aio_req(BDRVSheepdogState *s, 
AIOReq *aio_req)
 
 static void coroutine_fn sd_finish_aiocb(SheepdogAIOCB *acb)
 {
-if (!acb-canceled) {
-qemu_coroutine_enter(acb-coroutine, NULL);
+qemu_coroutine_enter(acb-coroutine, NULL);
+if (acb-finished) {
+*acb-finished = true;
 }
 qemu_aio_release(acb);
 }
 
+/*
+ * Check whether the specified acb can be canceled
+ *
+ * We can cancel aio when any request belonging to the acb is:
+ *  - Not processed by the sheepdog server.
+ *  - Not linked to the inflight queue.
+ */
+static bool sd_acb_cancelable(const SheepdogAIOCB *acb)
+{
+BDRVSheepdogState *s = acb-common.bs-opaque;
+AIOReq *aioreq;
+
+if (!acb-cancelable) {
+return false;
+}
+
+QLIST_FOREACH(aioreq, s-inflight_aio_head, aio_siblings) {
+if (aioreq-aiocb == acb) {
+return false;
+}
+}
+
+return true;
+}
+
 static void sd_aio_cancel(BlockDriverAIOCB *blockacb)
 {
 SheepdogAIOCB *acb = (SheepdogAIOCB *)blockacb;
+BDRVSheepdogState *s = acb-common.bs-opaque;
+AIOReq *aioreq, *next;
+bool finished = false;
+
+acb-finished = finished;
+while (!finished) {
+if (sd_acb_cancelable(acb)) {
+/* Remove outstanding requests from pending and failed queues.  */
+QLIST_FOREACH_SAFE(aioreq, s-pending_aio_head, aio_siblings,
+   next) {
+if (aioreq-aiocb == acb) {
+free_aio_req(s, aioreq);
+}
+}
+QLIST_FOREACH_SAFE(aioreq, s-failed_aio_head, aio_siblings,
+   next) {
+if (aioreq-aiocb == acb) {
+free_aio_req(s, aioreq);
+}
+}
 
-/*
- * Sheepdog cannot cancel the requests which are already sent to
- * the servers, so we just complete the request with -EIO here.
- */
-acb-ret = -EIO;
-qemu_coroutine_enter(acb-coroutine, NULL);
-acb-canceled = true;
+assert(acb-nr_pending == 0);
+sd_finish_aiocb(acb);
+return;
+}
+qemu_aio_wait();
+}
 }
 
 static const AIOCBInfo sd_aiocb_info = {
@@ -458,7 +505,8 @@ static SheepdogAIOCB *sd_aio_setup(BlockDriverState *bs, 
QEMUIOVector *qiov,
 acb-nb_sectors = nb_sectors;
 
 acb-aio_done_func = NULL;
-acb-canceled = false;
+acb-cancelable = true;
+acb-finished = NULL;
 acb-coroutine = qemu_coroutine_self();
 acb-ret = 0;
 acb-nr_pending = 0;
-- 
1.7.10.4




[Qemu-devel] [PATCH v5 8/8] sheepdog: check simultaneous create in resend_aioreq

2013-10-24 Thread MORITA Kazutaka
After reconnection happens, all the inflight requests are moved to the
failed request list.  As a result, sd_co_rw_vector() can send another
create request before resend_aioreq() resends a create request from
the failed list.

This patch adds a helper function check_simultaneous_create() and
checks simultaneous create requests more strictly in resend_aioreq().

Reviewed-by: Liu Yuan namei.u...@gmail.com
Signed-off-by: MORITA Kazutaka morita.kazut...@lab.ntt.co.jp
---
 block/sheepdog.c |   64 +++---
 1 file changed, 32 insertions(+), 32 deletions(-)

diff --git a/block/sheepdog.c b/block/sheepdog.c
index cb3a22d..c9ee273 100644
--- a/block/sheepdog.c
+++ b/block/sheepdog.c
@@ -1283,6 +1283,29 @@ out:
 return ret;
 }
 
+/* Return true if the specified request is linked to the pending list. */
+static bool check_simultaneous_create(BDRVSheepdogState *s, AIOReq *aio_req)
+{
+AIOReq *areq;
+QLIST_FOREACH(areq, s-inflight_aio_head, aio_siblings) {
+if (areq != aio_req  areq-oid == aio_req-oid) {
+/*
+ * Sheepdog cannot handle simultaneous create requests to the same
+ * object, so we cannot send the request until the previous request
+ * finishes.
+ */
+DPRINTF(simultaneous create to % PRIx64 \n, aio_req-oid);
+aio_req-flags = 0;
+aio_req-base_oid = 0;
+QLIST_REMOVE(aio_req, aio_siblings);
+QLIST_INSERT_HEAD(s-pending_aio_head, aio_req, aio_siblings);
+return true;
+}
+}
+
+return false;
+}
+
 static void coroutine_fn resend_aioreq(BDRVSheepdogState *s, AIOReq *aio_req)
 {
 SheepdogAIOCB *acb = aio_req-aiocb;
@@ -1291,29 +1314,19 @@ static void coroutine_fn 
resend_aioreq(BDRVSheepdogState *s, AIOReq *aio_req)
 /* check whether this request becomes a CoW one */
 if (acb-aiocb_type == AIOCB_WRITE_UDATA  is_data_obj(aio_req-oid)) {
 int idx = data_oid_to_idx(aio_req-oid);
-AIOReq *areq;
 
-if (s-inode.data_vdi_id[idx] == 0) {
-create = true;
-goto out;
-}
 if (is_data_obj_writable(s-inode, idx)) {
 goto out;
 }
 
-/* link to the pending list if there is another CoW request to
- * the same object */
-QLIST_FOREACH(areq, s-inflight_aio_head, aio_siblings) {
-if (areq != aio_req  areq-oid == aio_req-oid) {
-DPRINTF(simultaneous CoW to % PRIx64 \n, aio_req-oid);
-QLIST_REMOVE(aio_req, aio_siblings);
-QLIST_INSERT_HEAD(s-pending_aio_head, aio_req, aio_siblings);
-return;
-}
+if (check_simultaneous_create(s, aio_req)) {
+return;
 }
 
-aio_req-base_oid = vid_to_data_oid(s-inode.data_vdi_id[idx], idx);
-aio_req-flags |= SD_FLAG_CMD_COW;
+if (s-inode.data_vdi_id[idx]) {
+aio_req-base_oid = vid_to_data_oid(s-inode.data_vdi_id[idx], 
idx);
+aio_req-flags |= SD_FLAG_CMD_COW;
+}
 create = true;
 }
 out:
@@ -1937,27 +1950,14 @@ static int coroutine_fn sd_co_rw_vector(void *p)
 }
 
 aio_req = alloc_aio_req(s, acb, oid, len, offset, flags, old_oid, 
done);
+QLIST_INSERT_HEAD(s-inflight_aio_head, aio_req, aio_siblings);
 
 if (create) {
-AIOReq *areq;
-QLIST_FOREACH(areq, s-inflight_aio_head, aio_siblings) {
-if (areq-oid == oid) {
-/*
- * Sheepdog cannot handle simultaneous create
- * requests to the same object.  So we cannot send
- * the request until the previous request
- * finishes.
- */
-aio_req-flags = 0;
-aio_req-base_oid = 0;
-QLIST_INSERT_HEAD(s-pending_aio_head, aio_req,
-  aio_siblings);
-goto done;
-}
+if (check_simultaneous_create(s, aio_req)) {
+goto done;
 }
 }
 
-QLIST_INSERT_HEAD(s-inflight_aio_head, aio_req, aio_siblings);
 add_aio_request(s, aio_req, acb-qiov-iov, acb-qiov-niov, create,
 acb-aiocb_type);
 done:
-- 
1.7.10.4




[Qemu-devel] [PATCH v5 2/8] sheepdog: handle vdi objects in resend_aio_req

2013-10-24 Thread MORITA Kazutaka
The current resend_aio_req() doesn't work when the request is against
vdi objects.  This fixes the problem.

Reviewed-by: Liu Yuan namei.u...@gmail.com
Signed-off-by: MORITA Kazutaka morita.kazut...@lab.ntt.co.jp
---
 block/sheepdog.c |   21 -
 1 file changed, 16 insertions(+), 5 deletions(-)

diff --git a/block/sheepdog.c b/block/sheepdog.c
index cb681de..59cad97 100644
--- a/block/sheepdog.c
+++ b/block/sheepdog.c
@@ -1192,11 +1192,15 @@ static int coroutine_fn resend_aioreq(BDRVSheepdogState 
*s, AIOReq *aio_req)
 return ret;
 }
 
-aio_req-oid = vid_to_data_oid(s-inode.vdi_id,
-   data_oid_to_idx(aio_req-oid));
+if (is_data_obj(aio_req-oid)) {
+aio_req-oid = vid_to_data_oid(s-inode.vdi_id,
+   data_oid_to_idx(aio_req-oid));
+} else {
+aio_req-oid = vid_to_vdi_oid(s-inode.vdi_id);
+}
 
 /* check whether this request becomes a CoW one */
-if (acb-aiocb_type == AIOCB_WRITE_UDATA) {
+if (acb-aiocb_type == AIOCB_WRITE_UDATA  is_data_obj(aio_req-oid)) {
 int idx = data_oid_to_idx(aio_req-oid);
 AIOReq *areq;
 
@@ -1224,8 +1228,15 @@ static int coroutine_fn resend_aioreq(BDRVSheepdogState 
*s, AIOReq *aio_req)
 create = true;
 }
 out:
-return add_aio_request(s, aio_req, acb-qiov-iov, acb-qiov-niov,
-   create, acb-aiocb_type);
+if (is_data_obj(aio_req-oid)) {
+return add_aio_request(s, aio_req, acb-qiov-iov, acb-qiov-niov,
+   create, acb-aiocb_type);
+} else {
+struct iovec iov;
+iov.iov_base = s-inode;
+iov.iov_len = sizeof(s-inode);
+return add_aio_request(s, aio_req, iov, 1, false, AIOCB_WRITE_UDATA);
+}
 }
 
 /* TODO Convert to fine grained options */
-- 
1.7.10.4




[Qemu-devel] [PATCHv2] block/vpc: check that the image has not been truncated

2013-10-24 Thread Peter Lieven
This adds a check that a dynamic VHD file has not been
accidentally truncated (e.g. during transfer or upload).

Signed-off-by: Peter Lieven p...@kamp.de
---
v1->v2: used the errp argument as Eric suggested

 block/vpc.c |7 +++
 1 file changed, 7 insertions(+)

diff --git a/block/vpc.c b/block/vpc.c
index b5dca39..627d11c 100644
--- a/block/vpc.c
+++ b/block/vpc.c
@@ -260,6 +260,13 @@ static int vpc_open(BlockDriverState *bs, QDict *options, 
int flags,
 }
 }
 
+if (s-free_data_block_offset  bdrv_getlength(bs-file)) {
+error_setg(errp, block-vpc: free_data_block_offset points after 
+ the end of file. The image has been truncated.);
+ret = -EINVAL;
+goto fail;
+}
+
 s-last_bitmap_offset = (int64_t) -1;
 
 #ifdef CACHE
-- 
1.7.9.5




[Qemu-devel] [RESEND][PATCH] migration: drop MADVISE_DONT_NEED for incoming zero pages

2013-10-24 Thread Peter Lieven
The madvise for zeroed out pages was introduced when every transferred
zero page was memset to zero and thus allocated. Since commit
211ea740 we check for zeroness of a target page before we memset
it to zero. Additionally we memmap target memory so it is essentially
zero initialized (except for e.g. option roms and bios which are loaded
into target memory although they shouldn't).

It was reported recently that this madvise causes a performance degradation
in some situations. As the madvise should only be called rarely, and if it
is called it is likely on a busy page (it was non-zero and changed to zero
during migration), drop it completely.

Reported-By: Zhang Haoyu haoyu.zh...@huawei.com
Acked-by: Paolo Bonzini pbonz...@redhat.com
Signed-off-by: Peter Lieven p...@kamp.de
---
 arch_init.c |8 
 1 file changed, 8 deletions(-)

diff --git a/arch_init.c b/arch_init.c
index 7545d96..e0acbc5 100644
--- a/arch_init.c
+++ b/arch_init.c
@@ -850,14 +850,6 @@ void ram_handle_compressed(void *host, uint8_t ch, 
uint64_t size)
 {
 if (ch != 0 || !is_zero_range(host, size)) {
 memset(host, ch, size);
-#ifndef _WIN32
-if (ch == 0  (!kvm_enabled() || kvm_has_sync_mmu())) {
-size = size  ~(getpagesize() - 1);
-if (size  0) {
-qemu_madvise(host, size, QEMU_MADV_DONTNEED);
-}
-}
-#endif
 }
 }
 
-- 
1.7.9.5




Re: [Qemu-devel] [PATCH 1/2] vga: allow non-global vmstate

2013-10-24 Thread Hervé Poussineau

Gerd Hoffmann a écrit :

Need a way to opt-out from vga.vram being global vmstate, for
secondary vga cards.  Add a bool parameter to vga_common_init
to support this.

Signed-off-by: Gerd Hoffmann kra...@redhat.com
---
 hw/display/cirrus_vga.c | 4 ++--
 hw/display/qxl.c| 2 +-
 hw/display/vga-isa-mm.c | 2 +-
 hw/display/vga-isa.c| 2 +-
 hw/display/vga-pci.c| 2 +-
 hw/display/vga.c| 4 ++--
 hw/display/vga_int.h| 2 +-
 hw/display/vmware_vga.c | 2 +-
 8 files changed, 10 insertions(+), 10 deletions(-)

  

[...]


diff --git a/hw/display/vga-pci.c b/hw/display/vga-pci.c
index b3a45c8..dee180f 100644
--- a/hw/display/vga-pci.c
+++ b/hw/display/vga-pci.c
@@ -147,7 +147,7 @@ static int pci_std_vga_initfn(PCIDevice *dev)
 VGACommonState *s = d-vga;
 
 /* vga + console init */

-vga_common_init(s, OBJECT(dev));
+vga_common_init(s, OBJECT(dev), true);
 vga_init(s, OBJECT(dev), pci_address_space(dev), pci_address_space_io(dev),
  true);
 
diff --git a/hw/display/vga.c b/hw/display/vga.c

index 7b91d9c..fea30e5 100644
--- a/hw/display/vga.c
+++ b/hw/display/vga.c
@@ -2257,7 +2257,7 @@ static const GraphicHwOps vga_ops = {
 .text_update = vga_update_text,
 };
 
-void vga_common_init(VGACommonState *s, Object *obj)

+void vga_common_init(VGACommonState *s, Object *obj, bool global_vmstate)
 {
 int i, j, v, b;
 
@@ -2294,7 +2294,7 @@ void vga_common_init(VGACommonState *s, Object *obj)
 
 s-is_vbe_vmstate = 1;

 memory_region_init_ram(s-vram, obj, vga.vram, s-vram_size);
-vmstate_register_ram_global(s-vram);
+vmstate_register_ram(s-vram, global_vmstate ? NULL : DEVICE(obj));
  


Is it possible to do it depending on the QEMU compatibility version, or 
on some property, so we don't have to keep this global vmstate forever?
With this, I think we can also expect to be able to specify -device 
VGA twice, and see the BIOS messages on one card or another.


Hervé



[Qemu-devel] [PATCHv6 00/17] block: logical block provisioning enhancements

2013-10-24 Thread Peter Lieven
this patch adds the ability for targets to stay sparse during
block migration (if the zero_blocks capability is set) and qemu-img convert
even if the target does not have has_zero_init = 1.

the series was especially developed for iSCSI, but it should also work
with other drivers with little or no adjustments. these adjustments
should be limited to providing block provisioning information through
get_block_info and/or honouring BDRV_REQ_MAY_UNMAP on writing zeroes.

v5-v6:
 - protected iscsi_co_write_zeroes by the existence of the
   SCSI_SENSE_ASCQ_CAPACITY_DATA_HAS_CHANGED macro. This is ugly
   but necessary because the semantics of iscsi_writesame16_task
   silently changed between libiscsi 1.8.0 and 1.9.0. The above
   macro was the first added after the change. I already contacted
   Ronnie to introduce an API version macro which has to be bumped
   on each new function that will be added. Changes to the parameters
   should not happen at all of course.

v4-v5:
 - new patches 4-6 to move the block provisioning information
   to the BlockDriverInfo.
 - kept 2 wrappers to read the information from the BDI and
   renamed them to make more clear what they do:

 bdrv_has_discard_zeroes -> bdrv_unallocated_blocks_are_zero
 bdrv_has_discard_write_zeroes -> bdrv_can_write_zeroes_with_unmap

 - added additional information about the 2 flags in the
   BDI struct in block.h

v3-v4:
 - changed BlockLimits struct to typedef (Stefan, Eric)
 - renamed bdrv_zeroize to bdrv_make_zero (Stefan)
 - added comment about the -S flag of qemu-img convert in
   qemu-img.texi (Eric)
 - used struct assignment for bs-bl in raw_open (Stefan, Eric)
 - dropped 3 get_block_status fixes that are independent of
   this series and already partly merged.

v2-v3:
 - fix merge conflict in block/qcow2_cluster.c
 - changed return type of bdrv_has_discard_zeroes and
   bdrv_has_discard_write_zeroes to bool.
 - moved alignment and limits info to a BlockLimits struct (Paolo).
 - added magic constants for default maximum in bdrv_co_do_write_zeroes
   and bdrv_co_discard (Eric).
 - bdrv_co_do_write_zeroes: allocating the bounce buffer only once (Eric),
   fixed bounce iov_len in the fall back path.
 - bdrv_zeroize: added inline docu (Eric) and do not mask flags passed
   to bdrv_write_zeroes (Eric).
 - qemu-img: changed the default hint for -S (min_sparse) in the usage
   help to 4k. not changing the default as it is unclear why this default
   was set. size suffixes are already supported (Eric).

v1-v2:
 - moved block max_discard and max_write_zeroes to BlockDriverState
 - added discard_alignment and write_zeroes_alignment to BlockDriverState
 - added bdrv_has_discard_zeroes() and bdrv_has_discard_write_zeroes()
 - added logic to bdrv_co_discard and bdrv_co_do_write_zeroes to honour
   limit and alignment info.
 - added support for -S 0 in qemu-img convert.

Peter Lieven (17):
  block: make BdrvRequestFlags public
  block: add flags to bdrv_*_write_zeroes
  block: introduce BDRV_REQ_MAY_UNMAP request flag
  block: add logical block provisioning info to BlockDriverInfo
  block: add wrappers for logical block provisioning information
  block/iscsi: add .bdrv_get_info
  block: add BlockLimits structure to BlockDriverState
  block: honour BlockLimits in bdrv_co_do_write_zeroes
  block: honour BlockLimits in bdrv_co_discard
  iscsi: simplify iscsi_co_discard
  iscsi: set limits in BlockDriverState
  iscsi: add bdrv_co_write_zeroes
  block: introduce bdrv_make_zero
  block/get_block_status: fix BDRV_BLOCK_ZERO for unallocated blocks
  qemu-img: add support for fully allocated images
  qemu-img: conditionally zero out target on convert
  block/raw: copy BlockLimits on raw_open

Peter Lieven (17):
  block: make BdrvRequestFlags public
  block: add flags to bdrv_*_write_zeroes
  block: introduce BDRV_REQ_MAY_UNMAP request flag
  block: add logical block provisioning info to BlockDriverInfo
  block: add wrappers for logical block provisioning information
  block/iscsi: add .bdrv_get_info
  block: add BlockLimits structure to BlockDriverState
  block: honour BlockLimits in bdrv_co_do_write_zeroes
  block: honour BlockLimits in bdrv_co_discard
  iscsi: simplify iscsi_co_discard
  iscsi: set limits in BlockDriverState
  iscsi: add bdrv_co_write_zeroes
  block: introduce bdrv_make_zero
  block/get_block_status: fix BDRV_BLOCK_ZERO for unallocated blocks
  qemu-img: add support for fully allocated images
  qemu-img: conditionally zero out target on convert
  block/raw: copy BlockLimits on raw_open

 block-migration.c |3 +-
 block.c   |  200 +
 block/backup.c|3 +-
 block/iscsi.c |  150 +-
 block/qcow2-cluster.c |2 +-
 block/qcow2.c |2 +-
 block/qed.c   |3 +-
 block/raw_bsd.c   |6 +-
 block/vmdk.c  |3 +-
 include/block/block.h |   35 +++-
 

[Qemu-devel] [PATCHv6 04/17] block: add logical block provisioning info to BlockDriverInfo

2013-10-24 Thread Peter Lieven
Reviewed-by: Eric Blake ebl...@redhat.com
Signed-off-by: Peter Lieven p...@kamp.de
---
 include/block/block.h |   16 
 1 file changed, 16 insertions(+)

diff --git a/include/block/block.h b/include/block/block.h
index 1f30a56..9c76967 100644
--- a/include/block/block.h
+++ b/include/block/block.h
@@ -18,6 +18,22 @@ typedef struct BlockDriverInfo {
 /* offset at which the VM state can be saved (0 if not possible) */
 int64_t vm_state_offset;
 bool is_dirty;
+/*
+ * True if unallocated blocks read back as zeroes. This is equivalent
+ * to the LBPRZ flag in the SCSI logical block provisioning page.
+ */
+bool unallocated_blocks_are_zero;
+/*
+ * True if the driver can optimize writing zeroes by unmapping
+ * sectors. This is equivalent to the BLKDISCARDZEROES ioctl in Linux
+ * with the difference that in qemu a discard is allowed to silently
+ * fail. Therefore we have to use bdrv_write_zeroes with the
+ * BDRV_REQ_MAY_UNMAP flag for an optimized zero write with unmapping.
+ * After this call the driver has to guarantee that the contents read
+ * back as zero. It is additionally required that the block device is
+ * opened with BDRV_O_UNMAP flag for this to work.
+ */
+bool can_write_zeroes_with_unmap;
 } BlockDriverInfo;
 
 typedef struct BlockFragInfo {
-- 
1.7.9.5




[Qemu-devel] [PATCHv6 01/17] block: make BdrvRequestFlags public

2013-10-24 Thread Peter Lieven
Reviewed-by: Eric Blake ebl...@redhat.com
Signed-off-by: Peter Lieven p...@kamp.de
---
 block.c   |5 -
 include/block/block.h |5 +
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/block.c b/block.c
index fd05a80..eb11a07 100644
--- a/block.c
+++ b/block.c
@@ -51,11 +51,6 @@
 
 #define NOT_DONE 0x7fff /* used while emulated sync operation in progress 
*/
 
-typedef enum {
-BDRV_REQ_COPY_ON_READ = 0x1,
-BDRV_REQ_ZERO_WRITE   = 0x2,
-} BdrvRequestFlags;
-
 static void bdrv_dev_change_media_cb(BlockDriverState *bs, bool load);
 static BlockDriverAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
diff --git a/include/block/block.h b/include/block/block.h
index 3560deb..ba2082c 100644
--- a/include/block/block.h
+++ b/include/block/block.h
@@ -62,6 +62,11 @@ typedef struct BlockDevOps {
 void (*resize_cb)(void *opaque);
 } BlockDevOps;
 
+typedef enum {
+BDRV_REQ_COPY_ON_READ = 0x1,
+BDRV_REQ_ZERO_WRITE   = 0x2,
+} BdrvRequestFlags;
+
 #define BDRV_O_RDWR0x0002
 #define BDRV_O_SNAPSHOT0x0008 /* open the file read only and save writes 
in a snapshot */
 #define BDRV_O_NOCACHE 0x0020 /* do not use the host page cache */
-- 
1.7.9.5




[Qemu-devel] [PATCHv6 03/17] block: introduce BDRV_REQ_MAY_UNMAP request flag

2013-10-24 Thread Peter Lieven
Reviewed-by: Eric Blake ebl...@redhat.com
Signed-off-by: Peter Lieven p...@kamp.de
---
 block-migration.c |3 ++-
 block.c   |4 
 block/backup.c|2 +-
 include/block/block.h |7 +++
 4 files changed, 14 insertions(+), 2 deletions(-)

diff --git a/block-migration.c b/block-migration.c
index 713a8e3..fc4ef93 100644
--- a/block-migration.c
+++ b/block-migration.c
@@ -780,7 +780,8 @@ static int block_load(QEMUFile *f, void *opaque, int 
version_id)
 }
 
 if (flags  BLK_MIG_FLAG_ZERO_BLOCK) {
-ret = bdrv_write_zeroes(bs, addr, nr_sectors, 0);
+ret = bdrv_write_zeroes(bs, addr, nr_sectors,
+BDRV_REQ_MAY_UNMAP);
 } else {
 buf = g_malloc(BLOCK_SIZE);
 qemu_get_buffer(f, buf, BLOCK_SIZE);
diff --git a/block.c b/block.c
index 3259429..0d97ce6 100644
--- a/block.c
+++ b/block.c
@@ -2810,6 +2810,10 @@ int coroutine_fn bdrv_co_write_zeroes(BlockDriverState 
*bs,
 {
 trace_bdrv_co_write_zeroes(bs, sector_num, nb_sectors);
 
+if (!(bs-open_flags  BDRV_O_UNMAP)) {
+flags = ~BDRV_REQ_MAY_UNMAP;
+}
+
 return bdrv_co_do_writev(bs, sector_num, nb_sectors, NULL,
  BDRV_REQ_ZERO_WRITE | flags);
 }
diff --git a/block/backup.c b/block/backup.c
index 830a179..0198514 100644
--- a/block/backup.c
+++ b/block/backup.c
@@ -139,7 +139,7 @@ static int coroutine_fn backup_do_cow(BlockDriverState *bs,
 if (buffer_is_zero(iov.iov_base, iov.iov_len)) {
 ret = bdrv_co_write_zeroes(job-target,
start * BACKUP_SECTORS_PER_CLUSTER,
-   n, 0);
+   n, BDRV_REQ_MAY_UNMAP);
 } else {
 ret = bdrv_co_writev(job-target,
  start * BACKUP_SECTORS_PER_CLUSTER, n,
diff --git a/include/block/block.h b/include/block/block.h
index 8ba9f0c..1f30a56 100644
--- a/include/block/block.h
+++ b/include/block/block.h
@@ -65,6 +65,13 @@ typedef struct BlockDevOps {
 typedef enum {
 BDRV_REQ_COPY_ON_READ = 0x1,
 BDRV_REQ_ZERO_WRITE   = 0x2,
+/* The BDRV_REQ_MAY_UNMAP flag is used to indicate that the block driver
+ * is allowed to optimize a write zeroes request by unmapping (discarding)
+ * blocks if it is guaranteed that the result will read back as
+ * zeroes. The flag is only passed to the driver if the block device is
+ * opened with BDRV_O_UNMAP.
+ */
+BDRV_REQ_MAY_UNMAP= 0x4,
 } BdrvRequestFlags;
 
 #define BDRV_O_RDWR0x0002
-- 
1.7.9.5




[Qemu-devel] [PATCHv6 08/17] block: honour BlockLimits in bdrv_co_do_write_zeroes

2013-10-24 Thread Peter Lieven
Reviewed-by: Eric Blake ebl...@redhat.com
Signed-off-by: Peter Lieven p...@kamp.de
---
 block.c |   65 +++
 1 file changed, 49 insertions(+), 16 deletions(-)

diff --git a/block.c b/block.c
index 0601b02..0c0b0ac 100644
--- a/block.c
+++ b/block.c
@@ -2703,32 +2703,65 @@ int coroutine_fn bdrv_co_copy_on_readv(BlockDriverState 
*bs,
 BDRV_REQ_COPY_ON_READ);
 }
 
+/* if no limit is specified in the BlockLimits use a default
+ * of 32768 512-byte sectors (16 MiB) per request.
+ */
+#define MAX_WRITE_ZEROES_DEFAULT 32768
+
 static int coroutine_fn bdrv_co_do_write_zeroes(BlockDriverState *bs,
 int64_t sector_num, int nb_sectors, BdrvRequestFlags flags)
 {
 BlockDriver *drv = bs-drv;
 QEMUIOVector qiov;
-struct iovec iov;
-int ret;
+struct iovec iov = {0};
+int ret = 0;
 
-/* TODO Emulate only part of misaligned requests instead of letting block
- * drivers return -ENOTSUP and emulate everything */
+int max_write_zeroes = bs-bl.max_write_zeroes ?
+   bs-bl.max_write_zeroes : MAX_WRITE_ZEROES_DEFAULT;
 
-/* First try the efficient write zeroes operation */
-if (drv-bdrv_co_write_zeroes) {
-ret = drv-bdrv_co_write_zeroes(bs, sector_num, nb_sectors, flags);
-if (ret != -ENOTSUP) {
-return ret;
+while (nb_sectors  0  !ret) {
+int num = nb_sectors;
+
+/* align request */
+if (bs-bl.write_zeroes_alignment 
+num = bs-bl.write_zeroes_alignment 
+sector_num % bs-bl.write_zeroes_alignment) {
+if (num  bs-bl.write_zeroes_alignment) {
+num = bs-bl.write_zeroes_alignment;
+}
+num -= sector_num % bs-bl.write_zeroes_alignment;
 }
-}
 
-/* Fall back to bounce buffer if write zeroes is unsupported */
-iov.iov_len  = nb_sectors * BDRV_SECTOR_SIZE;
-iov.iov_base = qemu_blockalign(bs, iov.iov_len);
-memset(iov.iov_base, 0, iov.iov_len);
-qemu_iovec_init_external(qiov, iov, 1);
+/* limit request size */
+if (num  max_write_zeroes) {
+num = max_write_zeroes;
+}
+
+ret = -ENOTSUP;
+/* First try the efficient write zeroes operation */
+if (drv-bdrv_co_write_zeroes) {
+ret = drv-bdrv_co_write_zeroes(bs, sector_num, num, flags);
+}
+
+if (ret == -ENOTSUP) {
+/* Fall back to bounce buffer if write zeroes is unsupported */
+iov.iov_len = num * BDRV_SECTOR_SIZE;
+if (iov.iov_base == NULL) {
+/* allocate bounce buffer only once and ensure that it
+ * is big enough for this and all future requests.
+ */
+size_t bufsize = num = nb_sectors ? num : max_write_zeroes;
+iov.iov_base = qemu_blockalign(bs, bufsize * BDRV_SECTOR_SIZE);
+memset(iov.iov_base, 0, bufsize * BDRV_SECTOR_SIZE);
+}
+qemu_iovec_init_external(qiov, iov, 1);
 
-ret = drv-bdrv_co_writev(bs, sector_num, nb_sectors, qiov);
+ret = drv-bdrv_co_writev(bs, sector_num, num, qiov);
+}
+
+sector_num += num;
+nb_sectors -= num;
+}
 
 qemu_vfree(iov.iov_base);
 return ret;
-- 
1.7.9.5




[Qemu-devel] [PATCHv6 02/17] block: add flags to bdrv_*_write_zeroes

2013-10-24 Thread Peter Lieven
Reviewed-by: Eric Blake ebl...@redhat.com
Signed-off-by: Peter Lieven p...@kamp.de
---
 block-migration.c |2 +-
 block.c   |   20 +++-
 block/backup.c|3 ++-
 block/qcow2-cluster.c |2 +-
 block/qcow2.c |2 +-
 block/qed.c   |3 ++-
 block/raw_bsd.c   |5 +++--
 block/vmdk.c  |3 ++-
 include/block/block.h |4 ++--
 include/block/block_int.h |2 +-
 qemu-io-cmds.c|2 +-
 11 files changed, 27 insertions(+), 21 deletions(-)

diff --git a/block-migration.c b/block-migration.c
index daf9ec1..713a8e3 100644
--- a/block-migration.c
+++ b/block-migration.c
@@ -780,7 +780,7 @@ static int block_load(QEMUFile *f, void *opaque, int 
version_id)
 }
 
 if (flags  BLK_MIG_FLAG_ZERO_BLOCK) {
-ret = bdrv_write_zeroes(bs, addr, nr_sectors);
+ret = bdrv_write_zeroes(bs, addr, nr_sectors, 0);
 } else {
 buf = g_malloc(BLOCK_SIZE);
 qemu_get_buffer(f, buf, BLOCK_SIZE);
diff --git a/block.c b/block.c
index eb11a07..3259429 100644
--- a/block.c
+++ b/block.c
@@ -79,7 +79,7 @@ static BlockDriverAIOCB 
*bdrv_co_aio_rw_vector(BlockDriverState *bs,
bool is_write);
 static void coroutine_fn bdrv_co_do_rw(void *opaque);
 static int coroutine_fn bdrv_co_do_write_zeroes(BlockDriverState *bs,
-int64_t sector_num, int nb_sectors);
+int64_t sector_num, int nb_sectors, BdrvRequestFlags flags);
 
 static QTAILQ_HEAD(, BlockDriverState) bdrv_states =
 QTAILQ_HEAD_INITIALIZER(bdrv_states);
@@ -2384,10 +2384,11 @@ int bdrv_writev(BlockDriverState *bs, int64_t 
sector_num, QEMUIOVector *qiov)
 return bdrv_rwv_co(bs, sector_num, qiov, true, 0);
 }
 
-int bdrv_write_zeroes(BlockDriverState *bs, int64_t sector_num, int nb_sectors)
+int bdrv_write_zeroes(BlockDriverState *bs, int64_t sector_num,
+  int nb_sectors, BdrvRequestFlags flags)
 {
 return bdrv_rw_co(bs, sector_num, NULL, nb_sectors, true,
-  BDRV_REQ_ZERO_WRITE);
+  BDRV_REQ_ZERO_WRITE | flags);
 }
 
 int bdrv_pread(BlockDriverState *bs, int64_t offset,
@@ -2569,7 +2570,7 @@ static int coroutine_fn 
bdrv_co_do_copy_on_readv(BlockDriverState *bs,
 if (drv-bdrv_co_write_zeroes 
 buffer_is_zero(bounce_buffer, iov.iov_len)) {
 ret = bdrv_co_do_write_zeroes(bs, cluster_sector_num,
-  cluster_nb_sectors);
+  cluster_nb_sectors, 0);
 } else {
 /* This does not change the data on the disk, it is not necessary
  * to flush even in cache=writethrough mode.
@@ -2703,7 +2704,7 @@ int coroutine_fn bdrv_co_copy_on_readv(BlockDriverState 
*bs,
 }
 
 static int coroutine_fn bdrv_co_do_write_zeroes(BlockDriverState *bs,
-int64_t sector_num, int nb_sectors)
+int64_t sector_num, int nb_sectors, BdrvRequestFlags flags)
 {
 BlockDriver *drv = bs-drv;
 QEMUIOVector qiov;
@@ -2715,7 +2716,7 @@ static int coroutine_fn 
bdrv_co_do_write_zeroes(BlockDriverState *bs,
 
 /* First try the efficient write zeroes operation */
 if (drv-bdrv_co_write_zeroes) {
-ret = drv-bdrv_co_write_zeroes(bs, sector_num, nb_sectors);
+ret = drv-bdrv_co_write_zeroes(bs, sector_num, nb_sectors, flags);
 if (ret != -ENOTSUP) {
 return ret;
 }
@@ -2770,7 +2771,7 @@ static int coroutine_fn 
bdrv_co_do_writev(BlockDriverState *bs,
 if (ret  0) {
 /* Do nothing, write notifier decided to fail this request */
 } else if (flags  BDRV_REQ_ZERO_WRITE) {
-ret = bdrv_co_do_write_zeroes(bs, sector_num, nb_sectors);
+ret = bdrv_co_do_write_zeroes(bs, sector_num, nb_sectors, flags);
 } else {
 ret = drv-bdrv_co_writev(bs, sector_num, nb_sectors, qiov);
 }
@@ -2804,12 +2805,13 @@ int coroutine_fn bdrv_co_writev(BlockDriverState *bs, 
int64_t sector_num,
 }
 
 int coroutine_fn bdrv_co_write_zeroes(BlockDriverState *bs,
-  int64_t sector_num, int nb_sectors)
+  int64_t sector_num, int nb_sectors,
+  BdrvRequestFlags flags)
 {
 trace_bdrv_co_write_zeroes(bs, sector_num, nb_sectors);
 
 return bdrv_co_do_writev(bs, sector_num, nb_sectors, NULL,
- BDRV_REQ_ZERO_WRITE);
+ BDRV_REQ_ZERO_WRITE | flags);
 }
 
 /**
diff --git a/block/backup.c b/block/backup.c
index cad14c9..830a179 100644
--- a/block/backup.c
+++ b/block/backup.c
@@ -138,7 +138,8 @@ static int coroutine_fn backup_do_cow(BlockDriverState *bs,
 
 if (buffer_is_zero(iov.iov_base, iov.iov_len)) {
 ret = bdrv_co_write_zeroes(job-target,
-   start * BACKUP_SECTORS_PER_CLUSTER, n);
+   

[Qemu-devel] [PATCHv6 06/17] block/iscsi: add .bdrv_get_info

2013-10-24 Thread Peter Lieven
Signed-off-by: Peter Lieven p...@kamp.de
---
 block/iscsi.c |9 +
 1 file changed, 9 insertions(+)

diff --git a/block/iscsi.c b/block/iscsi.c
index a2a961e..1dbbcad 100644
--- a/block/iscsi.c
+++ b/block/iscsi.c
@@ -1506,6 +1506,14 @@ out:
 return ret;
 }
 
+static int iscsi_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
+{
+IscsiLun *iscsilun = bs-opaque;
+bdi-unallocated_blocks_are_zero = !!iscsilun-lbprz;
+bdi-can_write_zeroes_with_unmap = iscsilun-lbprz  iscsilun-lbp.lbpws;
+return 0;
+}
+
 static QEMUOptionParameter iscsi_create_options[] = {
 {
 .name = BLOCK_OPT_SIZE,
@@ -1527,6 +1535,7 @@ static BlockDriver bdrv_iscsi = {
 .create_options  = iscsi_create_options,
 
 .bdrv_getlength  = iscsi_getlength,
+.bdrv_get_info   = iscsi_get_info,
 .bdrv_truncate   = iscsi_truncate,
 
 #if defined(LIBISCSI_FEATURE_IOVECTOR)
-- 
1.7.9.5




[Qemu-devel] [PATCHv6 11/17] iscsi: set limits in BlockDriverState

2013-10-24 Thread Peter Lieven
Reviewed-by: Eric Blake ebl...@redhat.com
Signed-off-by: Peter Lieven p...@kamp.de
---
 block/iscsi.c |   14 ++
 1 file changed, 14 insertions(+)

diff --git a/block/iscsi.c b/block/iscsi.c
index 47b9cc9..c0465aa 100644
--- a/block/iscsi.c
+++ b/block/iscsi.c
@@ -1367,6 +1367,20 @@ static int iscsi_open(BlockDriverState *bs, QDict 
*options, int flags,
sizeof(struct scsi_inquiry_block_limits));
 scsi_free_scsi_task(task);
 task = NULL;
+
+if (iscsilun-bl.max_unmap  0x) {
+bs-bl.max_discard = sector_lun2qemu(iscsilun-bl.max_unmap,
+ iscsilun);
+}
+bs-bl.discard_alignment = sector_lun2qemu(iscsilun-bl.opt_unmap_gran,
+   iscsilun);
+
+if (iscsilun-bl.max_ws_len  0x) {
+bs-bl.max_write_zeroes = sector_lun2qemu(iscsilun-bl.max_ws_len,
+  iscsilun);
+}
+bs-bl.write_zeroes_alignment = 
sector_lun2qemu(iscsilun-bl.opt_unmap_gran,
+iscsilun);
 }
 
 #if defined(LIBISCSI_FEATURE_NOP_COUNTER)
-- 
1.7.9.5




[Qemu-devel] [PATCHv6 05/17] block: add wrappers for logical block provisioning information

2013-10-24 Thread Peter Lieven
This adds 2 wrappers to read the unallocated_blocks_are_zero and
can_write_zeroes_with_unmap info from the BDI. The wrappers are
required to check for the existence of a backing_hd and
if the devices are opened with the correct flags.

Reviewed-by: Eric Blake ebl...@redhat.com
Signed-off-by: Peter Lieven p...@kamp.de
---
 block.c   |   30 ++
 include/block/block.h |2 ++
 2 files changed, 32 insertions(+)

diff --git a/block.c b/block.c
index 0d97ce6..0601b02 100644
--- a/block.c
+++ b/block.c
@@ -3094,6 +3094,36 @@ int bdrv_has_zero_init(BlockDriverState *bs)
 return 0;
 }
 
+bool bdrv_unallocated_blocks_are_zero(BlockDriverState *bs)
+{
+BlockDriverInfo bdi;
+
+if (bs-backing_hd) {
+return false;
+}
+
+if (bdrv_get_info(bs, bdi) == 0) {
+return bdi.unallocated_blocks_are_zero;
+}
+
+return false;
+}
+
+bool bdrv_can_write_zeroes_with_unmap(BlockDriverState *bs)
+{
+BlockDriverInfo bdi;
+
+if (bs-backing_hd || !(bs-open_flags  BDRV_O_UNMAP)) {
+return false;
+}
+
+if (bdrv_get_info(bs, bdi) == 0) {
+return bdi.can_write_zeroes_with_unmap;
+}
+
+return false;
+}
+
 typedef struct BdrvCoGetBlockStatusData {
 BlockDriverState *bs;
 BlockDriverState *base;
diff --git a/include/block/block.h b/include/block/block.h
index 9c76967..803c5ca 100644
--- a/include/block/block.h
+++ b/include/block/block.h
@@ -344,6 +344,8 @@ int bdrv_discard(BlockDriverState *bs, int64_t sector_num, 
int nb_sectors);
 int bdrv_co_discard(BlockDriverState *bs, int64_t sector_num, int nb_sectors);
 int bdrv_has_zero_init_1(BlockDriverState *bs);
 int bdrv_has_zero_init(BlockDriverState *bs);
+bool bdrv_unallocated_blocks_are_zero(BlockDriverState *bs);
+bool bdrv_can_write_zeroes_with_unmap(BlockDriverState *bs);
 int64_t bdrv_get_block_status(BlockDriverState *bs, int64_t sector_num,
   int nb_sectors, int *pnum);
 int bdrv_is_allocated(BlockDriverState *bs, int64_t sector_num, int nb_sectors,
-- 
1.7.9.5




[Qemu-devel] [PATCHv6 09/17] block: honour BlockLimits in bdrv_co_discard

2013-10-24 Thread Peter Lieven
Reviewed-by: Eric Blake ebl...@redhat.com
Signed-off-by: Peter Lieven p...@kamp.de
---
 block.c |   37 -
 1 file changed, 36 insertions(+), 1 deletion(-)

diff --git a/block.c b/block.c
index 0c0b0ac..b28dd42 100644
--- a/block.c
+++ b/block.c
@@ -4234,6 +4234,11 @@ static void coroutine_fn bdrv_discard_co_entry(void 
*opaque)
 rwco-ret = bdrv_co_discard(rwco-bs, rwco-sector_num, rwco-nb_sectors);
 }
 
+/* if no limit is specified in the BlockLimits use a default
+ * of 32768 512-byte sectors (16 MiB) per request.
+ */
+#define MAX_DISCARD_DEFAULT 32768
+
 int coroutine_fn bdrv_co_discard(BlockDriverState *bs, int64_t sector_num,
  int nb_sectors)
 {
@@ -4255,7 +4260,37 @@ int coroutine_fn bdrv_co_discard(BlockDriverState *bs, 
int64_t sector_num,
 }
 
 if (bs-drv-bdrv_co_discard) {
-return bs-drv-bdrv_co_discard(bs, sector_num, nb_sectors);
+int max_discard = bs-bl.max_discard ?
+  bs-bl.max_discard : MAX_DISCARD_DEFAULT;
+
+while (nb_sectors  0) {
+int ret;
+int num = nb_sectors;
+
+/* align request */
+if (bs-bl.discard_alignment 
+num = bs-bl.discard_alignment 
+sector_num % bs-bl.discard_alignment) {
+if (num  bs-bl.discard_alignment) {
+num = bs-bl.discard_alignment;
+}
+num -= sector_num % bs-bl.discard_alignment;
+}
+
+/* limit request size */
+if (num  max_discard) {
+num = max_discard;
+}
+
+ret = bs-drv-bdrv_co_discard(bs, sector_num, num);
+if (ret) {
+return ret;
+}
+
+sector_num += num;
+nb_sectors -= num;
+}
+return 0;
 } else if (bs-drv-bdrv_aio_discard) {
 BlockDriverAIOCB *acb;
 CoroutineIOCompletion co = {
-- 
1.7.9.5




[Qemu-devel] [PATCHv6 13/17] block: introduce bdrv_make_zero

2013-10-24 Thread Peter Lieven
this patch adds a call to completely zero out a block device.
the operation is sped up by checking the block status and
only writing zeroes to the device if they currently do not
return zeroes. optionally the zero writing can be sped up
by setting the flag BDRV_REQ_MAY_UNMAP to emulate the zero
write by unmapping if the driver supports it.

Signed-off-by: Peter Lieven p...@kamp.de
---
 block.c   |   37 +
 include/block/block.h |1 +
 2 files changed, 38 insertions(+)

diff --git a/block.c b/block.c
index b28dd42..21a992a 100644
--- a/block.c
+++ b/block.c
@@ -2391,6 +2391,43 @@ int bdrv_write_zeroes(BlockDriverState *bs, int64_t 
sector_num,
   BDRV_REQ_ZERO_WRITE | flags);
 }
 
+/*
+ * Completely zero out a block device with the help of bdrv_write_zeroes.
+ * The operation is sped up by checking the block status and only writing
+ * zeroes to the device if they currently do not return zeroes. Optional
+ * flags are passed through to bdrv_write_zeroes (e.g. BDRV_REQ_MAY_UNMAP).
+ *
+ * Returns < 0 on error, 0 on success. For error codes see bdrv_write().
+ */
+int bdrv_make_zero(BlockDriverState *bs, BdrvRequestFlags flags)
+{
+int64_t target_size = bdrv_getlength(bs) / BDRV_SECTOR_SIZE;
+int64_t ret, nb_sectors, sector_num = 0;
+int n;
+
+for (;;) {
+nb_sectors = target_size - sector_num;
+if (nb_sectors = 0) {
+return 0;
+}
+if (nb_sectors  INT_MAX) {
+nb_sectors = INT_MAX;
+}
+ret = bdrv_get_block_status(bs, sector_num, nb_sectors, n);
+if (ret  BDRV_BLOCK_ZERO) {
+sector_num += n;
+continue;
+}
+ret = bdrv_write_zeroes(bs, sector_num, n, flags);
+if (ret  0) {
+error_report(error writing zeroes at sector % PRId64 : %s,
+ sector_num, strerror(-ret));
+return ret;
+}
+sector_num += n;
+}
+}
+
 int bdrv_pread(BlockDriverState *bs, int64_t offset,
void *buf, int count1)
 {
diff --git a/include/block/block.h b/include/block/block.h
index 803c5ca..4d9e67c 100644
--- a/include/block/block.h
+++ b/include/block/block.h
@@ -216,6 +216,7 @@ int bdrv_write(BlockDriverState *bs, int64_t sector_num,
const uint8_t *buf, int nb_sectors);
 int bdrv_write_zeroes(BlockDriverState *bs, int64_t sector_num,
int nb_sectors, BdrvRequestFlags flags);
+int bdrv_make_zero(BlockDriverState *bs, BdrvRequestFlags flags);
 int bdrv_writev(BlockDriverState *bs, int64_t sector_num, QEMUIOVector *qiov);
 int bdrv_pread(BlockDriverState *bs, int64_t offset,
void *buf, int count);
-- 
1.7.9.5




[Qemu-devel] [PATCHv6 07/17] block: add BlockLimits structure to BlockDriverState

2013-10-24 Thread Peter Lieven
this patch adds BlockLimits which introduces discard and write_zeroes
limits and alignment information to the BlockDriverState.

Signed-off-by: Peter Lieven p...@kamp.de
---
 include/block/block_int.h |   17 +
 1 file changed, 17 insertions(+)

diff --git a/include/block/block_int.h b/include/block/block_int.h
index 9bbaa29..33be247 100644
--- a/include/block/block_int.h
+++ b/include/block/block_int.h
@@ -227,6 +227,20 @@ struct BlockDriver {
 QLIST_ENTRY(BlockDriver) list;
 };
 
+typedef struct BlockLimits {
+/* maximum number of sectors that can be discarded at once */
+int max_discard;
+
+/* optimal alignment for discard requests in sectors */
+int64_t discard_alignment;
+
+/* maximum number of sectors that can be zeroized at once */
+int max_write_zeroes;
+
+/* optimal alignment for write zeroes requests in sectors */
+int64_t write_zeroes_alignment;
+} BlockLimits;
+
 /*
  * Note: the function bdrv_append() copies and swaps contents of
  * BlockDriverStates, so if you add new fields to this struct, please
@@ -280,6 +294,9 @@ struct BlockDriverState {
 uint64_t total_time_ns[BDRV_MAX_IOTYPE];
 uint64_t wr_highest_sector;
 
+/* I/O Limits */
+BlockLimits bl;
+
 /* Whether the disk can expand beyond total_sectors */
 int growable;
 
-- 
1.7.9.5




[Qemu-devel] [PATCHv6 10/17] iscsi: simplify iscsi_co_discard

2013-10-24 Thread Peter Lieven
now that bdrv_co_discard can handle limits we do not need
the request split logic here anymore.

Reviewed-by: Eric Blake ebl...@redhat.com
Signed-off-by: Peter Lieven p...@kamp.de
---
 block/iscsi.c |   67 +
 1 file changed, 25 insertions(+), 42 deletions(-)

diff --git a/block/iscsi.c b/block/iscsi.c
index 1dbbcad..47b9cc9 100644
--- a/block/iscsi.c
+++ b/block/iscsi.c
@@ -87,7 +87,6 @@ typedef struct IscsiAIOCB {
 #define NOP_INTERVAL 5000
 #define MAX_NOP_FAILURES 3
 #define ISCSI_CMD_RETRIES 5
-#define ISCSI_MAX_UNMAP 131072
 
 static void
 iscsi_bh_cb(void *p)
@@ -912,8 +911,6 @@ coroutine_fn iscsi_co_discard(BlockDriverState *bs, int64_t 
sector_num,
 IscsiLun *iscsilun = bs-opaque;
 struct IscsiTask iTask;
 struct unmap_list list;
-uint32_t nb_blocks;
-uint32_t max_unmap;
 
 if (!is_request_lun_aligned(sector_num, nb_sectors, iscsilun)) {
 return -EINVAL;
@@ -925,52 +922,38 @@ coroutine_fn iscsi_co_discard(BlockDriverState *bs, 
int64_t sector_num,
 }
 
 list.lba = sector_qemu2lun(sector_num, iscsilun);
-nb_blocks = sector_qemu2lun(nb_sectors, iscsilun);
+list.num = sector_qemu2lun(nb_sectors, iscsilun);
 
-max_unmap = iscsilun-bl.max_unmap;
-if (max_unmap == 0x) {
-max_unmap = ISCSI_MAX_UNMAP;
-}
-
-while (nb_blocks  0) {
-iscsi_co_init_iscsitask(iscsilun, iTask);
-list.num = nb_blocks;
-if (list.num  max_unmap) {
-list.num = max_unmap;
-}
+iscsi_co_init_iscsitask(iscsilun, iTask);
 retry:
-if (iscsi_unmap_task(iscsilun-iscsi, iscsilun-lun, 0, 0, list, 1,
- iscsi_co_generic_cb, iTask) == NULL) {
-return -EIO;
-}
-
-while (!iTask.complete) {
-iscsi_set_events(iscsilun);
-qemu_coroutine_yield();
-}
+if (iscsi_unmap_task(iscsilun-iscsi, iscsilun-lun, 0, 0, list, 1,
+ iscsi_co_generic_cb, iTask) == NULL) {
+return -EIO;
+}
 
-if (iTask.task != NULL) {
-scsi_free_scsi_task(iTask.task);
-iTask.task = NULL;
-}
+while (!iTask.complete) {
+iscsi_set_events(iscsilun);
+qemu_coroutine_yield();
+}
 
-if (iTask.do_retry) {
-goto retry;
-}
+if (iTask.task != NULL) {
+scsi_free_scsi_task(iTask.task);
+iTask.task = NULL;
+}
 
-if (iTask.status == SCSI_STATUS_CHECK_CONDITION) {
-/* the target might fail with a check condition if it
-   is not happy with the alignment of the UNMAP request
-   we silently fail in this case */
-return 0;
-}
+if (iTask.do_retry) {
+goto retry;
+}
 
-if (iTask.status != SCSI_STATUS_GOOD) {
-return -EIO;
-}
+if (iTask.status == SCSI_STATUS_CHECK_CONDITION) {
+/* the target might fail with a check condition if it
+   is not happy with the alignment of the UNMAP request
+   we silently fail in this case */
+return 0;
+}
 
-list.lba += list.num;
-nb_blocks -= list.num;
+if (iTask.status != SCSI_STATUS_GOOD) {
+return -EIO;
 }
 
 return 0;
-- 
1.7.9.5




[Qemu-devel] [PATCHv6 12/17] iscsi: add bdrv_co_write_zeroes

2013-10-24 Thread Peter Lieven
Signed-off-by: Peter Lieven p...@kamp.de
---
 block/iscsi.c |   64 +
 1 file changed, 64 insertions(+)

diff --git a/block/iscsi.c b/block/iscsi.c
index c0465aa..014475d 100644
--- a/block/iscsi.c
+++ b/block/iscsi.c
@@ -56,6 +56,7 @@ typedef struct IscsiLun {
 uint8_t lbprz;
 struct scsi_inquiry_logical_block_provisioning lbp;
 struct scsi_inquiry_block_limits bl;
+unsigned char *zeroblock;
 } IscsiLun;
 
 typedef struct IscsiTask {
@@ -959,6 +960,65 @@ retry:
 return 0;
 }
 
+#if defined(SCSI_SENSE_ASCQ_CAPACITY_DATA_HAS_CHANGED)
+
+static int
+coroutine_fn iscsi_co_write_zeroes(BlockDriverState *bs, int64_t sector_num,
+   int nb_sectors, BdrvRequestFlags flags)
+{
+IscsiLun *iscsilun = bs-opaque;
+struct IscsiTask iTask;
+uint64_t lba;
+uint32_t nb_blocks;
+
+if (!is_request_lun_aligned(sector_num, nb_sectors, iscsilun)) {
+return -EINVAL;
+}
+
+if (!iscsilun-lbp.lbpws) {
+/* WRITE SAME is not supported by the target */
+return -ENOTSUP;
+}
+
+lba = sector_qemu2lun(sector_num, iscsilun);
+nb_blocks = sector_qemu2lun(nb_sectors, iscsilun);
+
+if (iscsilun-zeroblock == NULL) {
+iscsilun-zeroblock = g_malloc0(iscsilun-block_size);
+}
+
+iscsi_co_init_iscsitask(iscsilun, iTask);
+retry:
+if (iscsi_writesame16_task(iscsilun-iscsi, iscsilun-lun, lba,
+   iscsilun-zeroblock, iscsilun-block_size,
+   nb_blocks, 0, !!(flags  BDRV_REQ_MAY_UNMAP),
+   0, 0, iscsi_co_generic_cb, iTask) == NULL) {
+return -EIO;
+}
+
+while (!iTask.complete) {
+iscsi_set_events(iscsilun);
+qemu_coroutine_yield();
+}
+
+if (iTask.task != NULL) {
+scsi_free_scsi_task(iTask.task);
+iTask.task = NULL;
+}
+
+if (iTask.do_retry) {
+goto retry;
+}
+
+if (iTask.status != SCSI_STATUS_GOOD) {
+return -EIO;
+}
+
+return 0;
+}
+
+#endif /* SCSI_SENSE_ASCQ_CAPACITY_DATA_HAS_CHANGED */
+
 static int parse_chap(struct iscsi_context *iscsi, const char *target)
 {
 QemuOptsList *list;
@@ -1421,6 +1481,7 @@ static void iscsi_close(BlockDriverState *bs)
 }
 qemu_aio_set_fd_handler(iscsi_get_fd(iscsi), NULL, NULL, NULL);
 iscsi_destroy_context(iscsi);
+g_free(iscsilun->zeroblock);
 memset(iscsilun, 0, sizeof(IscsiLun));
 }
 
@@ -1539,6 +1600,9 @@ static BlockDriver bdrv_iscsi = {
 .bdrv_co_get_block_status = iscsi_co_get_block_status,
 #endif
 .bdrv_co_discard  = iscsi_co_discard,
+#if defined(SCSI_SENSE_ASCQ_CAPACITY_DATA_HAS_CHANGED)
+.bdrv_co_write_zeroes = iscsi_co_write_zeroes,
+#endif
 
 .bdrv_aio_readv  = iscsi_aio_readv,
 .bdrv_aio_writev = iscsi_aio_writev,
-- 
1.7.9.5




[Qemu-devel] [PATCHv6 14/17] block/get_block_status: fix BDRV_BLOCK_ZERO for unallocated blocks

2013-10-24 Thread Peter Lieven
this patch does 2 things:
a) only do additional call outs if BDRV_BLOCK_ZERO is not already set.
b) use the newly introduced bdrv_has_discard_zeroes() to return the
   zero state of an unallocated block. the used callout to
   bdrv_has_zero_init() is only valid right after bdrv_create.

Reviewed-by: Eric Blake ebl...@redhat.com
Signed-off-by: Peter Lieven p...@kamp.de
---
 block.c |4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/block.c b/block.c
index 21a992a..69a2d2b 100644
--- a/block.c
+++ b/block.c
@@ -3263,8 +3263,8 @@ static int64_t coroutine_fn 
bdrv_co_get_block_status(BlockDriverState *bs,
  *pnum, pnum);
 }
 
-if (!(ret & BDRV_BLOCK_DATA)) {
-if (bdrv_has_zero_init(bs)) {
+if (!(ret & BDRV_BLOCK_DATA) && !(ret & BDRV_BLOCK_ZERO)) {
+if (bdrv_unallocated_blocks_are_zero(bs)) {
 ret |= BDRV_BLOCK_ZERO;
 } else if (bs->backing_hd) {
 BlockDriverState *bs2 = bs->backing_hd;
-- 
1.7.9.5




[Qemu-devel] [PATCHv6 16/17] qemu-img: conditionally zero out target on convert

2013-10-24 Thread Peter Lieven
If the target has_zero_init = 0, but supports efficiently
writing zeroes by unmapping we call bdrv_make_zero to
avoid fully allocating the target. This currently
is designed especially for iscsi.

Reviewed-by: Eric Blake ebl...@redhat.com
Signed-off-by: Peter Lieven p...@kamp.de
---
 qemu-img.c |   10 +-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/qemu-img.c b/qemu-img.c
index c6eff15..fe0bdb1 100644
--- a/qemu-img.c
+++ b/qemu-img.c
@@ -1353,7 +1353,7 @@ static int img_convert(int argc, char **argv)
 }
 }
 
-flags = BDRV_O_RDWR;
+flags = min_sparse ? (BDRV_O_RDWR | BDRV_O_UNMAP) : BDRV_O_RDWR;
 ret = bdrv_parse_cache_flags(cache, &flags);
 if (ret < 0) {
 error_report("Invalid cache option: %s", cache);
@@ -1469,6 +1469,14 @@ static int img_convert(int argc, char **argv)
 } else {
 int has_zero_init = min_sparse ? bdrv_has_zero_init(out_bs) : 0;
 
+if (!has_zero_init && bdrv_can_write_zeroes_with_unmap(out_bs)) {
+ret = bdrv_make_zero(out_bs, BDRV_REQ_MAY_UNMAP);
+if (ret < 0) {
+goto out;
+}
+has_zero_init = 1;
+}
+
 sector_num = 0; // total number of sectors converted so far
 nb_sectors = total_sectors - sector_num;
 if (nb_sectors != 0) {
-- 
1.7.9.5




[Qemu-devel] [PATCHv6 17/17] block/raw: copy BlockLimits on raw_open

2013-10-24 Thread Peter Lieven
Signed-off-by: Peter Lieven p...@kamp.de
---
 block/raw_bsd.c |1 +
 1 file changed, 1 insertion(+)

diff --git a/block/raw_bsd.c b/block/raw_bsd.c
index b0dd23f..49ac18c 100644
--- a/block/raw_bsd.c
+++ b/block/raw_bsd.c
@@ -150,6 +150,7 @@ static int raw_open(BlockDriverState *bs, QDict *options, 
int flags,
 Error **errp)
 {
 bs->sg = bs->file->sg;
+bs->bl = bs->file->bl;
 return 0;
 }
 
-- 
1.7.9.5




[Qemu-devel] [PATCHv6 15/17] qemu-img: add support for fully allocated images

2013-10-24 Thread Peter Lieven
Signed-off-by: Peter Lieven p...@kamp.de
---
 qemu-img.c|8 +---
 qemu-img.texi |5 +
 2 files changed, 10 insertions(+), 3 deletions(-)

diff --git a/qemu-img.c b/qemu-img.c
index 926f0a0..c6eff15 100644
--- a/qemu-img.c
+++ b/qemu-img.c
@@ -100,8 +100,10 @@ static void help(void)
  '-h' with or without a command shows this help and lists the 
supported formats\n
  '-p' show progress of command (only certain commands)\n
  '-q' use Quiet mode - do not print any output (except errors)\n
- '-S' indicates the consecutive number of bytes that must contain 
only zeros\n
-  for qemu-img to create a sparse image during conversion\n
+ '-S' indicates the consecutive number of bytes (defaults to 4k) 
that must\n
+  contain only zeros for qemu-img to create a sparse image 
during\n
+  conversion. if the number of bytes is 0 sparse files are 
disabled and\n
+  images will always be fully allocated\n
  '--output' takes the format in which the output must be done 
(human or json)\n
  '-n' skips the target volume creation (useful if the volume is 
created\n
   prior to running qemu-img)\n
@@ -1465,7 +1467,7 @@ static int img_convert(int argc, char **argv)
 /* signal EOF to align */
 bdrv_write_compressed(out_bs, 0, NULL, 0);
 } else {
-int has_zero_init = bdrv_has_zero_init(out_bs);
+int has_zero_init = min_sparse ? bdrv_has_zero_init(out_bs) : 0;
 
 sector_num = 0; // total number of sectors converted so far
 nb_sectors = total_sectors - sector_num;
diff --git a/qemu-img.texi b/qemu-img.texi
index 768054e..51a1ee5 100644
--- a/qemu-img.texi
+++ b/qemu-img.texi
@@ -193,6 +193,11 @@ Image conversion is also useful to get smaller image when 
using a
 growable format such as @code{qcow} or @code{cow}: the empty sectors
 are detected and suppressed from the destination image.
 
+@var{sparse_size} indicates the consecutive number of bytes (defaults to 4k)
+that must contain only zeros for qemu-img to create a sparse image during
+conversion. If the number of bytes is 0 sparse files are disabled and
+images will always be fully allocated.
+
 You can use the @var{backing_file} option to force the output image to be
 created as a copy on write image of the specified base image; the
 @var{backing_file} should have the same content as the input's base image,
-- 
1.7.9.5




Re: [Qemu-devel] [PATCH 02/10] sysbus: Set cannot_instantiate_with_device_add_yet

2013-10-24 Thread Markus Armbruster
Peter Maydell peter.mayd...@linaro.org writes:

 On 17 October 2013 14:54,  arm...@redhat.com wrote:
 From: Markus Armbruster arm...@redhat.com

 device_add plugs devices into suitable bus.  For real buses, that
 actually connects the device.  For sysbus, the connections need to be
 made separately, and device_add can't do that.  The device would be
 left unconncected, and could not possibly work.

 unconnected

Will fix.

 Many, but not all sysbus devices alreasy set

 already

Will fix.

 cannot_instantiate_with_device_add_yet in their class init function.

 Set it in their abstract base's class init function
 sysbus_device_class_init(), and remove the now redundant assignments
 from device class init functions.

 So I think this change is probably OK (but see my comments on
 patch 1 about what our definition of the flag is supposed to be).
 But I'd like to see a list of the devices which this patch makes no-user
 which previously weren't. Then I could eyeball the list and check
 whether there's anything in it which shouldn't be.

I'll include that list in v2.

Thanks!



Re: [Qemu-devel] [PATCH 09/10] isa: Clean up use of cannot_instantiate_with_device_add_yet

2013-10-24 Thread Markus Armbruster
Peter Maydell peter.mayd...@linaro.org writes:

 On 17 October 2013 14:55,  arm...@redhat.com wrote:
 From: Markus Armbruster arm...@redhat.com

 Drop it when there's no obvious reason why device_add could not work.
 Else keep and document why.

 * isa-fdc, port92, i8042, m48t59_isa, mc146818rtc, isa-pit, kvm-pit:
   drop (from the last two by dropping it from their abstract base
   pit-common)

 port92 needs its a20_out qemu_irq line wiring up, doesn't it?

 the pit devices have an output IRQ line that needs wiring up.

Good points; I'll have a second look at these two.  Thanks!



Re: [Qemu-devel] [PATCHv2] block/vpc: check that the image has not been truncated

2013-10-24 Thread Eric Blake
On 10/24/2013 08:16 AM, Peter Lieven wrote:
 this adds a check that a dynamic VHD file has not been
 accidentally truncated (e.g. during transfer or upload).
 
 Signed-off-by: Peter Lieven p...@kamp.de
 ---
 v1-v2: used the errp argument as Eric suggested
 
  block/vpc.c |7 +++
  1 file changed, 7 insertions(+)

Reviewed-by: Eric Blake ebl...@redhat.com

-- 
Eric Blake   eblake redhat com+1-919-301-3266
Libvirt virtualization library http://libvirt.org



signature.asc
Description: OpenPGP digital signature


Re: [Qemu-devel] [PATCHv6 11/17] iscsi: set limits in BlockDriverState

2013-10-24 Thread Paolo Bonzini
Il 24/10/2013 08:46, Peter Lieven ha scritto:
 Reviewed-by: Eric Blake ebl...@redhat.com
 Signed-off-by: Peter Lieven p...@kamp.de
 ---
  block/iscsi.c |   14 ++
  1 file changed, 14 insertions(+)
 
 diff --git a/block/iscsi.c b/block/iscsi.c
 index 47b9cc9..c0465aa 100644
 --- a/block/iscsi.c
 +++ b/block/iscsi.c
 @@ -1367,6 +1367,20 @@ static int iscsi_open(BlockDriverState *bs, QDict 
 *options, int flags,
 sizeof(struct scsi_inquiry_block_limits));
  scsi_free_scsi_task(task);
  task = NULL;
 +
 +if (iscsilun-bl.max_unmap  0x) {
 +bs-bl.max_discard = sector_lun2qemu(iscsilun-bl.max_unmap,
 + iscsilun);
 +}
 +bs-bl.discard_alignment = 
 sector_lun2qemu(iscsilun-bl.opt_unmap_gran,
 +   iscsilun);
 +
 +if (iscsilun-bl.max_ws_len  0x) {
 +bs-bl.max_write_zeroes = 
 sector_lun2qemu(iscsilun-bl.max_ws_len,
 +  iscsilun);
 +}
 +bs-bl.write_zeroes_alignment = 
 sector_lun2qemu(iscsilun-bl.opt_unmap_gran,
 +iscsilun);
  }
  
  #if defined(LIBISCSI_FEATURE_NOP_COUNTER)
 

This patch and the previous one need to be swapped, but maintainers can
do that.

Paolo



Re: [Qemu-devel] [RESEND][PATCH] migration: drop MADVISE_DONT_NEED for incoming zero pages

2013-10-24 Thread Juan Quintela
Peter Lieven p...@kamp.de wrote:
 The madvise for zeroed out pages was introduced when every transferred
 zero page was memset to zero and thus allocated. Since commit
 211ea740 we check for zeroness of a target page before we memset
 it to zero. Additionally we memmap target memory so it is essentially
 zero initialized (except for e.g. option roms and bios which are loaded
 into target memory although they shouldn't).

 It was reported recently that this madvise causes a performance degradation
 in some situations. As the madvise should only be called rarely and if it's 
 called
 it is likely on a busy page (it was non-zero and changed to zero during 
 migration)
 drop it completely.

Reviewed-by: Juan Quintela quint...@redhat.com

I take it.  I am on KVM Forum/LinuxCon this week.  Will send when back
at home.

Thanks.


 Reported-By: Zhang Haoyu haoyu.zh...@huawei.com
 Acked-by: Paolo Bonzini pbonz...@redhat.com
 Signed-off-by: Peter Lieven p...@kamp.de
 ---
  arch_init.c |8 
  1 file changed, 8 deletions(-)

 diff --git a/arch_init.c b/arch_init.c
 index 7545d96..e0acbc5 100644
 --- a/arch_init.c
 +++ b/arch_init.c
 @@ -850,14 +850,6 @@ void ram_handle_compressed(void *host, uint8_t ch, 
 uint64_t size)
  {
  if (ch != 0 || !is_zero_range(host, size)) {
  memset(host, ch, size);
 -#ifndef _WIN32
 -if (ch == 0 && (!kvm_enabled() || kvm_has_sync_mmu())) {
 -size = size & ~(getpagesize() - 1);
 -if (size > 0) {
 -qemu_madvise(host, size, QEMU_MADV_DONTNEED);
 -}
 -}
 -#endif
  }
  }



Re: [Qemu-devel] [PATCHv6 14/17] block/get_block_status: fix BDRV_BLOCK_ZERO for unallocated blocks

2013-10-24 Thread Paolo Bonzini
Il 24/10/2013 08:46, Peter Lieven ha scritto:
 this patch does 2 things:
 a) only do additional call outs if BDRV_BLOCK_ZERO is not already set.
 b) use the newly introduced bdrv_has_discard_zeroes()

... whose name became bdrv_unallocated_blocks_are_zero :)  No big deal.

Paolo

 to return the
zero state of an unallocated block. the used callout to
bdrv_has_zero_init() is only valid right after bdrv_create.
 
 Reviewed-by: Eric Blake ebl...@redhat.com
 Signed-off-by: Peter Lieven p...@kamp.de
 ---
  block.c |4 ++--
  1 file changed, 2 insertions(+), 2 deletions(-)
 
 diff --git a/block.c b/block.c
 index 21a992a..69a2d2b 100644
 --- a/block.c
 +++ b/block.c
 @@ -3263,8 +3263,8 @@ static int64_t coroutine_fn 
 bdrv_co_get_block_status(BlockDriverState *bs,
   *pnum, pnum);
  }
  
 -if (!(ret  BDRV_BLOCK_DATA)) {
 -if (bdrv_has_zero_init(bs)) {
 +if (!(ret  BDRV_BLOCK_DATA)  !(ret  BDRV_BLOCK_ZERO)) {
 +if (bdrv_unallocated_blocks_are_zero(bs)) {
  ret |= BDRV_BLOCK_ZERO;
  } else if (bs-backing_hd) {
  BlockDriverState *bs2 = bs-backing_hd;
 




Re: [Qemu-devel] [PATCHv6 17/17] block/raw: copy BlockLimits on raw_open

2013-10-24 Thread Paolo Bonzini
Il 24/10/2013 08:46, Peter Lieven ha scritto:
 Signed-off-by: Peter Lieven p...@kamp.de
 ---
  block/raw_bsd.c |1 +
  1 file changed, 1 insertion(+)
 
 diff --git a/block/raw_bsd.c b/block/raw_bsd.c
 index b0dd23f..49ac18c 100644
 --- a/block/raw_bsd.c
 +++ b/block/raw_bsd.c
 @@ -150,6 +150,7 @@ static int raw_open(BlockDriverState *bs, QDict *options, 
 int flags,
  Error **errp)
  {
  bs-sg = bs-file-sg;
 +bs-bl = bs-file-bl;
  return 0;
  }
  
 

This must be moved before the introduction of BlockLimits in the iscsi
driver, or patches that use BlockLimits in block.c will not have any effect.

Paolo



Re: [Qemu-devel] [PATCHv6 15/17] qemu-img: add support for fully allocated images

2013-10-24 Thread Paolo Bonzini
Il 24/10/2013 08:46, Peter Lieven ha scritto:
 +@var{sparse_size} indicates the consecutive number of bytes (defaults to 4k)
 +that must contain only zeros for qemu-img to create a sparse image during
 +conversion. If the number of bytes is 0 sparse files are disabled and
 +images will always be fully allocated.
 +

If @var{sparse_size} is 0, the source will not be scanned for
unallocated or zero sectors, and the destination image will always be
fully allocated.

Paolo



Re: [Qemu-devel] [PATCHv6 17/17] block/raw: copy BlockLimits on raw_open

2013-10-24 Thread Peter Lieven

On 24.10.2013 11:10, Paolo Bonzini wrote:

Il 24/10/2013 08:46, Peter Lieven ha scritto:

Signed-off-by: Peter Lieven p...@kamp.de
---
  block/raw_bsd.c |1 +
  1 file changed, 1 insertion(+)

diff --git a/block/raw_bsd.c b/block/raw_bsd.c
index b0dd23f..49ac18c 100644
--- a/block/raw_bsd.c
+++ b/block/raw_bsd.c
@@ -150,6 +150,7 @@ static int raw_open(BlockDriverState *bs, QDict *options, 
int flags,
  Error **errp)
  {
  bs-sg = bs-file-sg;
+bs-bl = bs-file-bl;
  return 0;
  }
  


This must be moved before the introduction of BlockLimits in the iscsi
driver, or patches that use BlockLimits in block.c will not have any effect.

You are the first to mention this. I was thinking the whole series will
be seen as once so it shouldn't matter.

Peter



Re: [Qemu-devel] [PATCHv6 16/17] qemu-img: conditionally zero out target on convert

2013-10-24 Thread Paolo Bonzini
Il 24/10/2013 08:46, Peter Lieven ha scritto:
 This currently is designed especially for iscsi.

I'm not sure this is the way you want to spin this. :)

Perhaps "This currently works only for iscsi.  It can be extended to
raw with BLKDISCARDZEROES for example."

Paolo



Re: [Qemu-devel] [RESEND][PATCH 1.7] migration: drop MADVISE_DONT_NEED for incoming zero pages

2013-10-24 Thread Paolo Bonzini
Il 24/10/2013 08:21, Peter Lieven ha scritto:
 Additionally we memmap target memory so it is essentially
 zero initialized (except for e.g. option roms and bios which are loaded
 into target memory although they shouldn't).
 
 It was reported recently that this madvise causes a performance degradation
 in some situations. As the madvise should only be called rarely and if it's 
 called
 it is likely on a busy page (it was non-zero and changed to zero during 
 migration)
 drop it completely.

Tagging this patch for 1.7.

Paolo



Re: [Qemu-devel] [PATCH 1/2] vga: allow non-global vmstate

2013-10-24 Thread Gerd Hoffmann
  diff --git a/hw/display/vga-pci.c b/hw/display/vga-pci.c
  index b3a45c8..dee180f 100644
  --- a/hw/display/vga-pci.c
  +++ b/hw/display/vga-pci.c
  @@ -147,7 +147,7 @@ static int pci_std_vga_initfn(PCIDevice *dev)
   VGACommonState *s = d-vga;
   
   /* vga + console init */
  -vga_common_init(s, OBJECT(dev));
  +vga_common_init(s, OBJECT(dev), true);

  -void vga_common_init(VGACommonState *s, Object *obj)
  +void vga_common_init(VGACommonState *s, Object *obj, bool global_vmstate)

  -vmstate_register_ram_global(s-vram);
  +vmstate_register_ram(s-vram, global_vmstate ? NULL : DEVICE(obj));

 
 Is it possible to do it depending of the QEMU compatibility version, or 
 of some property, so we don't have to keep this global vmstate forever?

Sure.  We can easily add a property to stdvga, then make the new
vga_common_init parameter depend on the property.

 With this, I think we can also expect to be able to specify -device 
 VGA twice, and see the BIOS messages on one card or another.

It would get us a step closer, but the two vga cards would still compete
for the legacy vga io ports at 0x3c0.

That's why there is patch #2 of this series, which adds a stdvga variant
not allocating legacy ports, so it can be added without port conflicts.

cheers,
  Gerd





Re: [Qemu-devel] [PATCH 05/10] pci-host: Consistently set cannot_instantiate_with_device_add_yet

2013-10-24 Thread Markus Armbruster
Peter Maydell peter.mayd...@linaro.org writes:

 On 17 October 2013 14:54,  arm...@redhat.com wrote:
 From: Markus Armbruster arm...@redhat.com

 Many PCI host bridges consist of a sysbus device and a PCI device.
 You need both for the thing to work.  Arguably, these bridges should
 be modelled as a single, composite device instead of pairs of
 seemingly independent devices you can only use together, but we're not
 there, yet.

 I disagree here -- we should be using the modularity that our
 device model provides, not arbitrarily squashing things together
 into single objects just because we've foolishly exposed to the
 end user direct command line access to create any random object
 whatsoever even if it doesn't make sense.

I'm afraid I didn't express myself clearly.  I'm not advocating
*squashing* these components together.  I'm saying that if A and B can
only be used wired together, there should be a C composed of A, B and
the necessary wiring, and that C is what actually gets put on the board
by configuration.

 Since the sysbus part can't be instantiated with device_add, yet,
 permitting it with the PCI part is useless.  We shouldn't offer
 useless options to the user, so let's set
 cannot_instantiate_with_device_add_yet for them.

 It doesn't make sense to allow the user to create the on-PCI-bus
 representation of the host controller anyway even if they could
 device_add the host controller proper: creating the host controller
 will always automatically create the on-PCI-bus part.

Technically, a device_add i440FX-pcihost doesn't automatically create
i440FX *now*.

I suspect we're arguing only about what exact kind of crazy device_add
of the PCI-facing part of the PCI host bridge is.  Assuming we actually
agree it's crazy in *today's* state of things, does it matter what kind
of crazy it is?  If it doesn't matter, perhaps you could give me a hint
on how to rephrase the commit message.

 --- a/hw/mips/gt64xxx_pci.c
 +++ b/hw/mips/gt64xxx_pci.c
 @@ -1157,6 +1157,11 @@ static void
 gt64120_pci_class_init(ObjectClass *klass, void *data)
  k->device_id = PCI_DEVICE_ID_MARVELL_GT6412X;
  k->revision = 0x10;
  k->class_id = PCI_CLASS_BRIDGE_HOST;
 +/*
 + * PCI-facing part of the host bridge, not usable without the
 + * host-facing part, which can't be device_add'ed, yet.
 + */
 +k->parent_class.cannot_instantiate_with_device_add_yet = true;

 Please don't directly access parent_class -- you should be using
 the proper QOM cast macros to get the DeviceClass pointer.

Will fix, thanks!



Re: [Qemu-devel] [PATCHv6 16/17] qemu-img: conditionally zero out target on convert

2013-10-24 Thread Peter Lieven

On 24.10.2013 11:13, Paolo Bonzini wrote:

Il 24/10/2013 08:46, Peter Lieven ha scritto:

This currently is designed especially for iscsi.

I'm not sure this is the way you want to spin this. :)

Perhaps This currently works only for iscsi.  It can be extended to
raw with BLKDISCARDZEROES for example.

Thanks for your comments. Will respin.

Peter



[Qemu-devel] kvm binary is deprecated - solved!

2013-10-24 Thread Alexander Binun
Hi Stefan ,
 Great thanks - your easy trick works! (after I upgraded Ubuntu 13.04 to 13.10).

As for sniffing the traffic between VMs - I have yet one idea and I would 
appreciate your feedback.

The activities at VM that involve modifying data can be divided into the 
following categories:
   1. Talk through network (sending net packets to other hosts)
   2. Disk operations
   3. Memory accesses

In essence memory accesses are always performed BEFORE disk or network 
operations are executed (and the corresponding drivers are employed). For 
example, we prepare data in a buffer and send it into a socket.

That is, a sniffer in the Linux should be put at a kernel driver that makes 
physical memory available to user space. 

Thanks,
   Alex

P.S. I CC  my colleague Dr.Reuven Yagel, a member of the team I am working in.




On Mon 14 Oct 16:16 2013 Stefan Hajnoczi wrote:
 On Mon, Oct 14, 2013 at 12:36 PM, Alexander Binun bi...@cs.bgu.ac.il wrote:
  The workaround offered in bug trackers is: change the path associated with 
  the emulation tag in the xml definition file. Change it to 
  qemu-system-x86_64.
 
  Well, I am familiar with XML definition files for VMs: they are used 
  manually when defining VMs in virsh (virsh define xmldef.xml and so on). 
  There is the emulation tag there, pointing to the path to the emulator.
 
  virt-manager (used by me) creates such a file also (putting in into 
  /etc/libvirt/qemu).
 
  But so far I do not have valid XML definition files. So I intend to try the 
  following ways:
 --- find an example definition file and create a VM manually (through 
  virsh)
 --- use qemu  kvm compiled from the Git sources referred to by you.
 
 An easy trick:
 # mv /usr/bin/kvm /usr/bin/kvm.orig
 # ln -s /usr/bin/qemu-system-x86_64 /usr/bin/kvm
 
 Hopefully libvirt will be happier with the actual qemu-system-x86_64
 binary.  If this doesn't work you can move /usr/bin/kvm.orig back and
 try the other methods.
 
 Stefan
 







Re: [Qemu-devel] [PATCH 01/10] qdev: Replace no_user by cannot_instantiate_with_device_add_yet

2013-10-24 Thread Markus Armbruster
Peter Maydell peter.mayd...@linaro.org writes:

 On 17 October 2013 14:54,  arm...@redhat.com wrote:
 From: Markus Armbruster arm...@redhat.com

 In an ideal world, machines can be built by wiring devices together
 with configuration, not code.  Unfortunately, that's not the world we
 live in right now.  We still have quite a few devices that need to be
 wired up by code.  If you try to device_add such a device, it'll fail
 in sometimes mysterious ways.  If you're lucky, you get an
 unmysterious immediate crash.

 +/*
 + * Shall we hide this device model from -device / device_add?
 + * All devices should support instantiation with device_add, and
 + * this flag should not exist.  But we're not there, yet.  Some
 + * devices fail to instantiate with cryptic error messages.
 + * Others instantiate, but don't work.  Exposing users to such
 + * behavior would be cruel; this flag serves to protect them.  It
 + * should never be set without a comment explaining why it is set.
 + * TODO remove once we're there
 + */
 +bool cannot_instantiate_with_device_add_yet;

 So reading this I'm still not entirely sure what the scope of this
 flag is intended to be. I can think of two possibilities:

 (1) the minimal definition: this device would actually crash
 or cause QEMU to break if you created it with device_add
 (2) a larger definition, which includes also devices which
 are completely useless if created with device_add because
 there's no way for the user to wire them up properly.

 I think most sysbus devices are going to be in (2) but not (1),
 because they should be fine to create and initialize, but they'll
 just be sitting completely pointlessly totally disconnected from
 the machine model.

 Definition (1) is a harder boundary and more straightforward
 to check against, but definition (2) is arguably a bit more useful
 for the end user.

I agree, and I'd like us to adopt definition (2).  I tried to express
this when I wrote "instantiate, but don't work".  Care to suggest
clearer language for this comment?

Regarding (2) being less straightforward to check against: I think we
should try hard to make our cannot_instantiate_with_device_add_yet use
correct (any device we mark that way is actually useless with
device_add), but I view completeness (all the devices that are actually
useless with -device are marked) as not quite that important.



Re: [Qemu-devel] [PATCH] qemu-iotests: Test for loading VM state from qcow2

2013-10-24 Thread Kevin Wolf
Am 23.10.2013 um 20:26 hat Max Reitz geschrieben:
 Add a test for saving a VM state from a qcow2 image and loading it back
 (with having restarted qemu in between); this should work without any
 problems.
 
 Signed-off-by: Max Reitz mre...@redhat.com
 ---
 Follow-up to (depends on):
  - qcow2: Restore total_sectors value in save_vmstate
  - qcow2: Unset zero_beyond_eof in save_vmstate
 ---
  tests/qemu-iotests/068   | 65 
 
  tests/qemu-iotests/group |  1 +

068.out is missing.

Kevin



Re: [Qemu-devel] kvm binary is deprecated - solved!

2013-10-24 Thread Stefan Hajnoczi
On Thu, Oct 24, 2013 at 10:23 AM, Alexander Binun bi...@cs.bgu.ac.il wrote:
 As for sniffing the traffic between VMs - I have yet one idea and I would 
 appreciate your feedback.
[...]
 That is, a sniffer in the Linux should be put at a kernel driver that makes 
 physical memory available to user space.

I'm not sure what you are trying to do.  Can you describe your goal?

Depending on what you are trying to observe, there may already be
sniffing or tracing mechanisms available.

Stefan



Re: [Qemu-devel] [PATCH] qcow2: Restore total_sectors value in save_vmstate

2013-10-24 Thread Kevin Wolf
Am 23.10.2013 um 19:03 hat Max Reitz geschrieben:
 On 2013-10-21 22:36, Eric Blake wrote:
 On 10/20/2013 07:28 PM, Max Reitz wrote:
 Since df2a6f29a5, bdrv_co_do_writev increases the total_sectors value of
 a growable block device on writes after the current end. This leads to
 the virtual disk apparently growing in qcow2_save_vmstate, which in turn
 affects the disk size captured by the internal snapshot taken directly
 afterwards through e.g. the HMP savevm command. Such a grown snapshot
 cannot be loaded after reopening the qcow2 image, since its disk size
 differs from the actual virtual disk size (writing a VM state does not
 actually increase the virtual disk size).
 
 Fix this by restoring total_sectors at the end of qcow2_save_vmstate.
 
 Signed-off-by: Max Reitz mre...@redhat.com
 ---
   block/qcow2.c | 5 +
   1 file changed, 5 insertions(+)
 
 @@ -1946,6 +1947,10 @@ static int qcow2_save_vmstate(BlockDriverState *bs, 
 QEMUIOVector *qiov,
   bs-growable = 1;
   ret = bdrv_pwritev(bs, qcow2_vm_state_offset(s) + pos, qiov);
   bs-growable = growable;
 +// bdrv_co_do_writev will have increased the total_sectors value to 
 include
 +// the VM state - the VM state is however not an actual part of the 
 block
 +// device, therefore, we need to restore the old value.
 +bs->total_sectors = total_sectors;
 It looks like // comments aren't forbidden, but also uncommon; I don't
 know if /**/ would be better.  At any rate:
 
 Ah, right, sorry, I forgot.

Thanks, fixed up the comment and applied to the block branch.

Kevin



Re: [Qemu-devel] [PATCH] qcow2: Unset zero_beyond_eof in save_vmstate

2013-10-24 Thread Kevin Wolf
Am 21.10.2013 um 22:37 hat Eric Blake geschrieben:
 On 10/20/2013 08:52 PM, Max Reitz wrote:
  Saving the VM state is done using bdrv_pwrite. This function may perform
  a read-modify-write, which in this case results in data being read from
  beyond the end of the virtual disk. Since we are actually trying to
  access an area which is not a part of the virtual disk, zero_beyond_eof
  has to be set to false before performing the partial write, otherwise
  the VM state may become corrupted.
  
  Signed-off-by: Max Reitz mre...@redhat.com
  ---
  Follow-up to (depends on):
   - qcow2: Restore total_sectors value in save_vmstate
 
 Reviewed-by: Eric Blake ebl...@redhat.com

Thanks, applied to the block branch.

Kevin



[Qemu-devel] observing VM actions

2013-10-24 Thread Alexander Binun
I am trying to observe the memory/disk/network accesses done by a VM. The 
resulting log can be used to decide whether a VM initiates a malicious action 
(because , say, it runs a malicious software). 


On Thu 24 Oct 11:49 2013 Stefan Hajnoczi wrote:
 On Thu, Oct 24, 2013 at 10:23 AM, Alexander Binun bi...@cs.bgu.ac.il wrote:
  As for sniffing the traffic between VMs - I have yet one idea and I would 
  appreciate your feedback.
 [...]
  That is, a sniffer in the Linux should be put at a kernel driver that makes 
  physical memory available to user space.
 
 I'm not sure what you are trying to do.  Can you describe your goal?
 
 Depending on what you are trying to observe, there may already be
 sniffing or tracing mechanisms available.
 
 Stefan
 







Re: [Qemu-devel] [PATCH] qcow2: Flush image after creation

2013-10-24 Thread Kevin Wolf
Am 23.10.2013 um 21:40 hat Max Reitz geschrieben:
 Opening the qcow2 image with BDRV_O_NO_FLUSH prevents any flushes during
 the image creation. This means that the image has not yet been flushed
 to disk when qemu-img create exits. This flush is delayed until the next
 operation on the image involving opening it without BDRV_O_NO_FLUSH and
 closing (or directly flushing) it. For large images and/or images with a
 small cluster size and preallocated metadata, this flush may take a
 significant amount of time and may occur unexpectedly.
 
 Reopening the image without BDRV_O_NO_FLUSH right before the end of
 qcow2_create2() results in preponing the potentially costly flush into
 the image creation, which is expected to take some time (whereas
 successive image operations may be not).
 
 Signed-off-by: Max Reitz mre...@redhat.com
 ---
  block/qcow2.c | 10 +-
  1 file changed, 9 insertions(+), 1 deletion(-)
 
 diff --git a/block/qcow2.c b/block/qcow2.c
 index c1abaff..8b98c3a 100644
 --- a/block/qcow2.c
 +++ b/block/qcow2.c
 @@ -1584,7 +1584,15 @@ static int qcow2_create2(const char *filename, int64_t 
 total_size,
  }
  }
  
 -ret = 0;

I would prefer to keep the explicit ret = 0 there (just like the
unnecessary last 'goto out:', it just makes things more obvious and
consistent)

 +bdrv_close(bs);
 +
 +/* Reopen the image without BDRV_O_NO_FLUSH to flush it before returning 
 */
 +ret = bdrv_open(bs, filename, NULL,
 +BDRV_O_RDWR | BDRV_O_CACHE_WB, drv, &local_err);
 +if (error_is_set(&local_err)) {
 +error_propagate(errp, local_err);

So a goto here wouldn't hurt either. Note how the unnecessary goto in
the block before allowed you to just add your new code without modifying
existing parts.

 +}
 +
  out:
  bdrv_unref(bs);
  return ret;

Kevin



Re: [Qemu-devel] [PATCHv2] qemu-img: add special exit code if bdrv_check is not supported

2013-10-24 Thread Kevin Wolf
Am 24.10.2013 um 09:00 hat Eric Blake geschrieben:
 On 10/24/2013 07:53 AM, Peter Lieven wrote:
  currently it is not possible to distinguish by exitcode if there
  has been an error or if bdrv_check is not supported by the image
  format. Change the exitcode from 1 to 63 for the latter case.
  
  Signed-off-by: Peter Lieven p...@kamp.de
  ---
  v1-v2: As Eric suggested changed the exitcode from 255 to 63.
  
   qemu-img.c |2 +-
   1 file changed, 1 insertion(+), 1 deletion(-)
 
 Should the man page document this exit status?  Then again, it doesn't
 document ANY exit status, so that could be a separate patch.
 
 Reviewed-by: Eric Blake ebl...@redhat.com

Thanks, applied to the block branch.

Kevin



Re: [Qemu-devel] [PATCH] qcow2: Flush image after creation

2013-10-24 Thread Eric Blake
On 10/23/2013 08:40 PM, Max Reitz wrote:
 Opening the qcow2 image with BDRV_O_NO_FLUSH prevents any flushes during
 the image creation. This means that the image has not yet been flushed
 to disk when qemu-img create exits. This flush is delayed until the next
 operation on the image involving opening it without BDRV_O_NO_FLUSH and
 closing (or directly flushing) it. For large images and/or images with a
 small cluster size and preallocated metadata, this flush may take a
 significant amount of time and may occur unexpectedly.
 
 Reopening the image without BDRV_O_NO_FLUSH right before the end of
 qcow2_create2() results in preponing the potentially costly flush into

s/preponing/hoisting/

 the image creation, which is expected to take some time (whereas
 successive image operations may be not).
 
 Signed-off-by: Max Reitz mre...@redhat.com
 ---
  block/qcow2.c | 10 +-
  1 file changed, 9 insertions(+), 1 deletion(-)

Reviewed-by: Eric Blake ebl...@redhat.com

 -ret = 0;
 +bdrv_close(bs);
 +
 +/* Reopen the image without BDRV_O_NO_FLUSH to flush it before returning 
 */
 +ret = bdrv_open(bs, filename, NULL,
 +BDRV_O_RDWR | BDRV_O_CACHE_WB, drv, local_err);

I would probably have indented as:

ret = bdrv_open(bs, filename, NULL,
BDRV_O_RDWR | BDRV_O_CACHE_WB,
drv, local_err);

but it's trivial enough that I'm also fine with your choice.

-- 
Eric Blake   eblake redhat com+1-919-301-3266
Libvirt virtualization library http://libvirt.org



signature.asc
Description: OpenPGP digital signature


Re: [Qemu-devel] [PATCHv6 17/17] block/raw: copy BlockLimits on raw_open

2013-10-24 Thread Paolo Bonzini
Il 24/10/2013 10:12, Peter Lieven ha scritto:
 This must be moved before the introduction of BlockLimits in the iscsi
 driver, or patches that use BlockLimits in block.c will not have any
 effect.
 You are the first to mention this. I was thinking the whole series will
 be seen as once so it shouldn't matter.

In general, series should keep old functionality at all stages.  This
helps when someone reports a regression, because we can ask them to
bisect and not have them burdened by problems in the middle of a series.
 (It would also help you debugging things, if this series turned out to
have a bug).

After patch 10 of this series, an iSCSI array will stop receiving split
requests for large discards.  This may introduce spurious failures.

I made the same remark on patch 11, but that patch alone is not enough
to restore this; you need this one too for patch 11 to have any effect.
 So the correct order is patch 17 first, then patch 11, then patch 10.
In other words, remove code only after it has become dead.

Paolo



Re: [Qemu-devel] [PATCHv2] block/vpc: check that the image has not been truncated

2013-10-24 Thread Kevin Wolf
Am 24.10.2013 um 09:16 hat Peter Lieven geschrieben:
 this adds a check that a dynamic VHD file has not been
 accidentally truncated (e.g. during transfer or upload).
 
 Signed-off-by: Peter Lieven p...@kamp.de

Thanks, applied to the block branch.

Kevin



[Qemu-devel] [PATCHv7 00/17] block: logical block provisioning enhancements

2013-10-24 Thread Peter Lieven
this patch adds the ability for targets to stay sparse during
block migration (if the zero_blocks capability is set) and qemu-img convert
even if the target does not have has_zero_init = 1.

the series was especially developed for iSCSI, but it should also work
with other drivers with little or no adjustments. these adjustments
should be limited to providing block provisioning information through
get_block_info and/or honouring BDRV_REQ_MAY_UNMAP on writing zeroes.

v6-v7:
 - switched position of iscsi: set limits in BlockDriverState and
   iscsi: simplify iscsi_co_discard. (Paolo)
 - fixed commit message of 
   block/get_block_status: fix BDRV_BLOCK_ZERO for unallocated blocks.
   (Paolo)
 - moved block/raw: copy BlockLimits on raw_open right after
   block: add BlockLimits structure to BlockDriverState. (Paolo)
 - Reworded description for -S 0 in
   qemu-img: add support for fully allocated images as suggested
   by Paolo.
 - Reworded commit message of:
   qemu-img: conditionally zero out target on convert.
   regarding iscsi (Paolo)
 
v5-v6:
 - protected iscsi_co_write_zeroes by the existence of the
   SCSI_SENSE_ASCQ_CAPACITY_DATA_HAS_CHANGED macro. This is ugly
   but necessary because the semantic of iscsi_writesame16_task
   silently changed between libiscsi 1.8.0 and 1.9.0. The above
   macro was the first added after the change. I already contacted
   Ronnie to introduce an API version macro which has to be bumped
   on each new function that will be added. Changes to the parameters
   should not happen at all of course.

v4-v5:
 - new patches 4-6 to move the block provisioning information
   to the BlockDriverInfo.
 - kept 2 wrappers to read the information from the BDI and
   renamed them to make more clear what they do:

 bdrv_has_discard_zeroes - bdrv_unallocated_blocks_are_zero
 bdrv_has_discard_write_zeroes - bdrv_can_write_zeroes_with_unmap

 - added additional information about the 2 flags in the
   BDI struct in block.h

v3-v4:
 - changed BlockLimits struct to typedef (Stefan, Eric)
 - renamed bdrv_zeroize to bdrv_make_zero (Stefan)
 - added comment about the -S flag of qemu-img convert in
   qemu-img.texi (Eric)
 - used struct assignment for bs-bl in raw_open (Stefan, Eric)
 - dropped 3 get_block_status fixes that are independent of
   this series and already partly merged.

v2-v3:
 - fix merge conflict in block/qcow2_cluster.c
 - changed return type of bdrv_has_discard_zeroes and
   bdrv_has_discard_write_zeroes to bool.
 - moved alignment and limits info to a BlockLimits struct (Paolo).
 - added magic constants for default maximum in bdrv_co_do_write_zeroes
   and bdrv_co_discard (Eric).
 - bdrv_co_do_write_zeroes: allocating the bounce buffer only once (Eric),
   fixed bounce iov_len in the fall back path.
 - bdrv_zeroize: added inline docu (Eric) and do not mask flags passed
   to bdrv_write_zeroes (Eric).
 - qemu-img: changed the default hint for -S (min_sparse) in the usage
   help to 4k. not changing the default as it is unclear why this default
   was set. size suffixes are already supported (Eric).

v1-v2:
 - moved block max_discard and max_write_zeroes to BlockDriverState
 - added discard_alignment and write_zeroes_alignment to BlockDriverState
 - added bdrv_has_discard_zeroes() and bdrv_has_discard_write_zeroes()
 - added logic to bdrv_co_discard and bdrv_co_do_write_zeroes to honour
   limit and alignment info.
 - added support for -S 0 in qemu-img convert.

Peter Lieven (17):
  block: make BdrvRequestFlags public
  block: add flags to bdrv_*_write_zeroes
  block: introduce BDRV_REQ_MAY_UNMAP request flag
  block: add logical block provisioning info to BlockDriverInfo
  block: add wrappers for logical block provisioning information
  block/iscsi: add .bdrv_get_info
  block: add BlockLimits structure to BlockDriverState
  block/raw: copy BlockLimits on raw_open
  block: honour BlockLimits in bdrv_co_do_write_zeroes
  block: honour BlockLimits in bdrv_co_discard
  iscsi: set limits in BlockDriverState
  iscsi: simplify iscsi_co_discard
  iscsi: add bdrv_co_write_zeroes
  block: introduce bdrv_make_zero
  block/get_block_status: fix BDRV_BLOCK_ZERO for unallocated blocks
  qemu-img: add support for fully allocated images
  qemu-img: conditionally zero out target on convert

 block-migration.c |3 +-
 block.c   |  200 +
 block/backup.c|3 +-
 block/iscsi.c |  150 +-
 block/qcow2-cluster.c |2 +-
 block/qcow2.c |2 +-
 block/qed.c   |3 +-
 block/raw_bsd.c   |6 +-
 block/vmdk.c  |3 +-
 include/block/block.h |   35 +++-
 include/block/block_int.h |   19 -
 qemu-img.c|   20 -
 qemu-img.texi |6 ++
 qemu-io-cmds.c|2 +-
 14 files changed, 366 insertions(+), 88 deletions(-)

-- 
1.7.9.5




[Qemu-devel] [PATCHv7 06/17] block/iscsi: add .bdrv_get_info

2013-10-24 Thread Peter Lieven
Signed-off-by: Peter Lieven p...@kamp.de
---
 block/iscsi.c |9 +
 1 file changed, 9 insertions(+)

diff --git a/block/iscsi.c b/block/iscsi.c
index a2a961e..1dbbcad 100644
--- a/block/iscsi.c
+++ b/block/iscsi.c
@@ -1506,6 +1506,14 @@ out:
 return ret;
 }
 
+static int iscsi_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
+{
+IscsiLun *iscsilun = bs-opaque;
+bdi-unallocated_blocks_are_zero = !!iscsilun-lbprz;
+bdi-can_write_zeroes_with_unmap = iscsilun-lbprz  iscsilun-lbp.lbpws;
+return 0;
+}
+
 static QEMUOptionParameter iscsi_create_options[] = {
 {
 .name = BLOCK_OPT_SIZE,
@@ -1527,6 +1535,7 @@ static BlockDriver bdrv_iscsi = {
 .create_options  = iscsi_create_options,
 
 .bdrv_getlength  = iscsi_getlength,
+.bdrv_get_info   = iscsi_get_info,
 .bdrv_truncate   = iscsi_truncate,
 
 #if defined(LIBISCSI_FEATURE_IOVECTOR)
-- 
1.7.9.5




[Qemu-devel] [PATCHv7 05/17] block: add wrappers for logical block provisioning information

2013-10-24 Thread Peter Lieven
This adds 2 wrappers to read the unallocated_blocks_are_zero and
can_write_zeroes_with_unmap info from the BDI. The wrappers are
required to check for the existence of a backing_hd and
if the devices are opened with the correct flags.

Reviewed-by: Eric Blake ebl...@redhat.com
Signed-off-by: Peter Lieven p...@kamp.de
---
 block.c   |   30 ++
 include/block/block.h |2 ++
 2 files changed, 32 insertions(+)

diff --git a/block.c b/block.c
index 0d97ce6..0601b02 100644
--- a/block.c
+++ b/block.c
@@ -3094,6 +3094,36 @@ int bdrv_has_zero_init(BlockDriverState *bs)
 return 0;
 }
 
+bool bdrv_unallocated_blocks_are_zero(BlockDriverState *bs)
+{
+BlockDriverInfo bdi;
+
+if (bs-backing_hd) {
+return false;
+}
+
+if (bdrv_get_info(bs, bdi) == 0) {
+return bdi.unallocated_blocks_are_zero;
+}
+
+return false;
+}
+
+bool bdrv_can_write_zeroes_with_unmap(BlockDriverState *bs)
+{
+BlockDriverInfo bdi;
+
+if (bs-backing_hd || !(bs-open_flags  BDRV_O_UNMAP)) {
+return false;
+}
+
+if (bdrv_get_info(bs, bdi) == 0) {
+return bdi.can_write_zeroes_with_unmap;
+}
+
+return false;
+}
+
 typedef struct BdrvCoGetBlockStatusData {
 BlockDriverState *bs;
 BlockDriverState *base;
diff --git a/include/block/block.h b/include/block/block.h
index 9c76967..803c5ca 100644
--- a/include/block/block.h
+++ b/include/block/block.h
@@ -344,6 +344,8 @@ int bdrv_discard(BlockDriverState *bs, int64_t sector_num, 
int nb_sectors);
 int bdrv_co_discard(BlockDriverState *bs, int64_t sector_num, int nb_sectors);
 int bdrv_has_zero_init_1(BlockDriverState *bs);
 int bdrv_has_zero_init(BlockDriverState *bs);
+bool bdrv_unallocated_blocks_are_zero(BlockDriverState *bs);
+bool bdrv_can_write_zeroes_with_unmap(BlockDriverState *bs);
 int64_t bdrv_get_block_status(BlockDriverState *bs, int64_t sector_num,
   int nb_sectors, int *pnum);
 int bdrv_is_allocated(BlockDriverState *bs, int64_t sector_num, int nb_sectors,
-- 
1.7.9.5




[Qemu-devel] [PATCHv7 07/17] block: add BlockLimits structure to BlockDriverState

2013-10-24 Thread Peter Lieven
this patch adds BlockLimits which introduces discard and write_zeroes
limits and alignment information to the BlockDriverState.

Signed-off-by: Peter Lieven p...@kamp.de
---
 include/block/block_int.h |   17 +
 1 file changed, 17 insertions(+)

diff --git a/include/block/block_int.h b/include/block/block_int.h
index 9bbaa29..33be247 100644
--- a/include/block/block_int.h
+++ b/include/block/block_int.h
@@ -227,6 +227,20 @@ struct BlockDriver {
 QLIST_ENTRY(BlockDriver) list;
 };
 
+typedef struct BlockLimits {
+/* maximum number of sectors that can be discarded at once */
+int max_discard;
+
+/* optimal alignment for discard requests in sectors */
+int64_t discard_alignment;
+
+/* maximum number of sectors that can zeroized at once */
+int max_write_zeroes;
+
+/* optimal alignment for write zeroes requests in sectors */
+int64_t write_zeroes_alignment;
+} BlockLimits;
+
 /*
  * Note: the function bdrv_append() copies and swaps contents of
  * BlockDriverStates, so if you add new fields to this struct, please
@@ -280,6 +294,9 @@ struct BlockDriverState {
 uint64_t total_time_ns[BDRV_MAX_IOTYPE];
 uint64_t wr_highest_sector;
 
+/* I/O Limits */
+BlockLimits bl;
+
 /* Whether the disk can expand beyond total_sectors */
 int growable;
 
-- 
1.7.9.5




Re: [Qemu-devel] [PATCHv2] block/vpc: check that the image has not been truncated

2013-10-24 Thread Peter Lieven

On 24.10.2013 12:06, Kevin Wolf wrote:

Am 24.10.2013 um 09:16 hat Peter Lieven geschrieben:

this adds a check that a dynamic VHD file has not been
accidentally truncated (e.g. during transfer or upload).

Signed-off-by: Peter Lieven p...@kamp.de

Thanks, applied to the block branch.

Can you have a look at:
 block/vpc: fix virtual size for images created with disk2vhd

as well please.

Peter



[Qemu-devel] [PATCHv7 14/17] block: introduce bdrv_make_zero

2013-10-24 Thread Peter Lieven
this patch adds a call to completely zero out a block device.
the operation is sped up by checking the block status and
only writing zeroes to the device if they currently do not
return zeroes. optionally the zero writing can be sped up
by setting the flag BDRV_REQ_MAY_UNMAP to emulate the zero
write by unmapping if the driver supports it.

Signed-off-by: Peter Lieven p...@kamp.de
---
 block.c   |   37 +
 include/block/block.h |1 +
 2 files changed, 38 insertions(+)

diff --git a/block.c b/block.c
index b28dd42..21a992a 100644
--- a/block.c
+++ b/block.c
@@ -2391,6 +2391,43 @@ int bdrv_write_zeroes(BlockDriverState *bs, int64_t 
sector_num,
   BDRV_REQ_ZERO_WRITE | flags);
 }
 
+/*
+ * Completely zero out a block device with the help of bdrv_write_zeroes.
+ * The operation is sped up by checking the block status and only writing
+ * zeroes to the device if they currently do not return zeroes. Optional
+ * flags are passed through to bdrv_write_zeroes (e.g. BDRV_REQ_MAY_UNMAP).
+ *
+ * Returns < 0 on error, 0 on success. For error codes see bdrv_write().
+ */
+int bdrv_make_zero(BlockDriverState *bs, BdrvRequestFlags flags)
+{
+int64_t target_size = bdrv_getlength(bs) / BDRV_SECTOR_SIZE;
+int64_t ret, nb_sectors, sector_num = 0;
+int n;
+
+for (;;) {
+nb_sectors = target_size - sector_num;
+if (nb_sectors = 0) {
+return 0;
+}
+if (nb_sectors  INT_MAX) {
+nb_sectors = INT_MAX;
+}
+ret = bdrv_get_block_status(bs, sector_num, nb_sectors, n);
+if (ret  BDRV_BLOCK_ZERO) {
+sector_num += n;
+continue;
+}
+ret = bdrv_write_zeroes(bs, sector_num, n, flags);
+if (ret  0) {
+error_report(error writing zeroes at sector % PRId64 : %s,
+ sector_num, strerror(-ret));
+return ret;
+}
+sector_num += n;
+}
+}
+
 int bdrv_pread(BlockDriverState *bs, int64_t offset,
void *buf, int count1)
 {
diff --git a/include/block/block.h b/include/block/block.h
index 803c5ca..4d9e67c 100644
--- a/include/block/block.h
+++ b/include/block/block.h
@@ -216,6 +216,7 @@ int bdrv_write(BlockDriverState *bs, int64_t sector_num,
const uint8_t *buf, int nb_sectors);
 int bdrv_write_zeroes(BlockDriverState *bs, int64_t sector_num,
int nb_sectors, BdrvRequestFlags flags);
+int bdrv_make_zero(BlockDriverState *bs, BdrvRequestFlags flags);
 int bdrv_writev(BlockDriverState *bs, int64_t sector_num, QEMUIOVector *qiov);
 int bdrv_pread(BlockDriverState *bs, int64_t offset,
void *buf, int count);
-- 
1.7.9.5




[Qemu-devel] [PATCHv7 11/17] iscsi: set limits in BlockDriverState

2013-10-24 Thread Peter Lieven
Reviewed-by: Eric Blake ebl...@redhat.com
Signed-off-by: Peter Lieven p...@kamp.de
---
 block/iscsi.c |   14 ++
 1 file changed, 14 insertions(+)

diff --git a/block/iscsi.c b/block/iscsi.c
index 1dbbcad..16d8052 100644
--- a/block/iscsi.c
+++ b/block/iscsi.c
@@ -1384,6 +1384,20 @@ static int iscsi_open(BlockDriverState *bs, QDict 
*options, int flags,
sizeof(struct scsi_inquiry_block_limits));
 scsi_free_scsi_task(task);
 task = NULL;
+
+if (iscsilun-bl.max_unmap  0x) {
+bs-bl.max_discard = sector_lun2qemu(iscsilun-bl.max_unmap,
+ iscsilun);
+}
+bs-bl.discard_alignment = sector_lun2qemu(iscsilun-bl.opt_unmap_gran,
+   iscsilun);
+
+if (iscsilun-bl.max_ws_len  0x) {
+bs-bl.max_write_zeroes = sector_lun2qemu(iscsilun-bl.max_ws_len,
+  iscsilun);
+}
+bs-bl.write_zeroes_alignment = 
sector_lun2qemu(iscsilun-bl.opt_unmap_gran,
+iscsilun);
 }
 
 #if defined(LIBISCSI_FEATURE_NOP_COUNTER)
-- 
1.7.9.5




[Qemu-devel] [PATCHv7 13/17] iscsi: add bdrv_co_write_zeroes

2013-10-24 Thread Peter Lieven
Signed-off-by: Peter Lieven p...@kamp.de
---
 block/iscsi.c |   64 +
 1 file changed, 64 insertions(+)

diff --git a/block/iscsi.c b/block/iscsi.c
index c0465aa..014475d 100644
--- a/block/iscsi.c
+++ b/block/iscsi.c
@@ -56,6 +56,7 @@ typedef struct IscsiLun {
 uint8_t lbprz;
 struct scsi_inquiry_logical_block_provisioning lbp;
 struct scsi_inquiry_block_limits bl;
+unsigned char *zeroblock;
 } IscsiLun;
 
 typedef struct IscsiTask {
@@ -959,6 +960,65 @@ retry:
 return 0;
 }
 
+#if defined(SCSI_SENSE_ASCQ_CAPACITY_DATA_HAS_CHANGED)
+
+static int
+coroutine_fn iscsi_co_write_zeroes(BlockDriverState *bs, int64_t sector_num,
+   int nb_sectors, BdrvRequestFlags flags)
+{
+IscsiLun *iscsilun = bs-opaque;
+struct IscsiTask iTask;
+uint64_t lba;
+uint32_t nb_blocks;
+
+if (!is_request_lun_aligned(sector_num, nb_sectors, iscsilun)) {
+return -EINVAL;
+}
+
+if (!iscsilun-lbp.lbpws) {
+/* WRITE SAME is not supported by the target */
+return -ENOTSUP;
+}
+
+lba = sector_qemu2lun(sector_num, iscsilun);
+nb_blocks = sector_qemu2lun(nb_sectors, iscsilun);
+
+if (iscsilun-zeroblock == NULL) {
+iscsilun-zeroblock = g_malloc0(iscsilun-block_size);
+}
+
+iscsi_co_init_iscsitask(iscsilun, iTask);
+retry:
+if (iscsi_writesame16_task(iscsilun-iscsi, iscsilun-lun, lba,
+   iscsilun-zeroblock, iscsilun-block_size,
+   nb_blocks, 0, !!(flags  BDRV_REQ_MAY_UNMAP),
+   0, 0, iscsi_co_generic_cb, iTask) == NULL) {
+return -EIO;
+}
+
+while (!iTask.complete) {
+iscsi_set_events(iscsilun);
+qemu_coroutine_yield();
+}
+
+if (iTask.task != NULL) {
+scsi_free_scsi_task(iTask.task);
+iTask.task = NULL;
+}
+
+if (iTask.do_retry) {
+goto retry;
+}
+
+if (iTask.status != SCSI_STATUS_GOOD) {
+return -EIO;
+}
+
+return 0;
+}
+
+#endif /* SCSI_SENSE_ASCQ_CAPACITY_DATA_HAS_CHANGED */
+
 static int parse_chap(struct iscsi_context *iscsi, const char *target)
 {
 QemuOptsList *list;
@@ -1421,6 +1481,7 @@ static void iscsi_close(BlockDriverState *bs)
 }
 qemu_aio_set_fd_handler(iscsi_get_fd(iscsi), NULL, NULL, NULL);
 iscsi_destroy_context(iscsi);
+g_free(iscsilun-zeroblock);
 memset(iscsilun, 0, sizeof(IscsiLun));
 }
 
@@ -1539,6 +1600,9 @@ static BlockDriver bdrv_iscsi = {
 .bdrv_co_get_block_status = iscsi_co_get_block_status,
 #endif
 .bdrv_co_discard  = iscsi_co_discard,
+#if defined(SCSI_SENSE_ASCQ_CAPACITY_DATA_HAS_CHANGED)
+.bdrv_co_write_zeroes = iscsi_co_write_zeroes,
+#endif
 
 .bdrv_aio_readv  = iscsi_aio_readv,
 .bdrv_aio_writev = iscsi_aio_writev,
-- 
1.7.9.5




[Qemu-devel] [PATCHv7 09/17] block: honour BlockLimits in bdrv_co_do_write_zeroes

2013-10-24 Thread Peter Lieven
Reviewed-by: Eric Blake ebl...@redhat.com
Signed-off-by: Peter Lieven p...@kamp.de
---
 block.c |   65 +++
 1 file changed, 49 insertions(+), 16 deletions(-)

diff --git a/block.c b/block.c
index 0601b02..0c0b0ac 100644
--- a/block.c
+++ b/block.c
@@ -2703,32 +2703,65 @@ int coroutine_fn bdrv_co_copy_on_readv(BlockDriverState 
*bs,
 BDRV_REQ_COPY_ON_READ);
 }
 
+/* if no limit is specified in the BlockLimits use a default
+ * of 32768 512-byte sectors (16 MiB) per request.
+ */
+#define MAX_WRITE_ZEROES_DEFAULT 32768
+
 static int coroutine_fn bdrv_co_do_write_zeroes(BlockDriverState *bs,
 int64_t sector_num, int nb_sectors, BdrvRequestFlags flags)
 {
 BlockDriver *drv = bs-drv;
 QEMUIOVector qiov;
-struct iovec iov;
-int ret;
+struct iovec iov = {0};
+int ret = 0;
 
-/* TODO Emulate only part of misaligned requests instead of letting block
- * drivers return -ENOTSUP and emulate everything */
+int max_write_zeroes = bs-bl.max_write_zeroes ?
+   bs-bl.max_write_zeroes : MAX_WRITE_ZEROES_DEFAULT;
 
-/* First try the efficient write zeroes operation */
-if (drv-bdrv_co_write_zeroes) {
-ret = drv-bdrv_co_write_zeroes(bs, sector_num, nb_sectors, flags);
-if (ret != -ENOTSUP) {
-return ret;
+while (nb_sectors  0  !ret) {
+int num = nb_sectors;
+
+/* align request */
+if (bs-bl.write_zeroes_alignment 
+num = bs-bl.write_zeroes_alignment 
+sector_num % bs-bl.write_zeroes_alignment) {
+if (num  bs-bl.write_zeroes_alignment) {
+num = bs-bl.write_zeroes_alignment;
+}
+num -= sector_num % bs-bl.write_zeroes_alignment;
 }
-}
 
-/* Fall back to bounce buffer if write zeroes is unsupported */
-iov.iov_len  = nb_sectors * BDRV_SECTOR_SIZE;
-iov.iov_base = qemu_blockalign(bs, iov.iov_len);
-memset(iov.iov_base, 0, iov.iov_len);
-qemu_iovec_init_external(qiov, iov, 1);
+/* limit request size */
+if (num  max_write_zeroes) {
+num = max_write_zeroes;
+}
+
+ret = -ENOTSUP;
+/* First try the efficient write zeroes operation */
+if (drv-bdrv_co_write_zeroes) {
+ret = drv-bdrv_co_write_zeroes(bs, sector_num, num, flags);
+}
+
+if (ret == -ENOTSUP) {
+/* Fall back to bounce buffer if write zeroes is unsupported */
+iov.iov_len = num * BDRV_SECTOR_SIZE;
+if (iov.iov_base == NULL) {
+/* allocate bounce buffer only once and ensure that it
+ * is big enough for this and all future requests.
+ */
+size_t bufsize = num = nb_sectors ? num : max_write_zeroes;
+iov.iov_base = qemu_blockalign(bs, bufsize * BDRV_SECTOR_SIZE);
+memset(iov.iov_base, 0, bufsize * BDRV_SECTOR_SIZE);
+}
+qemu_iovec_init_external(qiov, iov, 1);
 
-ret = drv-bdrv_co_writev(bs, sector_num, nb_sectors, qiov);
+ret = drv-bdrv_co_writev(bs, sector_num, num, qiov);
+}
+
+sector_num += num;
+nb_sectors -= num;
+}
 
 qemu_vfree(iov.iov_base);
 return ret;
-- 
1.7.9.5




[Qemu-devel] [PATCHv7 12/17] iscsi: simplify iscsi_co_discard

2013-10-24 Thread Peter Lieven
now that bdrv_co_discard can handle limits we do not need
the request split logic here anymore.

Reviewed-by: Eric Blake ebl...@redhat.com
Signed-off-by: Peter Lieven p...@kamp.de
---
 block/iscsi.c |   67 +
 1 file changed, 25 insertions(+), 42 deletions(-)

diff --git a/block/iscsi.c b/block/iscsi.c
index 16d8052..c0465aa 100644
--- a/block/iscsi.c
+++ b/block/iscsi.c
@@ -87,7 +87,6 @@ typedef struct IscsiAIOCB {
 #define NOP_INTERVAL 5000
 #define MAX_NOP_FAILURES 3
 #define ISCSI_CMD_RETRIES 5
-#define ISCSI_MAX_UNMAP 131072
 
 static void
 iscsi_bh_cb(void *p)
@@ -912,8 +911,6 @@ coroutine_fn iscsi_co_discard(BlockDriverState *bs, int64_t 
sector_num,
 IscsiLun *iscsilun = bs-opaque;
 struct IscsiTask iTask;
 struct unmap_list list;
-uint32_t nb_blocks;
-uint32_t max_unmap;
 
 if (!is_request_lun_aligned(sector_num, nb_sectors, iscsilun)) {
 return -EINVAL;
@@ -925,52 +922,38 @@ coroutine_fn iscsi_co_discard(BlockDriverState *bs, 
int64_t sector_num,
 }
 
 list.lba = sector_qemu2lun(sector_num, iscsilun);
-nb_blocks = sector_qemu2lun(nb_sectors, iscsilun);
+list.num = sector_qemu2lun(nb_sectors, iscsilun);
 
-max_unmap = iscsilun-bl.max_unmap;
-if (max_unmap == 0x) {
-max_unmap = ISCSI_MAX_UNMAP;
-}
-
-while (nb_blocks  0) {
-iscsi_co_init_iscsitask(iscsilun, iTask);
-list.num = nb_blocks;
-if (list.num  max_unmap) {
-list.num = max_unmap;
-}
+iscsi_co_init_iscsitask(iscsilun, iTask);
 retry:
-if (iscsi_unmap_task(iscsilun-iscsi, iscsilun-lun, 0, 0, list, 1,
- iscsi_co_generic_cb, iTask) == NULL) {
-return -EIO;
-}
-
-while (!iTask.complete) {
-iscsi_set_events(iscsilun);
-qemu_coroutine_yield();
-}
+if (iscsi_unmap_task(iscsilun-iscsi, iscsilun-lun, 0, 0, list, 1,
+ iscsi_co_generic_cb, iTask) == NULL) {
+return -EIO;
+}
 
-if (iTask.task != NULL) {
-scsi_free_scsi_task(iTask.task);
-iTask.task = NULL;
-}
+while (!iTask.complete) {
+iscsi_set_events(iscsilun);
+qemu_coroutine_yield();
+}
 
-if (iTask.do_retry) {
-goto retry;
-}
+if (iTask.task != NULL) {
+scsi_free_scsi_task(iTask.task);
+iTask.task = NULL;
+}
 
-if (iTask.status == SCSI_STATUS_CHECK_CONDITION) {
-/* the target might fail with a check condition if it
-   is not happy with the alignment of the UNMAP request
-   we silently fail in this case */
-return 0;
-}
+if (iTask.do_retry) {
+goto retry;
+}
 
-if (iTask.status != SCSI_STATUS_GOOD) {
-return -EIO;
-}
+if (iTask.status == SCSI_STATUS_CHECK_CONDITION) {
+/* the target might fail with a check condition if it
+   is not happy with the alignment of the UNMAP request
+   we silently fail in this case */
+return 0;
+}
 
-list.lba += list.num;
-nb_blocks -= list.num;
+if (iTask.status != SCSI_STATUS_GOOD) {
+return -EIO;
 }
 
 return 0;
-- 
1.7.9.5




[Qemu-devel] [PATCHv7 16/17] qemu-img: add support for fully allocated images

2013-10-24 Thread Peter Lieven
Signed-off-by: Peter Lieven p...@kamp.de
---
 qemu-img.c|   10 +++---
 qemu-img.texi |6 ++
 2 files changed, 13 insertions(+), 3 deletions(-)

diff --git a/qemu-img.c b/qemu-img.c
index 926f0a0..7f08364 100644
--- a/qemu-img.c
+++ b/qemu-img.c
@@ -100,8 +100,12 @@ static void help(void)
  '-h' with or without a command shows this help and lists the 
supported formats\n
  '-p' show progress of command (only certain commands)\n
  '-q' use Quiet mode - do not print any output (except errors)\n
- '-S' indicates the consecutive number of bytes that must contain 
only zeros\n
-  for qemu-img to create a sparse image during conversion\n
+ '-S' indicates the consecutive number of bytes (defaults to 4k) 
that must\n
+  contain only zeros for qemu-img to create a sparse image 
during\n
+  conversion. If the number of bytes is 0, the source will 
not be scanned for\n
+  unallocated or zero sectors, and the destination image will 
always be\n
+  fully allocated\n
+  images will always be fully allocated\n
  '--output' takes the format in which the output must be done 
(human or json)\n
  '-n' skips the target volume creation (useful if the volume is 
created\n
   prior to running qemu-img)\n
@@ -1465,7 +1469,7 @@ static int img_convert(int argc, char **argv)
 /* signal EOF to align */
 bdrv_write_compressed(out_bs, 0, NULL, 0);
 } else {
-int has_zero_init = bdrv_has_zero_init(out_bs);
+int has_zero_init = min_sparse ? bdrv_has_zero_init(out_bs) : 0;
 
 sector_num = 0; // total number of sectors converted so far
 nb_sectors = total_sectors - sector_num;
diff --git a/qemu-img.texi b/qemu-img.texi
index 768054e..da36975 100644
--- a/qemu-img.texi
+++ b/qemu-img.texi
@@ -193,6 +193,12 @@ Image conversion is also useful to get smaller image when 
using a
 growable format such as @code{qcow} or @code{cow}: the empty sectors
 are detected and suppressed from the destination image.
 
+@var{sparse_size} indicates the consecutive number of bytes (defaults to 4k)
+that must contain only zeros for qemu-img to create a sparse image during
+conversion. If @var{sparse_size} is 0, the source will not be scanned for
+unallocated or zero sectors, and the destination image will always be
+fully allocated.
+
 You can use the @var{backing_file} option to force the output image to be
 created as a copy on write image of the specified base image; the
 @var{backing_file} should have the same content as the input's base image,
-- 
1.7.9.5




[Qemu-devel] [PATCHv7 10/17] block: honour BlockLimits in bdrv_co_discard

2013-10-24 Thread Peter Lieven
Reviewed-by: Eric Blake ebl...@redhat.com
Signed-off-by: Peter Lieven p...@kamp.de
---
 block.c |   37 -
 1 file changed, 36 insertions(+), 1 deletion(-)

diff --git a/block.c b/block.c
index 0c0b0ac..b28dd42 100644
--- a/block.c
+++ b/block.c
@@ -4234,6 +4234,11 @@ static void coroutine_fn bdrv_discard_co_entry(void 
*opaque)
 rwco-ret = bdrv_co_discard(rwco-bs, rwco-sector_num, rwco-nb_sectors);
 }
 
+/* if no limit is specified in the BlockLimits use a default
+ * of 32768 512-byte sectors (16 MiB) per request.
+ */
+#define MAX_DISCARD_DEFAULT 32768
+
 int coroutine_fn bdrv_co_discard(BlockDriverState *bs, int64_t sector_num,
  int nb_sectors)
 {
@@ -4255,7 +4260,37 @@ int coroutine_fn bdrv_co_discard(BlockDriverState *bs, 
int64_t sector_num,
 }
 
 if (bs-drv-bdrv_co_discard) {
-return bs-drv-bdrv_co_discard(bs, sector_num, nb_sectors);
+int max_discard = bs-bl.max_discard ?
+  bs-bl.max_discard : MAX_DISCARD_DEFAULT;
+
+while (nb_sectors  0) {
+int ret;
+int num = nb_sectors;
+
+/* align request */
+if (bs-bl.discard_alignment 
+num = bs-bl.discard_alignment 
+sector_num % bs-bl.discard_alignment) {
+if (num  bs-bl.discard_alignment) {
+num = bs-bl.discard_alignment;
+}
+num -= sector_num % bs-bl.discard_alignment;
+}
+
+/* limit request size */
+if (num  max_discard) {
+num = max_discard;
+}
+
+ret = bs-drv-bdrv_co_discard(bs, sector_num, num);
+if (ret) {
+return ret;
+}
+
+sector_num += num;
+nb_sectors -= num;
+}
+return 0;
 } else if (bs-drv-bdrv_aio_discard) {
 BlockDriverAIOCB *acb;
 CoroutineIOCompletion co = {
-- 
1.7.9.5




[Qemu-devel] [PATCHv7 15/17] block/get_block_status: fix BDRV_BLOCK_ZERO for unallocated blocks

2013-10-24 Thread Peter Lieven
this patch does 2 things:
a) only do additional call outs if BDRV_BLOCK_ZERO is not already set.
b) use the newly introduced bdrv_unallocated_blocks_are_zero()
   to return the zero state of an unallocated block. the used callout
   to bdrv_has_zero_init() is only valid right after bdrv_create.

Reviewed-by: Eric Blake ebl...@redhat.com
Signed-off-by: Peter Lieven p...@kamp.de
---
 block.c |4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/block.c b/block.c
index 21a992a..69a2d2b 100644
--- a/block.c
+++ b/block.c
@@ -3263,8 +3263,8 @@ static int64_t coroutine_fn 
bdrv_co_get_block_status(BlockDriverState *bs,
  *pnum, pnum);
 }
 
-if (!(ret  BDRV_BLOCK_DATA)) {
-if (bdrv_has_zero_init(bs)) {
+if (!(ret  BDRV_BLOCK_DATA)  !(ret  BDRV_BLOCK_ZERO)) {
+if (bdrv_unallocated_blocks_are_zero(bs)) {
 ret |= BDRV_BLOCK_ZERO;
 } else if (bs-backing_hd) {
 BlockDriverState *bs2 = bs-backing_hd;
-- 
1.7.9.5




Re: [Qemu-devel] [PATCH] MAINTAINERS: add block driver sub-maintainers

2013-10-24 Thread Peter Lieven

On 23.10.2013 08:23, Paolo Bonzini wrote:

Il 21/10/2013 14:26, Stefan Hajnoczi ha scritto:

+iSCSI
+M: Ronnie Sahlberg ronniesahlb...@gmail.com
+M: Paolo Bonzini pbonz...@redhat.com
+S: Supported
+F: block/iscsi.c

As I have worked a lot on the iSCSI driver recently I would like
to receive patches as well if possible.

Peter



[Qemu-devel] [PATCHv7 17/17] qemu-img: conditionally zero out target on convert

2013-10-24 Thread Peter Lieven
If the target has_zero_init = 0, but supports efficiently
writing zeroes by unmapping we call bdrv_make_zero to
avoid fully allocating the target. This currently works
only for iscsi.  It can be extended to raw with
BLKDISCARDZEROES for example.

Reviewed-by: Eric Blake ebl...@redhat.com
Signed-off-by: Peter Lieven p...@kamp.de
---
 qemu-img.c |   10 +-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/qemu-img.c b/qemu-img.c
index 7f08364..bec6da3 100644
--- a/qemu-img.c
+++ b/qemu-img.c
@@ -1355,7 +1355,7 @@ static int img_convert(int argc, char **argv)
 }
 }
 
-flags = BDRV_O_RDWR;
+flags = min_sparse ? (BDRV_O_RDWR | BDRV_O_UNMAP) : BDRV_O_RDWR;
 ret = bdrv_parse_cache_flags(cache, flags);
 if (ret  0) {
 error_report(Invalid cache option: %s, cache);
@@ -1471,6 +1471,14 @@ static int img_convert(int argc, char **argv)
 } else {
 int has_zero_init = min_sparse ? bdrv_has_zero_init(out_bs) : 0;
 
+if (!has_zero_init  bdrv_can_write_zeroes_with_unmap(out_bs)) {
+ret = bdrv_make_zero(out_bs, BDRV_REQ_MAY_UNMAP);
+if (ret  0) {
+goto out;
+}
+has_zero_init = 1;
+}
+
 sector_num = 0; // total number of sectors converted so far
 nb_sectors = total_sectors - sector_num;
 if (nb_sectors != 0) {
-- 
1.7.9.5




Re: [Qemu-devel] [PATCH v2] block: support dropping active in bdrv_drop_intermediate

2013-10-24 Thread Jeff Cody
On Tue, Oct 15, 2013 at 03:25:00PM +0800, Fam Zheng wrote:
 There is only one failure point: bdrv_change_backing_file in this
 function, so we can drop the qlist and try to change the backing file
 before deleting anything.
 
 This way bdrv_drop_intermediate is simplified while keeping the
 operation transactional. A bonus is dropping an active BDS is supported
 too by swapping the base and top. Although no caller uses this yet, the
 comment is updated to reflect the change.
 
 Signed-off-by: Fam Zheng f...@redhat.com
 
 ---
 v2: check for active, top and base being in a backing chain. (Jeff)

This does check for that, but it doesn't catch all errors.

It will verify:

[base] - [active]

And verifies:

[top] - [active]   (when active is != top)

However, it does not verify that the following is true:

[base] - [top]

(e.g., it will pass on [top] - [base] - [active])

Rather than add another call to bdrv_find_overlay to verify the last
case, would just adding the bdrv_swap() and a check for active ==
top to the existing function do what you need for the active layer
support?


 Signed-off-by: Fam Zheng f...@redhat.com
 ---
  block.c| 103 
 -
  block/commit.c |   1 +
  2 files changed, 37 insertions(+), 67 deletions(-)
 
 diff --git a/block.c b/block.c
 index fd05a80..9ead554 100644
 --- a/block.c
 +++ b/block.c
 @@ -2130,18 +2130,11 @@ BlockDriverState *bdrv_find_overlay(BlockDriverState 
 *active,
  return overlay;
  }
  
 -typedef struct BlkIntermediateStates {
 -BlockDriverState *bs;
 -QSIMPLEQ_ENTRY(BlkIntermediateStates) entry;
 -} BlkIntermediateStates;
 -
 -
  /*
 - * Drops images above 'base' up to and including 'top', and sets the image
 - * above 'top' to have base as its backing file.
 - *
 - * Requires that the overlay to 'top' is opened r/w, so that the backing file
 - * information in 'bs' can be properly updated.
 + * Drops images above 'base' up to and including 'top', and sets new 'base'
 + * as backing_hd of top_overlay (the image originally has 'top' as backing
 + * file). top_overlay may be NULL if 'top' is active, no such update needed.
 + * Requires that the top_overlay to 'top' is opened r/w.
   *
   * E.g., this will convert the following chain:
   * bottom - base - intermediate - top - active
 @@ -2158,86 +2151,62 @@ typedef struct BlkIntermediateStates {
   *
   * base - active
   *
 - * Error conditions:
 - *  if active == top, that is considered an error
 + * It also allows active==top, in which case it converts:
 + *
 + * base - intermediate - active (also top)
 + *
 + * to
 + *
 + * base == active == top, i.e. only base remains: *top == *base when return.
   *
   */
  int bdrv_drop_intermediate(BlockDriverState *active, BlockDriverState *top,
 BlockDriverState *base)
  {
 -BlockDriverState *intermediate;
 +BlockDriverState *pbs;
 +BlockDriverState *overlay = NULL;
  BlockDriverState *base_bs = NULL;
 -BlockDriverState *new_top_bs = NULL;
 -BlkIntermediateStates *intermediate_state, *next;
 -int ret = -EIO;
 -
 -QSIMPLEQ_HEAD(states_to_delete, BlkIntermediateStates) states_to_delete;
 -QSIMPLEQ_INIT(states_to_delete);
 +int ret = -EINVAL;
  
  if (!top-drv || !base-drv) {
  goto exit;
  }
  
 -new_top_bs = bdrv_find_overlay(active, top);
 -
 -if (new_top_bs == NULL) {
 -/* we could not find the image above 'top', this is an error */
 -goto exit;
 -}
 -
 -/* special case of new_top_bs-backing_hd already pointing to base - 
 nothing
 - * to do, no intermediate images */
 -if (new_top_bs-backing_hd == base) {
 -ret = 0;
 +if (!bdrv_find_overlay(active, base)) {
  goto exit;
  }
  
 -intermediate = top;
 -
 -/* now we will go down through the list, and add each BDS we find
 - * into our deletion queue, until we hit the 'base'
 - */
 -while (intermediate) {
 -intermediate_state = g_malloc0(sizeof(BlkIntermediateStates));
 -intermediate_state-bs = intermediate;
 -QSIMPLEQ_INSERT_TAIL(states_to_delete, intermediate_state, entry);
 -
 -if (intermediate-backing_hd == base) {
 -base_bs = intermediate-backing_hd;
 -break;
 +if (active != top) {
 +/* If there's an overlay, its backing_hd points to top's BDS now,
 + * the top image is dropped but this BDS structure is kept and 
 swapped
 + * with base, this way we keep the pointers valid after dropping top 
 */
 +overlay = bdrv_find_overlay(active, top);
 +if (!overlay) {
 +goto exit;
 +}
 +ret = bdrv_change_backing_file(overlay, base-filename,
 +   base-drv ?
 +base-drv-format_name : );
 +if (ret) {
 +goto exit;
  }
 -intermediate = intermediate-backing_hd;
 -   

[Qemu-devel] [PATCHv7 01/17] block: make BdrvRequestFlags public

2013-10-24 Thread Peter Lieven
Reviewed-by: Eric Blake ebl...@redhat.com
Signed-off-by: Peter Lieven p...@kamp.de
---
 block.c   |5 -
 include/block/block.h |5 +
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/block.c b/block.c
index fd05a80..eb11a07 100644
--- a/block.c
+++ b/block.c
@@ -51,11 +51,6 @@
 
 #define NOT_DONE 0x7fff /* used while emulated sync operation in progress 
*/
 
-typedef enum {
-BDRV_REQ_COPY_ON_READ = 0x1,
-BDRV_REQ_ZERO_WRITE   = 0x2,
-} BdrvRequestFlags;
-
 static void bdrv_dev_change_media_cb(BlockDriverState *bs, bool load);
 static BlockDriverAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
diff --git a/include/block/block.h b/include/block/block.h
index 3560deb..ba2082c 100644
--- a/include/block/block.h
+++ b/include/block/block.h
@@ -62,6 +62,11 @@ typedef struct BlockDevOps {
 void (*resize_cb)(void *opaque);
 } BlockDevOps;
 
+typedef enum {
+BDRV_REQ_COPY_ON_READ = 0x1,
+BDRV_REQ_ZERO_WRITE   = 0x2,
+} BdrvRequestFlags;
+
 #define BDRV_O_RDWR0x0002
 #define BDRV_O_SNAPSHOT0x0008 /* open the file read only and save writes 
in a snapshot */
 #define BDRV_O_NOCACHE 0x0020 /* do not use the host page cache */
-- 
1.7.9.5




Re: [Qemu-devel] [PATCH v2] configure: create fsdev/ directory

2013-10-24 Thread Paolo Bonzini
Il 21/10/2013 09:35, Michael Tokarev ha scritto:
 In some cases when building with parallelism (make -jN),
 build fails because the directory where output files are
 supposed to be does not exist.  In particular, when make
 decides to build virtfs-proxy-helper.1 before other files
 in fsdev/, build will fail with the following error:
 
 perl -Ww -- BUILDDIR/scripts/texi2pod.pl 
 BUILDDIR/fsdev/virtfs-proxy-helper.texi fsdev/virtfs-proxy-helper.pod  
 pod2man --utf8 --section=1 --center=  --release=  
 fsdev/virtfs-proxy-helper.pod  fsdev/virtfs-proxy-helper.1
 opening fsdev/virtfs-proxy-helper.pod: No such file or directory
 
 Signed-off-by: Michael Tokarev m...@tls.msk.ru
 ---
  configure |1 +
  1 file changed, 1 insertion(+)
 
 diff --git a/configure b/configure
 index 57ee62a..61bb27c 100755
 --- a/configure
 +++ b/configure
 @@ -4647,6 +4647,7 @@ fi
  
  # build tree in object directory in case the source is not in the current 
 directory
  DIRS=tests tests/tcg tests/tcg/cris tests/tcg/lm32 tests/libqos 
 tests/qapi-schema tests/tcg/xtensa tests/qemu-iotests
 +DIRS=$DIRS fsdev
  DIRS=$DIRS pc-bios/optionrom pc-bios/spapr-rtas pc-bios/s390-ccw
  DIRS=$DIRS roms/seabios roms/vgabios
  DIRS=$DIRS qapi-generated
 

Acked-by: Paolo Bonzini pbonz...@redhat.com

Please queue it through trivial.

Paolo



[Qemu-devel] [PATCHv7 08/17] block/raw: copy BlockLimits on raw_open

2013-10-24 Thread Peter Lieven
Signed-off-by: Peter Lieven p...@kamp.de
---
 block/raw_bsd.c |1 +
 1 file changed, 1 insertion(+)

diff --git a/block/raw_bsd.c b/block/raw_bsd.c
index b0dd23f..49ac18c 100644
--- a/block/raw_bsd.c
+++ b/block/raw_bsd.c
@@ -150,6 +150,7 @@ static int raw_open(BlockDriverState *bs, QDict *options, 
int flags,
 Error **errp)
 {
 bs-sg = bs-file-sg;
+bs-bl = bs-file-bl;
 return 0;
 }
 
-- 
1.7.9.5




[Qemu-devel] [PATCHv7 04/17] block: add logical block provisioning info to BlockDriverInfo

2013-10-24 Thread Peter Lieven
Reviewed-by: Eric Blake ebl...@redhat.com
Signed-off-by: Peter Lieven p...@kamp.de
---
 include/block/block.h |   16 
 1 file changed, 16 insertions(+)

diff --git a/include/block/block.h b/include/block/block.h
index 1f30a56..9c76967 100644
--- a/include/block/block.h
+++ b/include/block/block.h
@@ -18,6 +18,22 @@ typedef struct BlockDriverInfo {
 /* offset at which the VM state can be saved (0 if not possible) */
 int64_t vm_state_offset;
 bool is_dirty;
+/*
+ * True if unallocated blocks read back as zeroes. This is equivalent
+ * to the LBPRZ flag in the SCSI logical block provisioning page.
+ */
+bool unallocated_blocks_are_zero;
+/*
+ * True if the driver can optimize writing zeroes by unmapping
+ * sectors. This is equivalent to the BLKDISCARDZEROES ioctl in Linux
+ * with the difference that in qemu a discard is allowed to silently
+ * fail. Therefore we have to use bdrv_write_zeroes with the
+ * BDRV_REQ_MAY_UNMAP flag for an optimized zero write with unmapping.
+ * After this call the driver has to guarantee that the contents read
+ * back as zero. It is additionally required that the block device is
+ * opened with BDRV_O_UNMAP flag for this to work.
+ */
+bool can_write_zeroes_with_unmap;
 } BlockDriverInfo;
 
 typedef struct BlockFragInfo {
-- 
1.7.9.5




[Qemu-devel] [PATCHv7 03/17] block: introduce BDRV_REQ_MAY_UNMAP request flag

2013-10-24 Thread Peter Lieven
Reviewed-by: Eric Blake ebl...@redhat.com
Signed-off-by: Peter Lieven p...@kamp.de
---
 block-migration.c |3 ++-
 block.c   |4 
 block/backup.c|2 +-
 include/block/block.h |7 +++
 4 files changed, 14 insertions(+), 2 deletions(-)

diff --git a/block-migration.c b/block-migration.c
index 713a8e3..fc4ef93 100644
--- a/block-migration.c
+++ b/block-migration.c
@@ -780,7 +780,8 @@ static int block_load(QEMUFile *f, void *opaque, int 
version_id)
 }
 
 if (flags  BLK_MIG_FLAG_ZERO_BLOCK) {
-ret = bdrv_write_zeroes(bs, addr, nr_sectors, 0);
+ret = bdrv_write_zeroes(bs, addr, nr_sectors,
+BDRV_REQ_MAY_UNMAP);
 } else {
 buf = g_malloc(BLOCK_SIZE);
 qemu_get_buffer(f, buf, BLOCK_SIZE);
diff --git a/block.c b/block.c
index 3259429..0d97ce6 100644
--- a/block.c
+++ b/block.c
@@ -2810,6 +2810,10 @@ int coroutine_fn bdrv_co_write_zeroes(BlockDriverState 
*bs,
 {
 trace_bdrv_co_write_zeroes(bs, sector_num, nb_sectors);
 
+if (!(bs-open_flags  BDRV_O_UNMAP)) {
+flags = ~BDRV_REQ_MAY_UNMAP;
+}
+
 return bdrv_co_do_writev(bs, sector_num, nb_sectors, NULL,
  BDRV_REQ_ZERO_WRITE | flags);
 }
diff --git a/block/backup.c b/block/backup.c
index 830a179..0198514 100644
--- a/block/backup.c
+++ b/block/backup.c
@@ -139,7 +139,7 @@ static int coroutine_fn backup_do_cow(BlockDriverState *bs,
 if (buffer_is_zero(iov.iov_base, iov.iov_len)) {
 ret = bdrv_co_write_zeroes(job-target,
start * BACKUP_SECTORS_PER_CLUSTER,
-   n, 0);
+   n, BDRV_REQ_MAY_UNMAP);
 } else {
 ret = bdrv_co_writev(job-target,
  start * BACKUP_SECTORS_PER_CLUSTER, n,
diff --git a/include/block/block.h b/include/block/block.h
index 8ba9f0c..1f30a56 100644
--- a/include/block/block.h
+++ b/include/block/block.h
@@ -65,6 +65,13 @@ typedef struct BlockDevOps {
 typedef enum {
 BDRV_REQ_COPY_ON_READ = 0x1,
 BDRV_REQ_ZERO_WRITE   = 0x2,
+/* The BDRV_REQ_MAY_UNMAP flag is used to indicate that the block driver
+ * is allowed to optimize a write zeroes request by unmapping (discarding)
+ * blocks if it is guaranteed that the result will read back as
+ * zeroes. The flag is only passed to the driver if the block device is
+ * opened with BDRV_O_UNMAP.
+ */
+BDRV_REQ_MAY_UNMAP= 0x4,
 } BdrvRequestFlags;
 
 #define BDRV_O_RDWR0x0002
-- 
1.7.9.5




Re: [Qemu-devel] qemu 1.6.1

2013-10-24 Thread Paolo Bonzini
Il 23/10/2013 21:26, Stefan Weil ha scritto:
 Am 23.10.2013 11:00, schrieb Paolo Bonzini:
 Il 23/10/2013 08:39, Michael W. Bombardieri ha scritto:
 Hi,

 My newly built qemu/win32 binary (v1.6.1) crashes in qemu-system-i386 and 
 qemu-system-x86_64 when
 booting from an install CD.

 C:\Program Files\qemuqemu-system-x86_64 -boot d -vnc 0.0.0.0:20 -cdrom 
 NetBSD-6.1.2-amd64.iso
 Assertion failed: qemu_in_coroutine(), file qemu-coroutine-lock.c, line 
 99

 This application has requested the Runtime to terminate it in an 
 unusual way.
 Please contact the application's support team for more information.

 I noticed that qemu-system-sparc still booted OpenBSD/sparc 5.3 install CD 
 correctly.
 No further info at this stage.
 Any ideas?
 It's a known problem that not everyone can reproduce.  Please compile
 with --disable-coroutine-pool on the configure command line.

 Paolo
 
 This patch also helps (at least for me, tested native and on Linux / Wine):
 http://repo.or.cz/w/qemu/ar7.git/commit/c777d5d62a729fd8b19847aaa0aad3d7a1f73f47
 
 It looks like a compiler problem related to thread local storage
 (variable current).

Ugh.

 I recently got several bug reports from a Windows user and included
 patches to fix them in
 my personal tree http://repo.or.cz/w/qemu/ar7.git. The binaries on
 qemu.weilnetz.de
 are based on that tree.

Does something like

 CoroutineWin32 *from = DO_UPCAST(CoroutineWin32, base, current);

also work?  Then we can just remove from_.

Paolo



[Qemu-devel] [PATCHv7 02/17] block: add flags to bdrv_*_write_zeroes

2013-10-24 Thread Peter Lieven
Reviewed-by: Eric Blake ebl...@redhat.com
Signed-off-by: Peter Lieven p...@kamp.de
---
 block-migration.c |2 +-
 block.c   |   20 +++-
 block/backup.c|3 ++-
 block/qcow2-cluster.c |2 +-
 block/qcow2.c |2 +-
 block/qed.c   |3 ++-
 block/raw_bsd.c   |5 +++--
 block/vmdk.c  |3 ++-
 include/block/block.h |4 ++--
 include/block/block_int.h |2 +-
 qemu-io-cmds.c|2 +-
 11 files changed, 27 insertions(+), 21 deletions(-)

diff --git a/block-migration.c b/block-migration.c
index daf9ec1..713a8e3 100644
--- a/block-migration.c
+++ b/block-migration.c
@@ -780,7 +780,7 @@ static int block_load(QEMUFile *f, void *opaque, int 
version_id)
 }
 
 if (flags  BLK_MIG_FLAG_ZERO_BLOCK) {
-ret = bdrv_write_zeroes(bs, addr, nr_sectors);
+ret = bdrv_write_zeroes(bs, addr, nr_sectors, 0);
 } else {
 buf = g_malloc(BLOCK_SIZE);
 qemu_get_buffer(f, buf, BLOCK_SIZE);
diff --git a/block.c b/block.c
index eb11a07..3259429 100644
--- a/block.c
+++ b/block.c
@@ -79,7 +79,7 @@ static BlockDriverAIOCB 
*bdrv_co_aio_rw_vector(BlockDriverState *bs,
bool is_write);
 static void coroutine_fn bdrv_co_do_rw(void *opaque);
 static int coroutine_fn bdrv_co_do_write_zeroes(BlockDriverState *bs,
-int64_t sector_num, int nb_sectors);
+int64_t sector_num, int nb_sectors, BdrvRequestFlags flags);
 
 static QTAILQ_HEAD(, BlockDriverState) bdrv_states =
 QTAILQ_HEAD_INITIALIZER(bdrv_states);
@@ -2384,10 +2384,11 @@ int bdrv_writev(BlockDriverState *bs, int64_t 
sector_num, QEMUIOVector *qiov)
 return bdrv_rwv_co(bs, sector_num, qiov, true, 0);
 }
 
-int bdrv_write_zeroes(BlockDriverState *bs, int64_t sector_num, int nb_sectors)
+int bdrv_write_zeroes(BlockDriverState *bs, int64_t sector_num,
+  int nb_sectors, BdrvRequestFlags flags)
 {
 return bdrv_rw_co(bs, sector_num, NULL, nb_sectors, true,
-  BDRV_REQ_ZERO_WRITE);
+  BDRV_REQ_ZERO_WRITE | flags);
 }
 
 int bdrv_pread(BlockDriverState *bs, int64_t offset,
@@ -2569,7 +2570,7 @@ static int coroutine_fn 
bdrv_co_do_copy_on_readv(BlockDriverState *bs,
 if (drv-bdrv_co_write_zeroes 
 buffer_is_zero(bounce_buffer, iov.iov_len)) {
 ret = bdrv_co_do_write_zeroes(bs, cluster_sector_num,
-  cluster_nb_sectors);
+  cluster_nb_sectors, 0);
 } else {
 /* This does not change the data on the disk, it is not necessary
  * to flush even in cache=writethrough mode.
@@ -2703,7 +2704,7 @@ int coroutine_fn bdrv_co_copy_on_readv(BlockDriverState 
*bs,
 }
 
 static int coroutine_fn bdrv_co_do_write_zeroes(BlockDriverState *bs,
-int64_t sector_num, int nb_sectors)
+int64_t sector_num, int nb_sectors, BdrvRequestFlags flags)
 {
 BlockDriver *drv = bs-drv;
 QEMUIOVector qiov;
@@ -2715,7 +2716,7 @@ static int coroutine_fn 
bdrv_co_do_write_zeroes(BlockDriverState *bs,
 
 /* First try the efficient write zeroes operation */
 if (drv-bdrv_co_write_zeroes) {
-ret = drv-bdrv_co_write_zeroes(bs, sector_num, nb_sectors);
+ret = drv-bdrv_co_write_zeroes(bs, sector_num, nb_sectors, flags);
 if (ret != -ENOTSUP) {
 return ret;
 }
@@ -2770,7 +2771,7 @@ static int coroutine_fn 
bdrv_co_do_writev(BlockDriverState *bs,
 if (ret  0) {
 /* Do nothing, write notifier decided to fail this request */
 } else if (flags  BDRV_REQ_ZERO_WRITE) {
-ret = bdrv_co_do_write_zeroes(bs, sector_num, nb_sectors);
+ret = bdrv_co_do_write_zeroes(bs, sector_num, nb_sectors, flags);
 } else {
 ret = drv-bdrv_co_writev(bs, sector_num, nb_sectors, qiov);
 }
@@ -2804,12 +2805,13 @@ int coroutine_fn bdrv_co_writev(BlockDriverState *bs, 
int64_t sector_num,
 }
 
 int coroutine_fn bdrv_co_write_zeroes(BlockDriverState *bs,
-  int64_t sector_num, int nb_sectors)
+  int64_t sector_num, int nb_sectors,
+  BdrvRequestFlags flags)
 {
 trace_bdrv_co_write_zeroes(bs, sector_num, nb_sectors);
 
 return bdrv_co_do_writev(bs, sector_num, nb_sectors, NULL,
- BDRV_REQ_ZERO_WRITE);
+ BDRV_REQ_ZERO_WRITE | flags);
 }
 
 /**
diff --git a/block/backup.c b/block/backup.c
index cad14c9..830a179 100644
--- a/block/backup.c
+++ b/block/backup.c
@@ -138,7 +138,8 @@ static int coroutine_fn backup_do_cow(BlockDriverState *bs,
 
 if (buffer_is_zero(iov.iov_base, iov.iov_len)) {
 ret = bdrv_co_write_zeroes(job-target,
-   start * BACKUP_SECTORS_PER_CLUSTER, n);
+   

Re: [Qemu-devel] [PATCH v3 0/4] Curling: KVM Fault Tolerance

2013-10-24 Thread Stefan Hajnoczi
On Wed, Oct 23, 2013 at 1:08 AM, Jules junqing.w...@cs2c.com.cn wrote:

 On Tue, Oct 15, 2013 at 03:26:19PM +0800, Jules Wang wrote:
  v2 - v3:
  * add documentation of new option in qapi-schema.
 
  * long option name: ft - fault-tolerant
 
  v1 - v2:
  * cmdline: migrate curling:tcp:address:port
 -  migrate -f tcp:address:port
 
  * sender: use QEMU_VM_FILE_MAGIC_FT as the header of the migration
to indicate this is a ft migration.
 
  * receiver: look for the signature:
  QEMU_VM_EOF_MAGIC + QEMU_VM_FILE_MAGIC_FT(64bit total)
  which indicates the end of one migration.
  --
  Jules Wang (4):
Curling: add doc
Curling: cmdline interface.
Curling: the sender
Curling: the receiver


 First of all, thanks for your superb and spot-on comments.

 It would be helpful to clarify the status of Curling in the cover letter
 email so reviewers know what to expect.

 OK, but I'm not quite clear about how to clarify the status, would you
 pls give me an example?

That status would be an explanation of what is currently included in the
patch, which functionality already works, and what you still plan to
implement before the series can be merged.

 This series does not address I/O or failover.  I guess you are aware of
 the missing topics that I mentioned, here are my thoughts on them:

 I/O needs to be held back until the destination host has acknowledged
 receiving the last full migration state.  The outside world cannot
 witness state changes in the guest until the migration state has been
 successfully transferred to the destination host.  Otherwise the guest
 may appear to act incorrectly when resuming execution from the last
 snapshot.

 The time period used by the FT sender thread determines how much latency
 is added to I/O requests.

 Yes, there is the latency. That is inevitable.

 I guess you mean the following situation:
 If a msg 'hello' is sent to the chat room server just a few seconds
 before the failover happens, there is a possibility that the msg will be
 sent to the others twice or be lost.

 Am I right?

Yes, and this is a fundamental requirement for FT.

I/O is not idempotent.  This means it is not possible to repeat the
same operation twice and get the same result.

Other fault tolerance solutions include a mechanism to hold back I/O
until the checkpoint has been committed by the other host.  This way
no I/O is repeated and applications will not break during failover.

For example, imagine a compare and swap operation.  If the VM sends
out a compare and swap command to a remote server and fails, then
your current patches may send the command again on the other host.
The problem is that the command will not succeed the second time and
therefore the application fails with an error.


 Failover functionality is missing from these patches.  We cannot simply
 start executing on the destination host when the migration connection
 ends.  If the guest disk image is located on shared storage then
 split-brain occurs when a network error terminates the migration
 connection -

 will both hosts begin accessing the shared disk?
 YES


 I have a simple way to handle that. In one word, the third point
 --gateway.

 Both the sender and the receiver check the connectivity to the gateway
 every X seconds. Let's use A and B stand for whether the sender and the
 receiver are connected to the gateway respectively.

 When the connection between the sender and the receiver is down.
 A && B is false.

 If A is false, the vm instance at the sender will be stopped.
 If B is false, the vm instance at the receiver will not be started.

 a.A false && B false: 0 vm run
 b.A false && B true: 1 vm run
 c.A true  && B false: 1 vm run
 d.A true  && B true : 1 vm run (normal case)

 It becomes complicated when we consider the state transitions in
 these four states.

 I suggest adding this feature to libvirt instead of qemu.

I agree that the details of the failover (aka quorum and fencing)
should be implemented as policies outside QEMU, if possible.

Also, there were two presentations about fault tolerance at KVM Forum
2013 a few days ago:
https://docs.google.com/file/d/0BzyAwvVlQckebVBrNXdlaTdWVUk/edit
https://docs.google.com/file/d/0BzyAwvVlQckeczNUZHRod28yVXc/edit

Stefan



Re: [Qemu-devel] [PATCH resend] sdl: Reverse support for video mode setting

2013-10-24 Thread Lei Li

This patch has been confirmed by the reporter himself as
link below,

https://bugs.launchpad.net/qemu/+bug/1216368

It has been on the mailing list for a while, could it be merged? PING...

On 09/04/2013 05:07 PM, Lei Li wrote:

Currently, if setting the video mode fails, qemu exits. It should
instead go back to the previous setting if the new screen resolution
cannot be set. This patch fixes LP#1216368 by adding support for
reverting to the existing surface when setting the video mode fails.

Reported-by: Sascha Krissler sas...@srlabs.de
Signed-off-by: Lei Li li...@linux.vnet.ibm.com
---
  ui/sdl.c |   23 +++
  1 files changed, 19 insertions(+), 4 deletions(-)

diff --git a/ui/sdl.c b/ui/sdl.c
index 39a42d6..9d8583c 100644
--- a/ui/sdl.c
+++ b/ui/sdl.c
@@ -86,6 +86,7 @@ static void sdl_update(DisplayChangeListener *dcl,
  static void do_sdl_resize(int width, int height, int bpp)
  {
  int flags;
+SDL_Surface *tmp_screen;

  //printf(resizing to %d %d\n, w, h);

@@ -98,12 +99,26 @@ static void do_sdl_resize(int width, int height, int bpp)
  if (gui_noframe)
  flags |= SDL_NOFRAME;

-real_screen = SDL_SetVideoMode(width, height, bpp, flags);
+tmp_screen = SDL_SetVideoMode(width, height, bpp, flags);
  if (!real_screen) {
-   fprintf(stderr, Could not open SDL display (%dx%dx%d): %s\n, width,
-   height, bpp, SDL_GetError());
-exit(1);
+if (!tmp_screen) {
+fprintf(stderr, Could not open SDL display (%dx%dx%d): %s\n,
+width, height, bpp, SDL_GetError());
+exit(1);
+}
+} else {
+/*
+ * Revert to the previous video mode if the change of resizing or
+ * resolution failed.
+ */
+if (!tmp_screen) {
+fprintf(stderr, Failed to set SDL display (%dx%dx%d): %s\n,
+width, height, bpp, SDL_GetError());
+return;
+}
  }
+
+real_screen = tmp_screen;
  }

  static void sdl_switch(DisplayChangeListener *dcl,



--
Lei




Re: [Qemu-devel] [Xen-devel] Hvmloader: Modify ACPI to only supply _EJ0 methods for PCIslots that support hotplug by runtime patching

2013-10-24 Thread Fabio Fantoni

Il 24/10/2013 14:17, Gonglei (Arei) ha scritto:

-Original Message-
From: Jan Beulich [mailto:jbeul...@suse.com]
Sent: Tuesday, October 22, 2013 4:06 PM
To: Gonglei (Arei)
Cc: anthony.per...@citrix.com; Ian Campbell; Stefano Stabellini; Gaowei (UVP);
Hanweidong (Randy); Huangweidong (Hardware); Luonengjun; Yanqiangjun;
xen-de...@lists.xen.org; Fabio Fantoni; qemu-devel@nongnu.org; Markus
Armbruster
Subject: RE: [Qemu-devel] [Xen-devel] Hvmloader: Modify ACPI to only supply
_EJ0 methods for PCIslots that support hotplug by runtime patching


On 22.10.13 at 06:08, Gonglei (Arei) arei.gong...@huawei.com wrote:

Hi, guys. The new patch has been modified based on the principles you
suggested, thank you so much.
Last time I test the patch based on the codes of 4.3.0.
This time, I found that the system based on the codes of trunk causes the VM
reboot again and again, which I have not found out the reason.
So i can not test the patch based on the codes of trunk (details in
EJ0_ACPI_PCI_Hotplug.patch)..

I'm afraid we will need you to figure out that problem first, and
then do the verification on -unstable. Even if the code shouldn't
be that different from 4.3, we still don't want to apply completely
untested stuff.

Hi, Jan. We found the reason: we used a wrong seabios PATH, so 
hvmloader couldn't load bios.bin.
As a result, the VM restarted again and again after we started it. That was our fault.

Now I test the patch based on the codes of trunk, which works well.
The patch has been modified after your suggestion.
The patch works well with upstream qemu and doesn't affect the system with 
traditional qemu.




--- a/tools/firmware/hvmloader/ovmf.c
+++ b/tools/firmware/hvmloader/ovmf.c
@@ -79,7 +79,11 @@ static void ovmf_acpi_build_tables(void)
  .dsdt_anycpu = dsdt_anycpu,
  .dsdt_anycpu_len = dsdt_anycpu_len,
  .dsdt_15cpu = NULL,
-.dsdt_15cpu_len = 0
+.dsdt_15cpu_len = 0,
+.aml_ej0_name = NULL,
+.aml_adr_dword = NULL,
+.aml_ej0_name_len = 0,
+.aml_adr_dword_len = 0,

I don't see why you're adding these.


This is just for insurance purposes: it simply initializes the struct.

Signed-off-by: Gaowei gao.gao...@huawei.com
Signed-off-by: gonglei arei.gong...@huawei.com


Tested-by: Fabio Fantoni fabio.fant...@m2r.biz

Tested on xen unstable with qemu 1.6.1, no problem found for now.
Only one question: this patch removes hotplug only from essential PCI 
devices, right?
On Windows 7, hotplug continues to show: the virtio-serial driver, the Xen PCI 
device driver and HD audio.


Thanks for any reply.


---
  tools/firmware/hvmloader/acpi/Makefile |  37 ++-
  tools/firmware/hvmloader/acpi/acpi2_0.h|   4 +
  tools/firmware/hvmloader/acpi/build.c  |  21 +-
  tools/firmware/hvmloader/acpi/dsdt.asl |   1 +
  tools/firmware/hvmloader/acpi/mk_dsdt.c|   2 +
  tools/firmware/hvmloader/ovmf.c|   6 +-
  tools/firmware/hvmloader/rombios.c |   4 +
  tools/firmware/hvmloader/seabios.c |   8 +
  tools/firmware/hvmloader/tools/acpi_extract.py | 308 +
  .../hvmloader/tools/acpi_extract_preprocess.py |  41 +++
  10 files changed, 419 insertions(+), 13 deletions(-)
  create mode 100644 tools/firmware/hvmloader/tools/acpi_extract.py
  create mode 100644 tools/firmware/hvmloader/tools/acpi_extract_preprocess.py

diff --git a/tools/firmware/hvmloader/acpi/Makefile 
b/tools/firmware/hvmloader/acpi/Makefile
index 2c50851..b96e058 100644
--- a/tools/firmware/hvmloader/acpi/Makefile
+++ b/tools/firmware/hvmloader/acpi/Makefile
@@ -24,30 +24,45 @@ OBJS  = $(patsubst %.c,%.o,$(C_SRC))
  CFLAGS += $(CFLAGS_xeninclude)
  
  vpath iasl $(PATH)

+
+.DELETE_ON_ERROR: $(filter dsdt_%.c,$(C_SRC))
+
  all: acpi.a
  
  ssdt_s3.h ssdt_s4.h ssdt_pm.h ssdt_tpm.h: %.h: %.asl iasl

iasl -vs -p $* -tc $
-   sed -e 's/AmlCode/$*/g' $*.hex $@
+   sed -e 's/AmlCode/$*/g' $*.hex  $@.tmp
+   $(call move-if-changed,$@.tmp $@)
rm -f $*.hex $*.aml
  
  mk_dsdt: mk_dsdt.c

$(HOSTCC) $(HOSTCFLAGS) $(CFLAGS_xeninclude) -o $@ mk_dsdt.c
  
  dsdt_anycpu_qemu_xen.asl: dsdt.asl mk_dsdt

-   awk 'NR  1 {print s} {s=$$0}' $  $@
-   ./mk_dsdt --dm-version qemu-xen  $@
+   awk 'NR  1 {print s} {s=$$0}' $  $@.tmp
+   sed -i 's/AmlCode/dsdt_anycpu_qemu_xen/g' $@.tmp
+   ./mk_dsdt --dm-version qemu-xen  $@.tmp
+   sed -i 's/aml_ej0_name/dsdt_anycpu_qemu_xen_aml_ej0_name/g' $@.tmp
+   sed -i 's/aml_adr_dword/dsdt_anycpu_qemu_xen_aml_adr_dword/g' $@.tmp
+   $(call move-if-changed,$@.tmp $@)
  
  # NB. awk invocation is a portable alternative to 'head -n -1'

  dsdt_%cpu.asl: dsdt.asl mk_dsdt
-   awk 'NR  1 {print s} {s=$$0}' $  $@
-   ./mk_dsdt --maxcpu $*   $@
+   awk 'NR  1 {print s} {s=$$0}' $  $@.tmp
+   sed -i 's/AmlCode/dsdt_$*cpu/g' $@.tmp
+   ./mk_dsdt --maxcpu $*   $@.tmp
+   $(call 

[Qemu-devel] [PULL for-1.7 0/7] usb fixes

2013-10-24 Thread Gerd Hoffmann
  Hi,

Here comes a collection of bugfixes for xhci and usb-host,
mostly related to usb3 streams.

please pull,
  Gerd

The following changes since commit fc8ead74674b7129e8f31c2595c76658e5622197:

  Merge remote-tracking branch 'qemu-kvm/uq/master' into staging (2013-10-18 
10:03:24 -0700)

are available in the git repository at:


  git://git.kraxel.org/qemu usb.91

for you to fetch changes up to c90daa1c109348099088c1cc954c1e9f3392ae03:

  usb-hcd-xhci: Update endpoint context dequeue pointer for streams too 
(2013-10-22 16:28:49 +0200)


Hans de Goede (7):
  usb-host-libusb: Fix reset handling
  usb-host-libusb: Configuration 0 may be a valid configuration
  usb-host-libusb: Detach kernel drivers earlier
  usb-hcd-xhci: Remove unused sstreamsm member from XHCIStreamContext
  usb-hcd-xhci: Remove unused cancelled member from XHCITransfer
  usb-hcd-xhci: Report completion of active transfer with CC_STOPPED on ep 
stop
  usb-hcd-xhci: Update endpoint context dequeue pointer for streams too

 hw/usb/hcd-xhci.c| 50 ++
 hw/usb/host-libusb.c | 26 +-
 2 files changed, 39 insertions(+), 37 deletions(-)



[Qemu-devel] [PATCH 7/7] usb-hcd-xhci: Update endpoint context dequeue pointer for streams too

2013-10-24 Thread Gerd Hoffmann
From: Hans de Goede hdego...@redhat.com

With streams the endpoint context dequeue pointer should point to the
dequeue value for the currently active stream.

At least Linux guests expect it to point to the value set by a set_ep_dequeue
upon completion of the set_ep_dequeue (before kicking the ep).

Otherwise the Linux kernel will complain (and things won't work):

xhci_hcd :00:05.0: Mismatch between completed Set TR Deq Ptr command & xHCI 
internal state.
xhci_hcd :00:05.0: ep deq seg = 8800366f0880, deq ptr = 8800366ec010

Signed-off-by: Hans de Goede hdego...@redhat.com
Signed-off-by: Gerd Hoffmann kra...@redhat.com
---
 hw/usb/hcd-xhci.c | 10 --
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/hw/usb/hcd-xhci.c b/hw/usb/hcd-xhci.c
index 0131151..fa27299 100644
--- a/hw/usb/hcd-xhci.c
+++ b/hw/usb/hcd-xhci.c
@@ -1187,6 +1187,7 @@ static XHCIStreamContext *xhci_find_stream(XHCIEPContext 
*epctx,
 static void xhci_set_ep_state(XHCIState *xhci, XHCIEPContext *epctx,
   XHCIStreamContext *sctx, uint32_t state)
 {
+XHCIRing *ring = NULL;
 uint32_t ctx[5];
 uint32_t ctx2[2];
 
@@ -1197,6 +1198,7 @@ static void xhci_set_ep_state(XHCIState *xhci, 
XHCIEPContext *epctx,
 /* update ring dequeue ptr */
 if (epctx-nr_pstreams) {
 if (sctx != NULL) {
+ring = sctx-ring;
 xhci_dma_read_u32s(xhci, sctx-pctx, ctx2, sizeof(ctx2));
 ctx2[0] = 0xe;
 ctx2[0] |= sctx-ring.dequeue | sctx-ring.ccs;
@@ -1204,8 +1206,12 @@ static void xhci_set_ep_state(XHCIState *xhci, 
XHCIEPContext *epctx,
 xhci_dma_write_u32s(xhci, sctx-pctx, ctx2, sizeof(ctx2));
 }
 } else {
-ctx[2] = epctx-ring.dequeue | epctx-ring.ccs;
-ctx[3] = (epctx-ring.dequeue  16)  16;
+ring = epctx-ring;
+}
+if (ring) {
+ctx[2] = ring-dequeue | ring-ccs;
+ctx[3] = (ring-dequeue  16)  16;
+
 DPRINTF(xhci: set epctx:  DMA_ADDR_FMT  state=%d 
dequeue=%08x%08x\n,
 epctx-pctx, state, ctx[3], ctx[2]);
 }
-- 
1.8.3.1




[Qemu-devel] [PATCH 3/7] usb-host-libusb: Detach kernel drivers earlier

2013-10-24 Thread Gerd Hoffmann
From: Hans de Goede hdego...@redhat.com

If we detach the kernel drivers on the first set_config, then they will
still be attached when the device gets its initial reset, causing the drivers
to re-initialize the device after the reset and dirty the device state.

Signed-off-by: Hans de Goede hdego...@redhat.com
Signed-off-by: Gerd Hoffmann kra...@redhat.com
---
 hw/usb/host-libusb.c | 7 +--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/hw/usb/host-libusb.c b/hw/usb/host-libusb.c
index 35bae55..fd320cd 100644
--- a/hw/usb/host-libusb.c
+++ b/hw/usb/host-libusb.c
@@ -137,6 +137,7 @@ static QTAILQ_HEAD(, USBHostDevice) hostdevs =
 static void usb_host_auto_check(void *unused);
 static void usb_host_release_interfaces(USBHostDevice *s);
 static void usb_host_nodev(USBHostDevice *s);
+static void usb_host_detach_kernel(USBHostDevice *s);
 static void usb_host_attach_kernel(USBHostDevice *s);
 
 /*  */
@@ -787,10 +788,13 @@ static int usb_host_open(USBHostDevice *s, libusb_device 
*dev)
 goto fail;
 }
 
-libusb_get_device_descriptor(dev, s-ddesc);
 s-dev = dev;
 s-bus_num = bus_num;
 s-addr= addr;
+
+usb_host_detach_kernel(s);
+
+libusb_get_device_descriptor(dev, s-ddesc);
 usb_host_get_port(s-dev, s-port, sizeof(s-port));
 
 usb_ep_init(udev);
@@ -1051,7 +1055,6 @@ static void usb_host_set_config(USBHostDevice *s, int 
config, USBPacket *p)
 trace_usb_host_set_config(s-bus_num, s-addr, config);
 
 usb_host_release_interfaces(s);
-usb_host_detach_kernel(s);
 rc = libusb_set_configuration(s-dh, config);
 if (rc != 0) {
 usb_host_libusb_error(libusb_set_configuration, rc);
-- 
1.8.3.1




[Qemu-devel] [PATCH 4/7] usb-hcd-xhci: Remove unused sstreamsm member from XHCIStreamContext

2013-10-24 Thread Gerd Hoffmann
From: Hans de Goede hdego...@redhat.com

Signed-off-by: Hans de Goede hdego...@redhat.com
Signed-off-by: Gerd Hoffmann kra...@redhat.com
---
 hw/usb/hcd-xhci.c | 9 -
 1 file changed, 9 deletions(-)

diff --git a/hw/usb/hcd-xhci.c b/hw/usb/hcd-xhci.c
index 469c24d..e078c50 100644
--- a/hw/usb/hcd-xhci.c
+++ b/hw/usb/hcd-xhci.c
@@ -374,7 +374,6 @@ struct XHCIStreamContext {
 dma_addr_t pctx;
 unsigned int sct;
 XHCIRing ring;
-XHCIStreamContext *sstreams;
 };
 
 struct XHCIEPContext {
@@ -1133,7 +1132,6 @@ static void xhci_reset_streams(XHCIEPContext *epctx)
 
 for (i = 0; i  epctx-nr_pstreams; i++) {
 epctx-pstreams[i].sct = -1;
-g_free(epctx-pstreams[i].sstreams);
 }
 }
 
@@ -1146,15 +1144,8 @@ static void xhci_alloc_streams(XHCIEPContext *epctx, 
dma_addr_t base)
 
 static void xhci_free_streams(XHCIEPContext *epctx)
 {
-int i;
-
 assert(epctx-pstreams != NULL);
 
-if (!epctx-lsa) {
-for (i = 0; i  epctx-nr_pstreams; i++) {
-g_free(epctx-pstreams[i].sstreams);
-}
-}
 g_free(epctx-pstreams);
 epctx-pstreams = NULL;
 epctx-nr_pstreams = 0;
-- 
1.8.3.1




[Qemu-devel] [PATCH 6/7] usb-hcd-xhci: Report completion of active transfer with CC_STOPPED on ep stop

2013-10-24 Thread Gerd Hoffmann
From: Hans de Goede hdego...@redhat.com

As we should per the XHCI spec 4.6.9 Stop Endpoint.

Signed-off-by: Hans de Goede hdego...@redhat.com
Signed-off-by: Gerd Hoffmann kra...@redhat.com
---
 hw/usb/hcd-xhci.c | 26 ++
 1 file changed, 18 insertions(+), 8 deletions(-)

diff --git a/hw/usb/hcd-xhci.c b/hw/usb/hcd-xhci.c
index 7cf89ce..0131151 100644
--- a/hw/usb/hcd-xhci.c
+++ b/hw/usb/hcd-xhci.c
@@ -505,6 +505,7 @@ static void xhci_kick_ep(XHCIState *xhci, unsigned int 
slotid,
  unsigned int epid, unsigned int streamid);
 static TRBCCode xhci_disable_ep(XHCIState *xhci, unsigned int slotid,
 unsigned int epid);
+static void xhci_xfer_report(XHCITransfer *xfer);
 static void xhci_event(XHCIState *xhci, XHCIEvent *event, int v);
 static void xhci_write_event(XHCIState *xhci, XHCIEvent *event, int v);
 static USBEndpoint *xhci_epid_to_usbep(XHCIState *xhci,
@@ -1302,10 +1303,15 @@ static TRBCCode xhci_enable_ep(XHCIState *xhci, 
unsigned int slotid,
 return CC_SUCCESS;
 }
 
-static int xhci_ep_nuke_one_xfer(XHCITransfer *t)
+static int xhci_ep_nuke_one_xfer(XHCITransfer *t, TRBCCode report)
 {
 int killed = 0;
 
+if (report  (t-running_async || t-running_retry)) {
+t-status = report;
+xhci_xfer_report(t);
+}
+
 if (t-running_async) {
 usb_cancel_packet(t-packet);
 t-running_async = 0;
@@ -1318,6 +1324,7 @@ static int xhci_ep_nuke_one_xfer(XHCITransfer *t)
 timer_del(epctx-kick_timer);
 }
 t-running_retry = 0;
+killed = 1;
 }
 if (t-trbs) {
 g_free(t-trbs);
@@ -1330,7 +1337,7 @@ static int xhci_ep_nuke_one_xfer(XHCITransfer *t)
 }
 
 static int xhci_ep_nuke_xfers(XHCIState *xhci, unsigned int slotid,
-   unsigned int epid)
+   unsigned int epid, TRBCCode report)
 {
 XHCISlot *slot;
 XHCIEPContext *epctx;
@@ -1351,7 +1358,10 @@ static int xhci_ep_nuke_xfers(XHCIState *xhci, unsigned 
int slotid,
 
 xferi = epctx-next_xfer;
 for (i = 0; i  TD_QUEUE; i++) {
-killed += xhci_ep_nuke_one_xfer(epctx-transfers[xferi]);
+killed += xhci_ep_nuke_one_xfer(epctx-transfers[xferi], report);
+if (killed) {
+report = 0; /* Only report once */
+}
 epctx-transfers[xferi].packet.ep = NULL;
 xferi = (xferi + 1) % TD_QUEUE;
 }
@@ -1381,7 +1391,7 @@ static TRBCCode xhci_disable_ep(XHCIState *xhci, unsigned 
int slotid,
 return CC_SUCCESS;
 }
 
-xhci_ep_nuke_xfers(xhci, slotid, epid);
+xhci_ep_nuke_xfers(xhci, slotid, epid, 0);
 
 epctx = slot-eps[epid-1];
 
@@ -1423,7 +1433,7 @@ static TRBCCode xhci_stop_ep(XHCIState *xhci, unsigned 
int slotid,
 return CC_EP_NOT_ENABLED_ERROR;
 }
 
-if (xhci_ep_nuke_xfers(xhci, slotid, epid)  0) {
+if (xhci_ep_nuke_xfers(xhci, slotid, epid, CC_STOPPED)  0) {
 fprintf(stderr, xhci: FIXME: endpoint stopped w/ xfers running, 
 data might be lost\n);
 }
@@ -1468,7 +1478,7 @@ static TRBCCode xhci_reset_ep(XHCIState *xhci, unsigned 
int slotid,
 return CC_CONTEXT_STATE_ERROR;
 }
 
-if (xhci_ep_nuke_xfers(xhci, slotid, epid)  0) {
+if (xhci_ep_nuke_xfers(xhci, slotid, epid, 0)  0) {
 fprintf(stderr, xhci: FIXME: endpoint reset w/ xfers running, 
 data might be lost\n);
 }
@@ -2461,7 +2471,7 @@ static void xhci_detach_slot(XHCIState *xhci, USBPort 
*uport)
 
 for (ep = 0; ep  31; ep++) {
 if (xhci-slots[slot].eps[ep]) {
-xhci_ep_nuke_xfers(xhci, slot+1, ep+1);
+xhci_ep_nuke_xfers(xhci, slot + 1, ep + 1, 0);
 }
 }
 xhci-slots[slot].uport = NULL;
@@ -3276,7 +3286,7 @@ static void xhci_complete(USBPort *port, USBPacket 
*packet)
 XHCITransfer *xfer = container_of(packet, XHCITransfer, packet);
 
 if (packet-status == USB_RET_REMOVE_FROM_QUEUE) {
-xhci_ep_nuke_one_xfer(xfer);
+xhci_ep_nuke_one_xfer(xfer, 0);
 return;
 }
 xhci_complete_packet(xfer);
-- 
1.8.3.1




[Qemu-devel] [PATCH 1/7] usb-host-libusb: Fix reset handling

2013-10-24 Thread Gerd Hoffmann
From: Hans de Goede hdego...@redhat.com

The guest will issue an initial device reset when the device is attached, but
since the current usb-host-libusb code only actually does the reset when
udev-configuration != 0, and on attach the device is not yet configured,
the reset gets ignored. This means that the device gets passed to the guest
in an unknown state, which is not good.

The udev-configuration check is there because of the release / claim
interfaces done around the libusb_device_reset call, but these are not
necessary. If interfaces are claimed when libusb_device_reset gets called
libusb will release + reclaim them itself.

The usb_host_ep_update call also is not necessary. If the reset succeeds the
original config and interface alt settings will be restored.

Lastly, if the reset fails, that means the device has either disconnected or
morphed into another device and has been completely re-enumerated,
so it is treated by the host as a new device and our handle is invalid,
so on reset failure we need to call usb_host_nodev().

Signed-off-by: Hans de Goede hdego...@redhat.com
Signed-off-by: Gerd Hoffmann kra...@redhat.com
---
 hw/usb/host-libusb.c | 10 --
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/hw/usb/host-libusb.c b/hw/usb/host-libusb.c
index 128955d..428c7c5 100644
--- a/hw/usb/host-libusb.c
+++ b/hw/usb/host-libusb.c
@@ -1256,16 +1256,14 @@ static void usb_host_flush_ep_queue(USBDevice *dev, 
USBEndpoint *ep)
 static void usb_host_handle_reset(USBDevice *udev)
 {
 USBHostDevice *s = USB_HOST_DEVICE(udev);
+int rc;
 
 trace_usb_host_reset(s-bus_num, s-addr);
 
-if (udev-configuration == 0) {
-return;
+rc = libusb_reset_device(s-dh);
+if (rc != 0) {
+usb_host_nodev(s);
 }
-usb_host_release_interfaces(s);
-libusb_reset_device(s-dh);
-usb_host_claim_interfaces(s, 0);
-usb_host_ep_update(s);
 }
 
 /*
-- 
1.8.3.1




[Qemu-devel] [PATCH 2/7] usb-host-libusb: Configuration 0 may be a valid configuration

2013-10-24 Thread Gerd Hoffmann
From: Hans de Goede hdego...@redhat.com

Quoting from: linux/Documentation/ABI/stable/sysfs-bus-usb:

Note that some devices, in violation of the USB spec, have a
configuration with a value equal to 0. Writing 0 to
bConfigurationValue for these devices will install that
configuration, rather than unconfigure the device.

So don't compare the configuration value against 0 to check for unconfigured
devices, instead check for a LIBUSB_ERROR_NOT_FOUND return from
libusb_get_active_config_descriptor().

Signed-off-by: Hans de Goede hdego...@redhat.com
Signed-off-by: Gerd Hoffmann kra...@redhat.com
---
 hw/usb/host-libusb.c | 9 -
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/hw/usb/host-libusb.c b/hw/usb/host-libusb.c
index 428c7c5..35bae55 100644
--- a/hw/usb/host-libusb.c
+++ b/hw/usb/host-libusb.c
@@ -992,15 +992,14 @@ static int usb_host_claim_interfaces(USBHostDevice *s, 
int configuration)
 udev-ninterfaces   = 0;
 udev-configuration = 0;
 
-if (configuration == 0) {
-/* address state - ignore */
-return USB_RET_SUCCESS;
-}
-
 usb_host_detach_kernel(s);
 
 rc = libusb_get_active_config_descriptor(s-dev, conf);
 if (rc != 0) {
+if (rc == LIBUSB_ERROR_NOT_FOUND) {
+/* address state - ignore */
+return USB_RET_SUCCESS;
+}
 return USB_RET_STALL;
 }
 
-- 
1.8.3.1




[Qemu-devel] [PATCH 5/7] usb-hcd-xhci: Remove unused cancelled member from XHCITransfer

2013-10-24 Thread Gerd Hoffmann
From: Hans de Goede hdego...@redhat.com

Since qemu's USB model is geared towards emulated devices, cancellation
is instantaneous, so there is no need to wait for cancellation to complete.
As such there is no wait-for-cancellation code, and the cancelled bool
as well as the bogus comment about it can be removed.

Signed-off-by: Hans de Goede hdego...@redhat.com
Signed-off-by: Gerd Hoffmann kra...@redhat.com
---
 hw/usb/hcd-xhci.c | 5 -
 1 file changed, 5 deletions(-)

diff --git a/hw/usb/hcd-xhci.c b/hw/usb/hcd-xhci.c
index e078c50..7cf89ce 100644
--- a/hw/usb/hcd-xhci.c
+++ b/hw/usb/hcd-xhci.c
@@ -346,7 +346,6 @@ typedef struct XHCITransfer {
 QEMUSGList sgl;
 bool running_async;
 bool running_retry;
-bool cancelled;
 bool complete;
 bool int_req;
 unsigned int iso_pkts;
@@ -1310,8 +1309,6 @@ static int xhci_ep_nuke_one_xfer(XHCITransfer *t)
 if (t-running_async) {
 usb_cancel_packet(t-packet);
 t-running_async = 0;
-t-cancelled = 1;
-DPRINTF(xhci: cancelling transfer, waiting for it to complete\n);
 killed = 1;
 }
 if (t-running_retry) {
@@ -1728,14 +1725,12 @@ static int xhci_complete_packet(XHCITransfer *xfer)
 xfer-running_async = 1;
 xfer-running_retry = 0;
 xfer-complete = 0;
-xfer-cancelled = 0;
 return 0;
 } else if (xfer-packet.status == USB_RET_NAK) {
 trace_usb_xhci_xfer_nak(xfer);
 xfer-running_async = 0;
 xfer-running_retry = 1;
 xfer-complete = 0;
-xfer-cancelled = 0;
 return 0;
 } else {
 xfer-running_async = 0;
-- 
1.8.3.1




Re: [Qemu-devel] [Xen-devel] Hvmloader: Modify ACPI to only supply _EJ0 methods for PCIslots that support hotplug by runtime patching

2013-10-24 Thread Gonglei (Arei)
 -Original Message-
 From: Fabio Fantoni [mailto:fabio.fant...@m2r.biz]
 Sent: Thursday, October 24, 2013 8:58 PM
 To: Gonglei (Arei); Jan Beulich
 Cc: anthony.per...@citrix.com; Ian Campbell; Stefano Stabellini; Gaowei (UVP);
 Hanweidong (Randy); Huangweidong (Hardware); Luonengjun; Yanqiangjun;
 xen-de...@lists.xen.org; qemu-devel@nongnu.org; Markus Armbruster
 Subject: Re: [Qemu-devel] [Xen-devel] Hvmloader: Modify ACPI to only supply
 _EJ0 methods for PCIslots that support hotplug by runtime patching
 
 Il 24/10/2013 14:17, Gonglei (Arei) ha scritto:
  -Original Message-
  From: Jan Beulich [mailto:jbeul...@suse.com]
  Sent: Tuesday, October 22, 2013 4:06 PM
  To: Gonglei (Arei)
  Cc: anthony.per...@citrix.com; Ian Campbell; Stefano Stabellini; Gaowei
 (UVP);
  Hanweidong (Randy); Huangweidong (Hardware); Luonengjun; Yanqiangjun;
  xen-de...@lists.xen.org; Fabio Fantoni; qemu-devel@nongnu.org; Markus
  Armbruster
  Subject: RE: [Qemu-devel] [Xen-devel] Hvmloader: Modify ACPI to only
 supply
  _EJ0 methods for PCIslots that support hotplug by runtime patching
 
  On 22.10.13 at 06:08, Gonglei (Arei) arei.gong...@huawei.com
 wrote:
  Hi, guys. The new patch has been modified based on the principles you
  suggested, thank you so much.
  Last time I test the patch based on the codes of 4.3.0.
  This time, I found that the system based on the codes of trunk causes the
 VM
  reboot again and again, which I have not found out the reason.
  So i can not test the patch based on the codes of trunk (details in
  EJ0_ACPI_PCI_Hotplug.patch)..
  I'm afraid we will need you to figure out that problem first, and
  then do the verification on -unstable. Even if the code shouldn't
  be that different from 4.3, we still don't want to apply completely
  untested stuff.
  Hi, Jan. We found the reason: we used a wrong seabios PATH, so
 the hvmloader couldn't load the bios.bin.
  So the VM restarted again and again after we started it. That's our fault.
 
  Now I test the patch based on the codes of trunk, which works well.
  The patch has been modified after your suggestion.
  The patch works well with upstream qemu and doesn't affect the system with
 traditional qemu.
 
 
  --- a/tools/firmware/hvmloader/ovmf.c
  +++ b/tools/firmware/hvmloader/ovmf.c
  @@ -79,7 +79,11 @@ static void ovmf_acpi_build_tables(void)
.dsdt_anycpu = dsdt_anycpu,
.dsdt_anycpu_len = dsdt_anycpu_len,
.dsdt_15cpu = NULL,
  -.dsdt_15cpu_len = 0
  +.dsdt_15cpu_len = 0,
  +.aml_ej0_name = NULL,
  +.aml_adr_dword = NULL,
  +.aml_ej0_name_len = 0,
  +.aml_adr_dword_len = 0,
  I don't see why you're adding these.
 
  It is only for insurance purposes: it just initializes the struct.
 
  Signed-off-by: Gaowei gao.gao...@huawei.com
  Signed-off-by: gonglei arei.gong...@huawei.com
 
 Tested-by: Fabio Fantoni fabio.fant...@m2r.biz
 
 Tested on xen unstable with qemu 1.6.1, no problem found for now.
 Only one question: this patch removes hotplug only from essential PCI
 devices, right?
 On windows 7 hotplug continues to show: virtio-serial driver, xen pci
 device driver and hd audio.
 
It depends on the hotplug property of the PCI device classes emulated by 
upstream qemu.
If you set k->no_hotplug = 1 in the class_init function for those PCI devices, 
they will not be shown in the Windows guest any more.

 Thanks for any reply.
 

Best regards,
-Gonglei


[Qemu-devel] [PATCH 1/1] audio: honor QEMU_AUDIO_TIMER_PERIOD instead of waking up every *nano* second

2013-10-24 Thread Gerd Hoffmann
From: Hans de Goede hdego...@redhat.com

Now that we no longer have MIN_REARM_TIMER_NS, a bug in the audio subsystem has
clearly shown itself by trying to make a timer fire every nanosecond.

Note we have a similar problem in 1.6, 1.5 and older but there
MIN_REARM_TIMER_NS limits the wakeups caused by audio being active to
4000 times / second. This still causes a host cpu load of 50 % for simply
playing audio, whereas with this patch git master is at 13%, so we should
backport this to 1.5 and 1.6 too.

Note this will not apply to 1.5 and 1.6 as is.

Cc: qemu-sta...@nongnu.org
Signed-off-by: Hans de Goede hdego...@redhat.com
Signed-off-by: Gerd Hoffmann kra...@redhat.com
---
 audio/audio.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/audio/audio.c b/audio/audio.c
index af4cdf6..b3db679 100644
--- a/audio/audio.c
+++ b/audio/audio.c
@@ -1124,7 +1124,8 @@ static int audio_is_timer_needed (void)
 static void audio_reset_timer (AudioState *s)
 {
 if (audio_is_timer_needed ()) {
-timer_mod (s-ts, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + 1);
+timer_mod (s-ts,
+qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + conf.period.ticks);
 }
 else {
 timer_del (s-ts);
-- 
1.8.3.1




[Qemu-devel] [PULL for-1.7 0/1] audio bugfix

2013-10-24 Thread Gerd Hoffmann
  Hi,

Single audio bugfix for 1.7.

please pull,
  Gerd

The following changes since commit fc8ead74674b7129e8f31c2595c76658e5622197:

  Merge remote-tracking branch 'qemu-kvm/uq/master' into staging (2013-10-18 
10:03:24 -0700)

are available in the git repository at:


  git://git.kraxel.org/qemu audio.2

for you to fetch changes up to b4350deed67b95651896ddb60cf9f765093a4848:

  audio: honor QEMU_AUDIO_TIMER_PERIOD instead of waking up every *nano* second 
(2013-10-23 10:37:27 +0200)


Hans de Goede (1):
  audio: honor QEMU_AUDIO_TIMER_PERIOD instead of waking up every *nano* 
second

 audio/audio.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)



Re: [Qemu-devel] [PATCH 01/17] rename is_active to is_block_active

2013-10-24 Thread Paolo Bonzini
Il 22/10/2013 04:25, Lei Li ha scritto:
 is_active is used to identify block migration, rename to
 is_block_active to make it more clear.

No, is_active is used to identify whether a set of SaveVMHandlers is
active.  The default is true, so only block migration is using it.  But
we could use it in the future for other features (probably using
migration capabilities instead of a flag as is the case for block).

Paolo

 Signed-off-by: Lei Li li...@linux.vnet.ibm.com
 ---
  block-migration.c   |2 +-
  include/migration/vmstate.h |2 +-
  savevm.c|   16 
  3 files changed, 10 insertions(+), 10 deletions(-)
 
 diff --git a/block-migration.c b/block-migration.c
 index daf9ec1..b637695 100644
 --- a/block-migration.c
 +++ b/block-migration.c
 @@ -834,7 +834,7 @@ SaveVMHandlers savevm_block_handlers = {
  .save_live_pending = block_save_pending,
  .load_state = block_load,
  .cancel = block_migration_cancel,
 -.is_active = block_is_active,
 +.is_block_active = block_is_active,
  };
  
  void blk_mig_init(void)
 diff --git a/include/migration/vmstate.h b/include/migration/vmstate.h
 index 9d09e60..c634d65 100644
 --- a/include/migration/vmstate.h
 +++ b/include/migration/vmstate.h
 @@ -42,7 +42,7 @@ typedef struct SaveVMHandlers {
  int (*save_live_complete)(QEMUFile *f, void *opaque);
  
  /* This runs both outside and inside the iothread lock.  */
 -bool (*is_active)(void *opaque);
 +bool (*is_block_active)(void *opaque);
  
  /* This runs outside the iothread lock in the migration case, and
   * within the lock in the savevm case.  The callback had better only
 diff --git a/savevm.c b/savevm.c
 index 2f631d4..56b8643 100644
 --- a/savevm.c
 +++ b/savevm.c
 @@ -1867,8 +1867,8 @@ void qemu_savevm_state_begin(QEMUFile *f,
  if (!se-ops || !se-ops-save_live_setup) {
  continue;
  }
 -if (se-ops  se-ops-is_active) {
 -if (!se-ops-is_active(se-opaque)) {
 +if (se-ops  se-ops-is_block_active) {
 +if (!se-ops-is_block_active(se-opaque)) {
  continue;
  }
  }
 @@ -1907,8 +1907,8 @@ int qemu_savevm_state_iterate(QEMUFile *f)
  if (!se-ops || !se-ops-save_live_iterate) {
  continue;
  }
 -if (se-ops  se-ops-is_active) {
 -if (!se-ops-is_active(se-opaque)) {
 +if (se-ops  se-ops-is_block_active) {
 +if (!se-ops-is_block_active(se-opaque)) {
  continue;
  }
  }
 @@ -1948,8 +1948,8 @@ void qemu_savevm_state_complete(QEMUFile *f)
  if (!se-ops || !se-ops-save_live_complete) {
  continue;
  }
 -if (se-ops  se-ops-is_active) {
 -if (!se-ops-is_active(se-opaque)) {
 +if (se-ops  se-ops-is_block_active) {
 +if (!se-ops-is_block_active(se-opaque)) {
  continue;
  }
  }
 @@ -2002,8 +2002,8 @@ uint64_t qemu_savevm_state_pending(QEMUFile *f, 
 uint64_t max_size)
  if (!se-ops || !se-ops-save_live_pending) {
  continue;
  }
 -if (se-ops  se-ops-is_active) {
 -if (!se-ops-is_active(se-opaque)) {
 +if (se-ops  se-ops-is_block_active) {
 +if (!se-ops-is_block_active(se-opaque)) {
  continue;
  }
  }
 




Re: [Qemu-devel] [PATCH 02/17] QAPI: introduce magration capability unix_page_flipping

2013-10-24 Thread Paolo Bonzini
Il 22/10/2013 04:25, Lei Li ha scritto:
 +# @unix-page-flipping: If enabled, QEMU will support localhost migration. 
 This
 +#  feature allows live upgrade of a running QEMU instance by doing 
 localhost
 +#  migration with page flipping. It requires the source and 
 destination
 +#  are both on localhost. Disabled by default. (since 1.7)
 +#

If enabled, QEMU can optimize migration when the destination is a QEMU
process that runs on the same host as the source (as is the case for
live upgrade).  If the migration transport is a Unix socket, QEMU will
flip RAM pages directly to the destination, so that memory is only
allocated twice for the source and destination processes. Disabled by
default. (since 1.8)

Paolo



Re: [Qemu-devel] [PATCH 03/17] migration: add migrate_unix_page_flipping()

2013-10-24 Thread Paolo Bonzini
Il 22/10/2013 04:25, Lei Li ha scritto:
 Add migrate_unix_page_flipping() to check if
 MIGRATION_CAPABILITY_UNIX_PAGE_FLIPPING is enabled.
 
 Signed-off-by: Lei Li li...@linux.vnet.ibm.com
 ---
  include/migration/migration.h |3 +++
  migration.c   |9 +
  2 files changed, 12 insertions(+), 0 deletions(-)
 
 diff --git a/include/migration/migration.h b/include/migration/migration.h
 index 140e6b4..7e5d01a 100644
 --- a/include/migration/migration.h
 +++ b/include/migration/migration.h
 @@ -131,10 +131,13 @@ void migrate_add_blocker(Error *reason);
  void migrate_del_blocker(Error *reason);
  
  bool migrate_rdma_pin_all(void);
 +
  bool migrate_zero_blocks(void);
  
  bool migrate_auto_converge(void);
  
 +bool migrate_unix_page_flipping(void);
 +
  int xbzrle_encode_buffer(uint8_t *old_buf, uint8_t *new_buf, int slen,
   uint8_t *dst, int dlen);
  int xbzrle_decode_buffer(uint8_t *src, int slen, uint8_t *dst, int dlen);
 diff --git a/migration.c b/migration.c
 index 2b1ab20..4ac466b 100644
 --- a/migration.c
 +++ b/migration.c
 @@ -541,6 +541,15 @@ int64_t migrate_xbzrle_cache_size(void)
  return s-xbzrle_cache_size;
  }
  
 +bool migrate_unix_page_flipping(void)
 +{
 +MigrationState *s;
 +
 +s = migrate_get_current();
 +
 +return s-enabled_capabilities[MIGRATION_CAPABILITY_UNIX_PAGE_FLIPPING];
 +}
 +
  /* migration thread support */
  
  static void *migration_thread(void *opaque)
 

Reviewed-by: Paolo Bonzini pbonz...@redhat.com




Re: [Qemu-devel] [PATCH 04/17] qmp-command.hx: add missing docs for migration capabilites

2013-10-24 Thread Paolo Bonzini
Il 22/10/2013 04:25, Lei Li ha scritto:
 Signed-off-by: Lei Li li...@linux.vnet.ibm.com
 ---
  qmp-commands.hx |8 
  1 files changed, 8 insertions(+), 0 deletions(-)
 
 diff --git a/qmp-commands.hx b/qmp-commands.hx
 index fba15cd..650a3a8 100644
 --- a/qmp-commands.hx
 +++ b/qmp-commands.hx
 @@ -2898,6 +2898,10 @@ migrate-set-capabilities
  Enable/Disable migration capabilities
  
  - xbzrle: XBZRLE support
 +- x-rdma-pin-all: RDMA support

Pin all pages during RDMA support.

 +- zero-blocks: zero-blocks support

Compress zero blocks during block migration.

 +- auto-converge: Auto converge support

Block VCPU to help convergence of migration

 +- unix-page-flipping: Page flipping support

Page flipping for live QEMU upgrade

  Arguments:
  
 @@ -2922,6 +2926,10 @@ Query current migration capabilities
  
  - capabilities: migration capabilities state
   - xbzrle : XBZRLE state (json-bool)
 + - x-rdma-pin-all: RDMA state (json-bool)
 + - zero-blocks: zero-blocks state (json-bool)
 + - auto-converge: Auto converge state (json-bool)
 + - unix-page-flipping: Page flipping state (json-bool)
  
  Arguments:
  
 

Please separate page flipping in a separate patch and send it for 1.7.
Once you do that, patches 2/3/4 can be merged.

Paolo



[Qemu-devel] [Bug 1243968] [NEW] VMware ESXi on QEmu Kernel Panic

2013-10-24 Thread Nathan Shearer
Public bug reported:

I attempted to install ESXi 5.5 (the free version) into a QEmu 1.6.1 VM.
The guest OS does have the svm capabilities, but it appears VMware is
trying to do some kind of hypercall that crashes the guest.

There is more information here:
https://communities.vmware.com/message/2297382

It seems to me that this stubbed feature should just be disabled if it
is unusable. Or at the very least I should be able to disable it at run-
time with a command-line argument.

Is there some way to disable all the hypervisor features that makes it
very obvious to a guest os that it is running inside a VM? It would be
great if I could install a software and it would actually work (even if
it's slow with those features disabled).

FYI, my guest OS capabilities are:

# cat /proc/cpuinfo
processor   : 0
vendor_id   : AuthenticAMD
cpu family  : 6
model   : 2
model name  : QEMU Virtual CPU version 1.5.3
stepping: 3
microcode   : 0x165
cpu MHz : 1999.999
cache size  : 512 KB
fpu : yes
fpu_exception   : yes
cpuid level : 4
wp  : yes
flags   : fpu de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat 
pse36 clflush mmx fxsr sse sse2 syscall nx lm nopl pni cx16 popcnt hypervisor 
lahf_lm svm abm sse4a
bogomips: 3999.99
TLB size: 1024 4K pages
clflush size: 64
cache_alignment : 64
address sizes   : 40 bits physical, 48 bits virtual
power management:

** Affects: qemu
 Importance: Undecided
 Status: Confirmed


** Tags: esxi hypercall vmware

** Changed in: qemu
   Status: New = Confirmed

-- 
You received this bug notification because you are a member of qemu-
devel-ml, which is subscribed to QEMU.
https://bugs.launchpad.net/bugs/1243968

Title:
  VMware ESXi on QEmu Kernel Panic

Status in QEMU:
  Confirmed

Bug description:
  I attempted to install ESXi 5.5 (the free version) into a QEmu 1.6.1
  VM. The guest OS does have the svm capabilities, but it appears VMware
  is trying to do some kind of hypercall that crashes the guest.

  There is more information here:
  https://communities.vmware.com/message/2297382

  It seems to me that this stubbed feature should just be disabled if it
  is unusable. Or at the very least I should be able to disable it at
  run-time with a command-line argument.

  Is there some way to disable all the hypervisor features that makes it
  very obvious to a guest os that it is running inside a VM? It would be
  great if I could install a software and it would actually work (even
  if it's slow with those features disabled).

  FYI, my guest OS capabilities are:

  # cat /proc/cpuinfo
  processor   : 0
  vendor_id   : AuthenticAMD
  cpu family  : 6
  model   : 2
  model name  : QEMU Virtual CPU version 1.5.3
  stepping: 3
  microcode   : 0x165
  cpu MHz : 1999.999
  cache size  : 512 KB
  fpu : yes
  fpu_exception   : yes
  cpuid level : 4
  wp  : yes
  flags   : fpu de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov 
pat pse36 clflush mmx fxsr sse sse2 syscall nx lm nopl pni cx16 popcnt 
hypervisor lahf_lm svm abm sse4a
  bogomips: 3999.99
  TLB size: 1024 4K pages
  clflush size: 64
  cache_alignment : 64
  address sizes   : 40 bits physical, 48 bits virtual
  power management:

To manage notifications about this bug go to:
https://bugs.launchpad.net/qemu/+bug/1243968/+subscriptions



  1   2   3   >