date:20200821

Re: Suspicious QOM types without instance/class size

2020-08-21 Thread Eduardo Habkost

On Fri, Aug 21, 2020 at 11:43:35AM +0200, Cornelia Huck wrote:
> On Thu, 20 Aug 2020 17:55:29 -0400
> Eduardo Habkost  wrote:
> 
> > While trying to convert TypeInfo declarations to the new
> > OBJECT_DECLARE* macros, I've stumbled on a few suspicious cases
> > where instance_size or class_size is not set, despite having type
> > checker macros that use a specific type.
> > 
> > The ones with "WARNING" are abstract types (maybe not serious if
> > subclasses set the appropriate sizes).  The ones with "ERROR"
> > don't seem to be abstract types.
> > 
> > ERROR: hw/s390x/virtio-ccw.c:1237:1: class_size should be set to 
> > sizeof(VirtioCcwBusClass)?
> > ERROR: hw/virtio/virtio-pci.c:2101:1: class_size should be set to 
> > sizeof(VirtioPCIBusClass)?
> 
> VirtioCcwBusClass and VirtioPCIBusClass are both simple typedefs of
> VirtioBusClass (it's likely that I copied the ccw definition from the
> pci one). virtio-mmio instead uses VirtioBusClass directly in its
> checker macros.
> 
> I don't see a real reason for the typedefs, maybe ccw and pci should
> use the mmio approach as well?

I think it's OK to keep the typedefs if the code is consistent
(i.e. we set instance_size and class_size just in case the
typedefs are replaced by a real struct one day).

I'm not sure about the TYPE_VIRTIO_MMIO_BUS approach.  If the
code just needs VirtioBusState or VirtioBusClass pointers, it can
already use the VIRTIO_BUS* macros.

The OBJECT_DECLARE_TYPE macro Daniel sent expects each QOM type
to have a separate struct being defined, which isn't true in many
cases.  I'm considering removing the "typedef struct Foo Foo"
lines from OBJECT_DECLARE_TYPE(), to make initial conversion
easier.

-- 
Eduardo

[PATCH v6 15/15] block/nvme: Use an array of EventNotifier

2020-08-21 Thread Philippe Mathieu-Daudé

In preparation for using multiple IRQs (thus multiple eventfds),
make BDRVNVMeState::irq_notifier an array (for now of a single
element, the admin queue notifier).

Reviewed-by: Stefan Hajnoczi 
Reviewed-by: Stefano Garzarella 
Signed-off-by: Philippe Mathieu-Daudé 
---
 block/nvme.c | 31 +--
 1 file changed, 21 insertions(+), 10 deletions(-)

diff --git a/block/nvme.c b/block/nvme.c
index 1cc2e9493d0..86bfd487e2c 100644
--- a/block/nvme.c
+++ b/block/nvme.c
@@ -106,6 +106,12 @@ QEMU_BUILD_BUG_ON(offsetof(NVMeRegs, doorbells) != 0x1000);
 #define INDEX_ADMIN 0
 #define INDEX_IO(n) (1 + n)
 
+/* This driver shares a single MSIX IRQ for the admin and I/O queues */
+enum {
+MSIX_SHARED_IRQ_IDX = 0,
+MSIX_IRQ_COUNT = 1
+};
+
 struct BDRVNVMeState {
 AioContext *aio_context;
 QEMUVFIOState *vfio;
@@ -120,7 +126,7 @@ struct BDRVNVMeState {
 /* How many uint32_t elements does each doorbell entry take. */
 size_t doorbell_scale;
 bool write_cache_supported;
-EventNotifier irq_notifier;
+EventNotifier irq_notifier[MSIX_IRQ_COUNT];
 
 uint64_t nsze; /* Namespace size reported by identify command */
 int nsid;  /* The namespace id to read/write data. */
@@ -631,7 +637,8 @@ static bool nvme_poll_queues(BDRVNVMeState *s)
 
 static void nvme_handle_event(EventNotifier *n)
 {
-BDRVNVMeState *s = container_of(n, BDRVNVMeState, irq_notifier);
+BDRVNVMeState *s = container_of(n, BDRVNVMeState,
+irq_notifier[MSIX_SHARED_IRQ_IDX]);
 
 trace_nvme_handle_event(s);
 event_notifier_test_and_clear(n);
@@ -683,7 +690,8 @@ out_error:
 static bool nvme_poll_cb(void *opaque)
 {
 EventNotifier *e = opaque;
-BDRVNVMeState *s = container_of(e, BDRVNVMeState, irq_notifier);
+BDRVNVMeState *s = container_of(e, BDRVNVMeState,
+irq_notifier[MSIX_SHARED_IRQ_IDX]);
 
 trace_nvme_poll_cb(s);
 return nvme_poll_queues(s);
@@ -705,7 +713,7 @@ static int nvme_init(BlockDriverState *bs, const char 
*device, int namespace,
 s->device = g_strdup(device);
 s->nsid = namespace;
 s->aio_context = bdrv_get_aio_context(bs);
-ret = event_notifier_init(&s->irq_notifier, 0);
+ret = event_notifier_init(&s->irq_notifier[MSIX_SHARED_IRQ_IDX], 0);
 if (ret) {
 error_setg(errp, "Failed to init event notifier");
 return ret;
@@ -784,12 +792,13 @@ static int nvme_init(BlockDriverState *bs, const char 
*device, int namespace,
 }
 }
 
-ret = qemu_vfio_pci_init_irq(s->vfio, &s->irq_notifier,
+ret = qemu_vfio_pci_init_irq(s->vfio, s->irq_notifier,
  VFIO_PCI_MSIX_IRQ_INDEX, errp);
 if (ret) {
 goto out;
 }
-aio_set_event_notifier(bdrv_get_aio_context(bs), &s->irq_notifier,
+aio_set_event_notifier(bdrv_get_aio_context(bs),
+   &s->irq_notifier[MSIX_SHARED_IRQ_IDX],
false, nvme_handle_event, nvme_poll_cb);
 
 nvme_identify(bs, namespace, &local_err);
@@ -872,9 +881,10 @@ static void nvme_close(BlockDriverState *bs)
 nvme_free_queue_pair(s->queues[i]);
 }
 g_free(s->queues);
-aio_set_event_notifier(bdrv_get_aio_context(bs), &s->irq_notifier,
+aio_set_event_notifier(bdrv_get_aio_context(bs),
+   &s->irq_notifier[MSIX_SHARED_IRQ_IDX],
false, NULL, NULL);
-event_notifier_cleanup(&s->irq_notifier);
+event_notifier_cleanup(&s->irq_notifier[MSIX_SHARED_IRQ_IDX]);
 qemu_vfio_pci_unmap_bar(s->vfio, 0, (void *)s->regs, 0, NVME_BAR_SIZE);
 qemu_vfio_close(s->vfio);
 
@@ -1381,7 +1391,8 @@ static void nvme_detach_aio_context(BlockDriverState *bs)
 q->completion_bh = NULL;
 }
 
-aio_set_event_notifier(bdrv_get_aio_context(bs), &s->irq_notifier,
+aio_set_event_notifier(bdrv_get_aio_context(bs),
+   &s->irq_notifier[MSIX_SHARED_IRQ_IDX],
false, NULL, NULL);
 }
 
@@ -1391,7 +1402,7 @@ static void nvme_attach_aio_context(BlockDriverState *bs,
 BDRVNVMeState *s = bs->opaque;
 
 s->aio_context = new_context;
-aio_set_event_notifier(new_context, &s->irq_notifier,
+aio_set_event_notifier(new_context, &s->irq_notifier[MSIX_SHARED_IRQ_IDX],
false, nvme_handle_event, nvme_poll_cb);
 
 for (int i = 0; i < s->nr_queues; i++) {
-- 
2.26.2

[PATCH v6 12/15] block/nvme: Replace BDRV_POLL_WHILE by AIO_WAIT_WHILE

2020-08-21 Thread Philippe Mathieu-Daudé

BDRV_POLL_WHILE() is defined as:

  #define BDRV_POLL_WHILE(bs, cond) ({  \
  BlockDriverState *bs_ = (bs); \
  AIO_WAIT_WHILE(bdrv_get_aio_context(bs_), \
 cond); })

As we will remove the BlockDriverState use in the next commit,
start by using the exploded version of BDRV_POLL_WHILE().

Reviewed-by: Stefan Hajnoczi 
Reviewed-by: Stefano Garzarella 
Signed-off-by: Philippe Mathieu-Daudé 
---
 block/nvme.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/block/nvme.c b/block/nvme.c
index f98ca067144..3d49ff81fb7 100644
--- a/block/nvme.c
+++ b/block/nvme.c
@@ -493,6 +493,7 @@ static void nvme_cmd_sync_cb(void *opaque, int ret)
 static int nvme_cmd_sync(BlockDriverState *bs, NVMeQueuePair *q,
  NvmeCmd *cmd)
 {
+AioContext *aio_context = bdrv_get_aio_context(bs);
 NVMeRequest *req;
 int ret = -EINPROGRESS;
 req = nvme_get_free_req(q);
@@ -501,7 +502,7 @@ static int nvme_cmd_sync(BlockDriverState *bs, 
NVMeQueuePair *q,
 }
 nvme_submit_command(q, req, cmd, nvme_cmd_sync_cb, &ret);
 
-BDRV_POLL_WHILE(bs, ret == -EINPROGRESS);
+AIO_WAIT_WHILE(aio_context, ret == -EINPROGRESS);
 return ret;
 }
 
-- 
2.26.2

[PATCH v6 14/15] block/nvme: Extract nvme_poll_queue()

2020-08-21 Thread Philippe Mathieu-Daudé

As we want to do per-queue polling, extract the nvme_poll_queue()
method which operates on a single queue.

Reviewed-by: Stefan Hajnoczi 
Reviewed-by: Stefano Garzarella 
Signed-off-by: Philippe Mathieu-Daudé 
---
 block/nvme.c | 44 +++-
 1 file changed, 27 insertions(+), 17 deletions(-)

diff --git a/block/nvme.c b/block/nvme.c
index 85b235c8e6d..1cc2e9493d0 100644
--- a/block/nvme.c
+++ b/block/nvme.c
@@ -590,31 +590,41 @@ out:
 qemu_vfree(id);
 }
 
+static bool nvme_poll_queue(NVMeQueuePair *q)
+{
+bool progress = false;
+
+const size_t cqe_offset = q->cq.head * NVME_CQ_ENTRY_BYTES;
+NvmeCqe *cqe = (NvmeCqe *)&q->cq.queue[cqe_offset];
+
+/*
+ * Do an early check for completions. q->lock isn't needed because
+ * nvme_process_completion() only runs in the event loop thread and
+ * cannot race with itself.
+ */
+if ((le16_to_cpu(cqe->status) & 0x1) == q->cq_phase) {
+return false;
+}
+
+qemu_mutex_lock(&q->lock);
+while (nvme_process_completion(q)) {
+/* Keep polling */
+progress = true;
+}
+qemu_mutex_unlock(&q->lock);
+
+return progress;
+}
+
 static bool nvme_poll_queues(BDRVNVMeState *s)
 {
 bool progress = false;
 int i;
 
 for (i = 0; i < s->nr_queues; i++) {
-NVMeQueuePair *q = s->queues[i];
-const size_t cqe_offset = q->cq.head * NVME_CQ_ENTRY_BYTES;
-NvmeCqe *cqe = (NvmeCqe *)&q->cq.queue[cqe_offset];
-
-/*
- * Do an early check for completions. q->lock isn't needed because
- * nvme_process_completion() only runs in the event loop thread and
- * cannot race with itself.
- */
-if ((le16_to_cpu(cqe->status) & 0x1) == q->cq_phase) {
-continue;
-}
-
-qemu_mutex_lock(&q->lock);
-while (nvme_process_completion(q)) {
-/* Keep polling */
+if (nvme_poll_queue(s->queues[i])) {
 progress = true;
 }
-qemu_mutex_unlock(&q->lock);
 }
 return progress;
 }
-- 
2.26.2

[PATCH v6 13/15] block/nvme: Simplify nvme_create_queue_pair() arguments

2020-08-21 Thread Philippe Mathieu-Daudé

nvme_create_queue_pair() doesn't require BlockDriverState anymore.
Replace it by BDRVNVMeState and AioContext to simplify.

Reviewed-by: Stefan Hajnoczi 
Reviewed-by: Stefano Garzarella 
Signed-off-by: Philippe Mathieu-Daudé 
---
 block/nvme.c | 13 +++--
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/block/nvme.c b/block/nvme.c
index 3d49ff81fb7..85b235c8e6d 100644
--- a/block/nvme.c
+++ b/block/nvme.c
@@ -208,12 +208,12 @@ static void nvme_free_req_queue_cb(void *opaque)
 qemu_mutex_unlock(&q->lock);
 }
 
-static NVMeQueuePair *nvme_create_queue_pair(BlockDriverState *bs,
+static NVMeQueuePair *nvme_create_queue_pair(BDRVNVMeState *s,
+ AioContext *aio_context,
  int idx, int size,
  Error **errp)
 {
 int i, r;
-BDRVNVMeState *s = bs->opaque;
 Error *local_err = NULL;
 NVMeQueuePair *q;
 uint64_t prp_list_iova;
@@ -232,8 +232,7 @@ static NVMeQueuePair 
*nvme_create_queue_pair(BlockDriverState *bs,
 q->s = s;
 q->index = idx;
 qemu_co_queue_init(&q->free_req_queue);
-q->completion_bh = aio_bh_new(bdrv_get_aio_context(bs),
-  nvme_process_completion_bh, q);
+q->completion_bh = aio_bh_new(aio_context, nvme_process_completion_bh, q);
 r = qemu_vfio_dma_map(s->vfio, q->prp_list_pages,
   s->page_size * NVME_NUM_REQS,
   false, &prp_list_iova);
@@ -637,7 +636,8 @@ static bool nvme_add_io_queue(BlockDriverState *bs, Error 
**errp)
 NvmeCmd cmd;
 int queue_size = NVME_QUEUE_SIZE;
 
-q = nvme_create_queue_pair(bs, n, queue_size, errp);
+q = nvme_create_queue_pair(s, bdrv_get_aio_context(bs),
+   n, queue_size, errp);
 if (!q) {
 return false;
 }
@@ -683,6 +683,7 @@ static int nvme_init(BlockDriverState *bs, const char 
*device, int namespace,
  Error **errp)
 {
 BDRVNVMeState *s = bs->opaque;
+AioContext *aio_context = bdrv_get_aio_context(bs);
 int ret;
 uint64_t cap;
 uint64_t timeout_ms;
@@ -743,7 +744,7 @@ static int nvme_init(BlockDriverState *bs, const char 
*device, int namespace,
 
 /* Set up admin queue. */
 s->queues = g_new(NVMeQueuePair *, 1);
-s->queues[INDEX_ADMIN] = nvme_create_queue_pair(bs, 0,
+s->queues[INDEX_ADMIN] = nvme_create_queue_pair(s, aio_context, 0,
   NVME_QUEUE_SIZE,
   errp);
 if (!s->queues[INDEX_ADMIN]) {
-- 
2.26.2

[PATCH v6 09/15] block/nvme: Replace qemu_try_blockalign0 by qemu_try_blockalign/memset

2020-08-21 Thread Philippe Mathieu-Daudé

In the next commit we'll get rid of qemu_try_blockalign().
To ease review, first replace qemu_try_blockalign0() by explicit
calls to qemu_try_blockalign() and memset().

Reviewed-by: Stefan Hajnoczi 
Reviewed-by: Stefano Garzarella 
Signed-off-by: Philippe Mathieu-Daudé 
---
 block/nvme.c | 16 +---
 1 file changed, 9 insertions(+), 7 deletions(-)

diff --git a/block/nvme.c b/block/nvme.c
index af3176a9669..7e21a2d7ece 100644
--- a/block/nvme.c
+++ b/block/nvme.c
@@ -174,12 +174,12 @@ static void nvme_init_queue(BlockDriverState *bs, 
NVMeQueue *q,
 
 bytes = ROUND_UP(nentries * entry_bytes, s->page_size);
 q->head = q->tail = 0;
-q->queue = qemu_try_blockalign0(bs, bytes);
-
+q->queue = qemu_try_blockalign(bs, bytes);
 if (!q->queue) {
 error_setg(errp, "Cannot allocate queue");
 return;
 }
+memset(q->queue, 0, bytes);
 r = qemu_vfio_dma_map(s->vfio, q->queue, bytes, false, &q->iova);
 if (r) {
 error_setg(errp, "Cannot map queue");
@@ -223,11 +223,12 @@ static NVMeQueuePair 
*nvme_create_queue_pair(BlockDriverState *bs,
 if (!q) {
 return NULL;
 }
-q->prp_list_pages = qemu_try_blockalign0(bs,
+q->prp_list_pages = qemu_try_blockalign(bs,
   s->page_size * NVME_NUM_REQS);
 if (!q->prp_list_pages) {
 goto fail;
 }
+memset(q->prp_list_pages, 0, s->page_size * NVME_NUM_REQS);
 qemu_mutex_init(&q->lock);
 q->s = s;
 q->index = idx;
@@ -521,7 +522,7 @@ static void nvme_identify(BlockDriverState *bs, int 
namespace, Error **errp)
 .cdw10 = cpu_to_le32(0x1),
 };
 
-id = qemu_try_blockalign0(bs, sizeof(*id));
+id = qemu_try_blockalign(bs, sizeof(*id));
 if (!id) {
 error_setg(errp, "Cannot allocate buffer for identify response");
 goto out;
@@ -531,8 +532,9 @@ static void nvme_identify(BlockDriverState *bs, int 
namespace, Error **errp)
 error_setg(errp, "Cannot map buffer for DMA");
 goto out;
 }
-cmd.prp1 = cpu_to_le64(iova);
 
+memset(id, 0, sizeof(*id));
+cmd.prp1 = cpu_to_le64(iova);
 if (nvme_cmd_sync(bs, s->queues[INDEX_ADMIN], &cmd)) {
 error_setg(errp, "Failed to identify controller");
 goto out;
@@ -1283,11 +1285,11 @@ static int coroutine_fn 
nvme_co_pdiscard(BlockDriverState *bs,
 
 assert(s->nr_queues > 1);
 
-buf = qemu_try_blockalign0(bs, s->page_size);
+buf = qemu_try_blockalign(bs, s->page_size);
 if (!buf) {
 return -ENOMEM;
 }
-
+memset(buf, 0, s->page_size);
 buf->nlb = cpu_to_le32(bytes >> s->blkshift);
 buf->slba = cpu_to_le64(offset >> s->blkshift);
 buf->cattr = 0;
-- 
2.26.2

[PATCH v6 11/15] block/nvme: Simplify nvme_init_queue() arguments

2020-08-21 Thread Philippe Mathieu-Daudé

nvme_init_queue() doesn't require BlockDriverState anymore.
Replace it by BDRVNVMeState to simplify.

Reviewed-by: Stefan Hajnoczi 
Reviewed-by: Stefano Garzarella 
Signed-off-by: Philippe Mathieu-Daudé 
---
 block/nvme.c | 7 +++
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/block/nvme.c b/block/nvme.c
index 3742e0535aa..f98ca067144 100644
--- a/block/nvme.c
+++ b/block/nvme.c
@@ -165,10 +165,9 @@ static QemuOptsList runtime_opts = {
 },
 };
 
-static void nvme_init_queue(BlockDriverState *bs, NVMeQueue *q,
+static void nvme_init_queue(BDRVNVMeState *s, NVMeQueue *q,
 int nentries, int entry_bytes, Error **errp)
 {
-BDRVNVMeState *s = bs->opaque;
 size_t bytes;
 int r;
 
@@ -251,14 +250,14 @@ static NVMeQueuePair 
*nvme_create_queue_pair(BlockDriverState *bs,
 req->prp_list_iova = prp_list_iova + i * s->page_size;
 }
 
-nvme_init_queue(bs, &q->sq, size, NVME_SQ_ENTRY_BYTES, &local_err);
+nvme_init_queue(s, &q->sq, size, NVME_SQ_ENTRY_BYTES, &local_err);
 if (local_err) {
 error_propagate(errp, local_err);
 goto fail;
 }
 q->sq.doorbell = &s->regs->doorbells[idx * 2 * s->doorbell_scale];
 
-nvme_init_queue(bs, &q->cq, size, NVME_CQ_ENTRY_BYTES, &local_err);
+nvme_init_queue(s, &q->cq, size, NVME_CQ_ENTRY_BYTES, &local_err);
 if (local_err) {
 error_propagate(errp, local_err);
 goto fail;
-- 
2.26.2

[PATCH v6 10/15] block/nvme: Replace qemu_try_blockalign(bs) by qemu_try_memalign(pg_sz)

2020-08-21 Thread Philippe Mathieu-Daudé

qemu_try_blockalign() is a generic API that calls back to the
block driver to return its page alignment. As we call from
within the very same driver, we already know the page alignment
stored in our state. Remove indirections and use the value from
BDRVNVMeState.
This change is required to later remove the BlockDriverState
argument, to make nvme_init_queue() per hardware, and not per
block driver.

Reviewed-by: Stefan Hajnoczi 
Reviewed-by: Stefano Garzarella 
Signed-off-by: Philippe Mathieu-Daudé 
---
 block/nvme.c | 10 +-
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/block/nvme.c b/block/nvme.c
index 7e21a2d7ece..3742e0535aa 100644
--- a/block/nvme.c
+++ b/block/nvme.c
@@ -174,7 +174,7 @@ static void nvme_init_queue(BlockDriverState *bs, NVMeQueue 
*q,
 
 bytes = ROUND_UP(nentries * entry_bytes, s->page_size);
 q->head = q->tail = 0;
-q->queue = qemu_try_blockalign(bs, bytes);
+q->queue = qemu_try_memalign(s->page_size, bytes);
 if (!q->queue) {
 error_setg(errp, "Cannot allocate queue");
 return;
@@ -223,7 +223,7 @@ static NVMeQueuePair 
*nvme_create_queue_pair(BlockDriverState *bs,
 if (!q) {
 return NULL;
 }
-q->prp_list_pages = qemu_try_blockalign(bs,
+q->prp_list_pages = qemu_try_memalign(s->page_size,
   s->page_size * NVME_NUM_REQS);
 if (!q->prp_list_pages) {
 goto fail;
@@ -522,7 +522,7 @@ static void nvme_identify(BlockDriverState *bs, int 
namespace, Error **errp)
 .cdw10 = cpu_to_le32(0x1),
 };
 
-id = qemu_try_blockalign(bs, sizeof(*id));
+id = qemu_try_memalign(s->page_size, sizeof(*id));
 if (!id) {
 error_setg(errp, "Cannot allocate buffer for identify response");
 goto out;
@@ -1141,7 +1141,7 @@ static int nvme_co_prw(BlockDriverState *bs, uint64_t 
offset, uint64_t bytes,
 return nvme_co_prw_aligned(bs, offset, bytes, qiov, is_write, flags);
 }
 trace_nvme_prw_buffered(s, offset, bytes, qiov->niov, is_write);
-buf = qemu_try_blockalign(bs, bytes);
+buf = qemu_try_memalign(s->page_size, bytes);
 
 if (!buf) {
 return -ENOMEM;
@@ -1285,7 +1285,7 @@ static int coroutine_fn nvme_co_pdiscard(BlockDriverState 
*bs,
 
 assert(s->nr_queues > 1);
 
-buf = qemu_try_blockalign(bs, s->page_size);
+buf = qemu_try_memalign(s->page_size, s->page_size);
 if (!buf) {
 return -ENOMEM;
 }
-- 
2.26.2

[PATCH v6 08/15] block/nvme: Use union of NvmeIdCtrl / NvmeIdNs structures

2020-08-21 Thread Philippe Mathieu-Daudé

We allocate a single chunk of memory then use it for two
different structures. By using a union, we make it clear
the data is overlapping (and we can remove the casts).

Suggested-by: Stefan Hajnoczi 
Reviewed-by: Stefan Hajnoczi 
Reviewed-by: Stefano Garzarella 
Signed-off-by: Philippe Mathieu-Daudé 
---
 block/nvme.c | 31 +++
 1 file changed, 15 insertions(+), 16 deletions(-)

diff --git a/block/nvme.c b/block/nvme.c
index 15c5202c03c..af3176a9669 100644
--- a/block/nvme.c
+++ b/block/nvme.c
@@ -508,9 +508,10 @@ static int nvme_cmd_sync(BlockDriverState *bs, 
NVMeQueuePair *q,
 static void nvme_identify(BlockDriverState *bs, int namespace, Error **errp)
 {
 BDRVNVMeState *s = bs->opaque;
-NvmeIdCtrl *idctrl;
-NvmeIdNs *idns;
-uint8_t *id;
+union {
+NvmeIdCtrl ctrl;
+NvmeIdNs ns;
+} *id;
 NvmeLBAF *lbaf;
 uint16_t oncs;
 int r;
@@ -520,14 +521,12 @@ static void nvme_identify(BlockDriverState *bs, int 
namespace, Error **errp)
 .cdw10 = cpu_to_le32(0x1),
 };
 
-id = qemu_try_blockalign0(bs, sizeof(NvmeIdCtrl));
+id = qemu_try_blockalign0(bs, sizeof(*id));
 if (!id) {
 error_setg(errp, "Cannot allocate buffer for identify response");
 goto out;
 }
-idctrl = (NvmeIdCtrl *)id;
-idns = (NvmeIdNs *)id;
-r = qemu_vfio_dma_map(s->vfio, id, sizeof(NvmeIdCtrl), true, &iova);
+r = qemu_vfio_dma_map(s->vfio, id, sizeof(*id), true, &iova);
 if (r) {
 error_setg(errp, "Cannot map buffer for DMA");
 goto out;
@@ -539,22 +538,22 @@ static void nvme_identify(BlockDriverState *bs, int 
namespace, Error **errp)
 goto out;
 }
 
-if (le32_to_cpu(idctrl->nn) < namespace) {
+if (le32_to_cpu(id->ctrl.nn) < namespace) {
 error_setg(errp, "Invalid namespace");
 goto out;
 }
-s->write_cache_supported = le32_to_cpu(idctrl->vwc) & 0x1;
-s->max_transfer = (idctrl->mdts ? 1 << idctrl->mdts : 0) * s->page_size;
+s->write_cache_supported = le32_to_cpu(id->ctrl.vwc) & 0x1;
+s->max_transfer = (id->ctrl.mdts ? 1 << id->ctrl.mdts : 0) * s->page_size;
 /* For now the page list buffer per command is one page, to hold at most
  * s->page_size / sizeof(uint64_t) entries. */
 s->max_transfer = MIN_NON_ZERO(s->max_transfer,
   s->page_size / sizeof(uint64_t) * s->page_size);
 
-oncs = le16_to_cpu(idctrl->oncs);
+oncs = le16_to_cpu(id->ctrl.oncs);
 s->supports_write_zeroes = !!(oncs & NVME_ONCS_WRITE_ZEROS);
 s->supports_discard = !!(oncs & NVME_ONCS_DSM);
 
-memset(id, 0, 4096);
+memset(id, 0, sizeof(*id));
 cmd.cdw10 = 0;
 cmd.nsid = cpu_to_le32(namespace);
 if (nvme_cmd_sync(bs, s->queues[INDEX_ADMIN], &cmd)) {
@@ -562,11 +561,11 @@ static void nvme_identify(BlockDriverState *bs, int 
namespace, Error **errp)
 goto out;
 }
 
-s->nsze = le64_to_cpu(idns->nsze);
-lbaf = &idns->lbaf[NVME_ID_NS_FLBAS_INDEX(idns->flbas)];
+s->nsze = le64_to_cpu(id->ns.nsze);
+lbaf = &id->ns.lbaf[NVME_ID_NS_FLBAS_INDEX(id->ns.flbas)];
 
-if (NVME_ID_NS_DLFEAT_WRITE_ZEROES(idns->dlfeat) &&
-NVME_ID_NS_DLFEAT_READ_BEHAVIOR(idns->dlfeat) ==
+if (NVME_ID_NS_DLFEAT_WRITE_ZEROES(id->ns.dlfeat) &&
+NVME_ID_NS_DLFEAT_READ_BEHAVIOR(id->ns.dlfeat) ==
 NVME_ID_NS_DLFEAT_READ_BEHAVIOR_ZEROES) {
 bs->supported_write_flags |= BDRV_REQ_MAY_UNMAP;
 }
-- 
2.26.2

[PATCH v6 06/15] block/nvme: Use common error path in nvme_add_io_queue()

2020-08-21 Thread Philippe Mathieu-Daudé

Rearrange nvme_add_io_queue() by using a common error path.
This will prove useful in a few commits where we add IRQ
notification to the IO queues.

Reviewed-by: Stefan Hajnoczi 
Reviewed-by: Stefano Garzarella 
Signed-off-by: Philippe Mathieu-Daudé 
---
 block/nvme.c | 9 +
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/block/nvme.c b/block/nvme.c
index c63629d3b45..419178adda3 100644
--- a/block/nvme.c
+++ b/block/nvme.c
@@ -649,8 +649,7 @@ static bool nvme_add_io_queue(BlockDriverState *bs, Error 
**errp)
 };
 if (nvme_cmd_sync(bs, s->queues[INDEX_ADMIN], &cmd)) {
 error_setg(errp, "Failed to create CQ io queue [%d]", n);
-nvme_free_queue_pair(q);
-return false;
+goto out_error;
 }
 cmd = (NvmeCmd) {
 .opcode = NVME_ADM_CMD_CREATE_SQ,
@@ -660,13 +659,15 @@ static bool nvme_add_io_queue(BlockDriverState *bs, Error 
**errp)
 };
 if (nvme_cmd_sync(bs, s->queues[INDEX_ADMIN], &cmd)) {
 error_setg(errp, "Failed to create SQ io queue [%d]", n);
-nvme_free_queue_pair(q);
-return false;
+goto out_error;
 }
 s->queues = g_renew(NVMeQueuePair *, s->queues, n + 1);
 s->queues[n] = q;
 s->nr_queues++;
 return true;
+out_error:
+nvme_free_queue_pair(q);
+return false;
 }
 
 static bool nvme_poll_cb(void *opaque)
-- 
2.26.2

[PATCH v6 07/15] block/nvme: Rename local variable

2020-08-21 Thread Philippe Mathieu-Daudé

We are going to modify the code in the next commit. Renaming
the 'resp' variable to 'id' first makes the next commit easier
to review. No logical changes.

Reviewed-by: Stefan Hajnoczi 
Reviewed-by: Stefano Garzarella 
Signed-off-by: Philippe Mathieu-Daudé 
---
 block/nvme.c | 19 +--
 1 file changed, 9 insertions(+), 10 deletions(-)

diff --git a/block/nvme.c b/block/nvme.c
index 419178adda3..15c5202c03c 100644
--- a/block/nvme.c
+++ b/block/nvme.c
@@ -510,8 +510,8 @@ static void nvme_identify(BlockDriverState *bs, int 
namespace, Error **errp)
 BDRVNVMeState *s = bs->opaque;
 NvmeIdCtrl *idctrl;
 NvmeIdNs *idns;
+uint8_t *id;
 NvmeLBAF *lbaf;
-uint8_t *resp;
 uint16_t oncs;
 int r;
 uint64_t iova;
@@ -520,14 +520,14 @@ static void nvme_identify(BlockDriverState *bs, int 
namespace, Error **errp)
 .cdw10 = cpu_to_le32(0x1),
 };
 
-resp = qemu_try_blockalign0(bs, sizeof(NvmeIdCtrl));
-if (!resp) {
+id = qemu_try_blockalign0(bs, sizeof(NvmeIdCtrl));
+if (!id) {
 error_setg(errp, "Cannot allocate buffer for identify response");
 goto out;
 }
-idctrl = (NvmeIdCtrl *)resp;
-idns = (NvmeIdNs *)resp;
-r = qemu_vfio_dma_map(s->vfio, resp, sizeof(NvmeIdCtrl), true, &iova);
+idctrl = (NvmeIdCtrl *)id;
+idns = (NvmeIdNs *)id;
+r = qemu_vfio_dma_map(s->vfio, id, sizeof(NvmeIdCtrl), true, &iova);
 if (r) {
 error_setg(errp, "Cannot map buffer for DMA");
 goto out;
@@ -554,8 +554,7 @@ static void nvme_identify(BlockDriverState *bs, int 
namespace, Error **errp)
 s->supports_write_zeroes = !!(oncs & NVME_ONCS_WRITE_ZEROS);
 s->supports_discard = !!(oncs & NVME_ONCS_DSM);
 
-memset(resp, 0, 4096);
-
+memset(id, 0, 4096);
 cmd.cdw10 = 0;
 cmd.nsid = cpu_to_le32(namespace);
 if (nvme_cmd_sync(bs, s->queues[INDEX_ADMIN], &cmd)) {
@@ -587,8 +586,8 @@ static void nvme_identify(BlockDriverState *bs, int 
namespace, Error **errp)
 
 s->blkshift = lbaf->ds;
 out:
-qemu_vfio_dma_unmap(s->vfio, resp);
-qemu_vfree(resp);
+qemu_vfio_dma_unmap(s->vfio, id);
+qemu_vfree(id);
 }
 
 static bool nvme_poll_queues(BDRVNVMeState *s)
-- 
2.26.2

[PATCH v6 04/15] block/nvme: Define INDEX macros to ease code review

2020-08-21 Thread Philippe Mathieu-Daudé

Use definitions instead of '0' or '1' indexes. Also this will
be useful when using multi-queues later.

Reviewed-by: Stefan Hajnoczi 
Reviewed-by: Stefano Garzarella 
Signed-off-by: Philippe Mathieu-Daudé 
---
 block/nvme.c | 33 +++--
 1 file changed, 19 insertions(+), 14 deletions(-)

diff --git a/block/nvme.c b/block/nvme.c
index a6e5537aaaf..b4c1a6690e4 100644
--- a/block/nvme.c
+++ b/block/nvme.c
@@ -103,6 +103,9 @@ typedef volatile struct {
 
 QEMU_BUILD_BUG_ON(offsetof(NVMeRegs, doorbells) != 0x1000);
 
+#define INDEX_ADMIN 0
+#define INDEX_IO(n) (1 + n)
+
 struct BDRVNVMeState {
 AioContext *aio_context;
 QEMUVFIOState *vfio;
@@ -531,7 +534,7 @@ static void nvme_identify(BlockDriverState *bs, int 
namespace, Error **errp)
 }
 cmd.prp1 = cpu_to_le64(iova);
 
-if (nvme_cmd_sync(bs, s->queues[0], &cmd)) {
+if (nvme_cmd_sync(bs, s->queues[INDEX_ADMIN], &cmd)) {
 error_setg(errp, "Failed to identify controller");
 goto out;
 }
@@ -555,7 +558,7 @@ static void nvme_identify(BlockDriverState *bs, int 
namespace, Error **errp)
 
 cmd.cdw10 = 0;
 cmd.nsid = cpu_to_le32(namespace);
-if (nvme_cmd_sync(bs, s->queues[0], &cmd)) {
+if (nvme_cmd_sync(bs, s->queues[INDEX_ADMIN], &cmd)) {
 error_setg(errp, "Failed to identify namespace");
 goto out;
 }
@@ -644,7 +647,7 @@ static bool nvme_add_io_queue(BlockDriverState *bs, Error 
**errp)
 .cdw10 = cpu_to_le32(((queue_size - 1) << 16) | (n & 0x)),
 .cdw11 = cpu_to_le32(0x3),
 };
-if (nvme_cmd_sync(bs, s->queues[0], &cmd)) {
+if (nvme_cmd_sync(bs, s->queues[INDEX_ADMIN], &cmd)) {
 error_setg(errp, "Failed to create io queue [%d]", n);
 nvme_free_queue_pair(q);
 return false;
@@ -655,7 +658,7 @@ static bool nvme_add_io_queue(BlockDriverState *bs, Error 
**errp)
 .cdw10 = cpu_to_le32(((queue_size - 1) << 16) | (n & 0x)),
 .cdw11 = cpu_to_le32(0x1 | (n << 16)),
 };
-if (nvme_cmd_sync(bs, s->queues[0], &cmd)) {
+if (nvme_cmd_sync(bs, s->queues[INDEX_ADMIN], &cmd)) {
 error_setg(errp, "Failed to create io queue [%d]", n);
 nvme_free_queue_pair(q);
 return false;
@@ -739,16 +742,18 @@ static int nvme_init(BlockDriverState *bs, const char 
*device, int namespace,
 
 /* Set up admin queue. */
 s->queues = g_new(NVMeQueuePair *, 1);
-s->queues[0] = nvme_create_queue_pair(bs, 0, NVME_QUEUE_SIZE, errp);
-if (!s->queues[0]) {
+s->queues[INDEX_ADMIN] = nvme_create_queue_pair(bs, 0,
+  NVME_QUEUE_SIZE,
+  errp);
+if (!s->queues[INDEX_ADMIN]) {
 ret = -EINVAL;
 goto out;
 }
 s->nr_queues = 1;
 QEMU_BUILD_BUG_ON(NVME_QUEUE_SIZE & 0xF000);
 s->regs->aqa = cpu_to_le32((NVME_QUEUE_SIZE << 16) | NVME_QUEUE_SIZE);
-s->regs->asq = cpu_to_le64(s->queues[0]->sq.iova);
-s->regs->acq = cpu_to_le64(s->queues[0]->cq.iova);
+s->regs->asq = cpu_to_le64(s->queues[INDEX_ADMIN]->sq.iova);
+s->regs->acq = cpu_to_le64(s->queues[INDEX_ADMIN]->cq.iova);
 
 /* After setting up all control registers we can enable device now. */
 s->regs->cc = cpu_to_le32((ctz32(NVME_CQ_ENTRY_BYTES) << 20) |
@@ -839,7 +844,7 @@ static int nvme_enable_disable_write_cache(BlockDriverState 
*bs, bool enable,
 .cdw11 = cpu_to_le32(enable ? 0x01 : 0x00),
 };
 
-ret = nvme_cmd_sync(bs, s->queues[0], &cmd);
+ret = nvme_cmd_sync(bs, s->queues[INDEX_ADMIN], &cmd);
 if (ret) {
 error_setg(errp, "Failed to configure NVMe write cache");
 }
@@ -1056,7 +1061,7 @@ static coroutine_fn int 
nvme_co_prw_aligned(BlockDriverState *bs,
 {
 int r;
 BDRVNVMeState *s = bs->opaque;
-NVMeQueuePair *ioq = s->queues[1];
+NVMeQueuePair *ioq = s->queues[INDEX_IO(0)];
 NVMeRequest *req;
 
 uint32_t cdw12 = (((bytes >> s->blkshift) - 1) & 0x) |
@@ -1171,7 +1176,7 @@ static coroutine_fn int nvme_co_pwritev(BlockDriverState 
*bs,
 static coroutine_fn int nvme_co_flush(BlockDriverState *bs)
 {
 BDRVNVMeState *s = bs->opaque;
-NVMeQueuePair *ioq = s->queues[1];
+NVMeQueuePair *ioq = s->queues[INDEX_IO(0)];
 NVMeRequest *req;
 NvmeCmd cmd = {
 .opcode = NVME_CMD_FLUSH,
@@ -1202,7 +1207,7 @@ static coroutine_fn int 
nvme_co_pwrite_zeroes(BlockDriverState *bs,
   BdrvRequestFlags flags)
 {
 BDRVNVMeState *s = bs->opaque;
-NVMeQueuePair *ioq = s->queues[1];
+NVMeQueuePair *ioq = s->queues[INDEX_IO(0)];
 NVMeRequest *req;
 
 uint32_t cdw12 = ((bytes >> s->blkshift) - 1) & 0x;
@@ -1255,7 +1260,7 @@ static int coroutine_fn nvme_co_pdiscard(BlockDriverState 
*bs,
  int bytes)
 {
 BDRVNVMeState *s = bs->opaque;
-NVMeQueuePair *ioq = s->queues[1];
+N

[PATCH v6 02/15] block/nvme: Avoid further processing if trace event not enabled

2020-08-21 Thread Philippe Mathieu-Daudé

Avoid further processing if TRACE_NVME_SUBMIT_COMMAND_RAW is
not enabled. This is an untested attempt at performance optimization.

Reviewed-by: Stefan Hajnoczi 
Signed-off-by: Philippe Mathieu-Daudé 
---
 block/nvme.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/block/nvme.c b/block/nvme.c
index 2f5e3c2adfa..8c30a5fee28 100644
--- a/block/nvme.c
+++ b/block/nvme.c
@@ -441,6 +441,9 @@ static void nvme_trace_command(const NvmeCmd *cmd)
 {
 int i;
 
+if (!trace_event_get_state_backends(TRACE_NVME_SUBMIT_COMMAND_RAW)) {
+return;
+}
 for (i = 0; i < 8; ++i) {
 uint8_t *cmdp = (uint8_t *)cmd + i * 8;
 trace_nvme_submit_command_raw(cmdp[0], cmdp[1], cmdp[2], cmdp[3],
-- 
2.26.2

[PATCH v6 03/15] block/nvme: Let nvme_create_queue_pair() fail gracefully

2020-08-21 Thread Philippe Mathieu-Daudé

As nvme_create_queue_pair() is allowed to fail, replace the
alloc() calls by try_alloc() to avoid aborting QEMU.

Reviewed-by: Stefan Hajnoczi 
Reviewed-by: Stefano Garzarella 
Signed-off-by: Philippe Mathieu-Daudé 
---
 block/nvme.c | 12 ++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/block/nvme.c b/block/nvme.c
index 8c30a5fee28..a6e5537aaaf 100644
--- a/block/nvme.c
+++ b/block/nvme.c
@@ -213,14 +213,22 @@ static NVMeQueuePair 
*nvme_create_queue_pair(BlockDriverState *bs,
 int i, r;
 BDRVNVMeState *s = bs->opaque;
 Error *local_err = NULL;
-NVMeQueuePair *q = g_new0(NVMeQueuePair, 1);
+NVMeQueuePair *q;
 uint64_t prp_list_iova;
 
+q = g_try_new0(NVMeQueuePair, 1);
+if (!q) {
+return NULL;
+}
+q->prp_list_pages = qemu_try_blockalign0(bs,
+  s->page_size * NVME_NUM_REQS);
+if (!q->prp_list_pages) {
+goto fail;
+}
 qemu_mutex_init(&q->lock);
 q->s = s;
 q->index = idx;
 qemu_co_queue_init(&q->free_req_queue);
-q->prp_list_pages = qemu_blockalign0(bs, s->page_size * NVME_NUM_REQS);
 q->completion_bh = aio_bh_new(bdrv_get_aio_context(bs),
   nvme_process_completion_bh, q);
 r = qemu_vfio_dma_map(s->vfio, q->prp_list_pages,
-- 
2.26.2

[PATCH v6 05/15] block/nvme: Improve error message when IO queue creation failed

2020-08-21 Thread Philippe Mathieu-Daudé

Do not use the same error message for different failures.
Display a different error depending on whether it is the CQ or the SQ.

Reviewed-by: Stefan Hajnoczi 
Reviewed-by: Stefano Garzarella 
Signed-off-by: Philippe Mathieu-Daudé 
---
 block/nvme.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/block/nvme.c b/block/nvme.c
index b4c1a6690e4..c63629d3b45 100644
--- a/block/nvme.c
+++ b/block/nvme.c
@@ -648,7 +648,7 @@ static bool nvme_add_io_queue(BlockDriverState *bs, Error 
**errp)
 .cdw11 = cpu_to_le32(0x3),
 };
 if (nvme_cmd_sync(bs, s->queues[INDEX_ADMIN], &cmd)) {
-error_setg(errp, "Failed to create io queue [%d]", n);
+error_setg(errp, "Failed to create CQ io queue [%d]", n);
 nvme_free_queue_pair(q);
 return false;
 }
@@ -659,7 +659,7 @@ static bool nvme_add_io_queue(BlockDriverState *bs, Error 
**errp)
 .cdw11 = cpu_to_le32(0x1 | (n << 16)),
 };
 if (nvme_cmd_sync(bs, s->queues[INDEX_ADMIN], &cmd)) {
-error_setg(errp, "Failed to create io queue [%d]", n);
+error_setg(errp, "Failed to create SQ io queue [%d]", n);
 nvme_free_queue_pair(q);
 return false;
 }
-- 
2.26.2

[PATCH v6 01/15] block/nvme: Replace magic value by SCALE_MS definition

2020-08-21 Thread Philippe Mathieu-Daudé

Use the self-explanatory SCALE_MS definition instead of a magic value.

Reviewed-by: Stefan Hajnoczi 
Reviewed-by: Stefano Garzarella 
Signed-off-by: Philippe Mathieu-Daudé 
---
 block/nvme.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/block/nvme.c b/block/nvme.c
index 374e2689157..2f5e3c2adfa 100644
--- a/block/nvme.c
+++ b/block/nvme.c
@@ -715,7 +715,7 @@ static int nvme_init(BlockDriverState *bs, const char 
*device, int namespace,
 /* Reset device to get a clean state. */
 s->regs->cc = cpu_to_le32(le32_to_cpu(s->regs->cc) & 0xFE);
 /* Wait for CSTS.RDY = 0. */
-deadline = qemu_clock_get_ns(QEMU_CLOCK_REALTIME) + timeout_ms * 
100ULL;
+deadline = qemu_clock_get_ns(QEMU_CLOCK_REALTIME) + timeout_ms * SCALE_MS;
 while (le32_to_cpu(s->regs->csts) & 0x1) {
 if (qemu_clock_get_ns(QEMU_CLOCK_REALTIME) > deadline) {
 error_setg(errp, "Timeout while waiting for device to reset (%"
-- 
2.26.2

[PATCH v6 00/15] block/nvme: Various cleanups required to use multiple queues

2020-08-21 Thread Philippe Mathieu-Daudé

Hi Kevin,

This series is mostly code rearrangement (cleanups) to be
able to split the hardware code from the block driver code,
to be able to use multiple queues on the same hardware, or
multiple block drivers on the same hardware.

All this series is reviewed.

Since v5:
- audit rebase on "block/nvme: support nested aio_poll"
- addressed Stefano's review comments
- added Stefano's R-b tags

Since v4:
- added 'block/nvme: Use an array of EventNotifier' patch

Since v3:
- renamed QUEUE_INDEX_{ADMIN/IO} -> INDEX_{ADMIN/IO}
- added stefanha tags

Since v2:
- addressed stefanha review comments
- added 4 trivial patches (to simplify the last one)
- register IRQ notifier for each queuepair (admin and io)

Since v1:
- rebased
- use SCALE_MS definition
- added Stefan's R-b
- addressed Stefan's review comments
  - use union { NvmeIdCtrl / NvmeIdNs }
  - move irq_notifier to NVMeQueuePair
  - removed patches depending on "a traceable hardware state
object instead of BDRVNVMeState".

Phil.

Philippe Mathieu-Daudé (15):
  block/nvme: Replace magic value by SCALE_MS definition
  block/nvme: Avoid further processing if trace event not enabled
  block/nvme: Let nvme_create_queue_pair() fail gracefully
  block/nvme: Define INDEX macros to ease code review
  block/nvme: Improve error message when IO queue creation failed
  block/nvme: Use common error path in nvme_add_io_queue()
  block/nvme: Rename local variable
  block/nvme: Use union of NvmeIdCtrl / NvmeIdNs structures
  block/nvme: Replace qemu_try_blockalign0 by qemu_try_blockalign/memset
  block/nvme: Replace qemu_try_blockalign(bs) by
qemu_try_memalign(pg_sz)
  block/nvme: Simplify nvme_init_queue() arguments
  block/nvme: Replace BDRV_POLL_WHILE by AIO_WAIT_WHILE
  block/nvme: Simplify nvme_create_queue_pair() arguments
  block/nvme: Extract nvme_poll_queue()
  block/nvme: Use an array of EventNotifier

 block/nvme.c | 211 ++-
 1 file changed, 125 insertions(+), 86 deletions(-)

-- 
2.26.2

Re: [PULL 0/3] Block patches

2020-08-21 Thread Peter Maydell

On Mon, 17 Aug 2020 at 16:16, Stefan Hajnoczi  wrote:
>
> The following changes since commit d0ed6a69d399ae193959225cdeaa9382746c91cc:
>
>   Update version for v5.1.0 release (2020-08-11 17:07:03 +0100)
>
> are available in the Git repository at:
>
>   https://github.com/stefanha/qemu.git tags/block-pull-request
>
> for you to fetch changes up to 44277bf914471962c9e88e09c859aae65ae109c4:
>
>   aio-posix: keep aio_notify_me disabled during polling (2020-08-13 13:34:14 +0100)
>
> 
> Pull request
>
> 


Applied, thanks.

Please update the changelog at https://wiki.qemu.org/ChangeLog/5.2
for any user-visible changes.

-- PMM

Re: Suspicious QOM types without instance/class size

2020-08-21 Thread Eduardo Habkost

On Fri, Aug 21, 2020 at 11:40:12AM +0200, David Hildenbrand wrote:
> On 20.08.20 23:55, Eduardo Habkost wrote:
> > While trying to convert TypeInfo declarations to the new
> > OBJECT_DECLARE* macros, I've stumbled on a few suspicious cases
> > where instance_size or class_size is not set, despite having type
> > checker macros that use a specific type.
> > 
> > The ones with "WARNING" are abstract types (maybe not serious if
> > subclasses set the appropriate sizes).  The ones with "ERROR"
> > don't seem to be abstract types.
[...]
> > ERROR: hw/s390x/virtio-ccw.c:1237:1: class_size should be set to 
> > sizeof(VirtioCcwBusClass)?
> 
> The parent of TYPE_VIRTIO_CCW_BUS is TYPE_VIRTIO_BUS.
> 
> typedef struct VirtioBusClass VirtioCcwBusClass;
> 
> So I guess the sizes match? Anyhow, setting doesn't hurt.

Thanks for checking.  Yeah, the sizes match today.

It's a good idea to set it, just in case a real VirtioCcwBusClass
struct gets created one day.

-- 
Eduardo

Re: Suspicious QOM types without instance/class size

2020-08-21 Thread Eduardo Habkost

On Fri, Aug 21, 2020 at 11:47:32AM +1000, David Gibson wrote:
> On Thu, Aug 20, 2020 at 05:55:29PM -0400, Eduardo Habkost wrote:
> > While trying to convert TypeInfo declarations to the new
> > OBJECT_DECLARE* macros, I've stumbled on a few suspicious cases
> > where instance_size or class_size is not set, despite having type
> > checker macros that use a specific type.
> > 
> > The ones with "WARNING" are abstract types (maybe not serious if
> > subclasses set the appropriate sizes).  The ones with "ERROR"
> > don't seem to be abstract types.
> 
> 
> Comment on the ones within my area:
> > 
> > WARNING: hw/input/adb.c:310:1: class_size should be set to 
> > sizeof(ADBDeviceClass)?
> 
> Yeah, that looks like a bug (though we'll get away with it because
> it's abstract).

Right, luckily we are not touching any ADBDeviceClass field
inside adb_device_class_init().

> 
> > WARNING: hw/ppc/pnv_lpc.c:771:1: instance_size should be set to 
> > sizeof(PnvLpcController)?
> 
> Ditto.

Agreed.

> 
> Should I make fixes for these, or will you?

Please send the fixes, and I will apply them before running the
TypeInfo conversion script.

> 
> > ERROR: hw/ppc/spapr_drc.c:771:1: instance_size should be set to 
> > sizeof(SpaprDrc)?
> 
> I'm confused by this one.  I'm not exactly sure which definition is
> tripping the error, and AFAICT they should all be correctly inheriting
> instance_size from either TYPE_SPAPR_DR_CONNECTOR or
> TYPE_SPAPR_DRC_PHYSICAL.  If anything, it looks like
> TYPE_SPAPR_DRC_PHB could drop its explicit override of instance_size.

The error is triggered because of this type checking macro at
include/hw/ppc/spapr_drc.h:

#define SPAPR_DRC_PCI(obj) OBJECT_CHECK(SpaprDrc, (obj), \
TYPE_SPAPR_DRC_PCI)

The expectation is that whatever type you use in OBJECT_CHECK
will be the one used for instance_size.  The script also looks at
the parent type, to reduce false positives, but this case was
flagged because SPAPR_DRC_PCI uses SpaprDrc, but the parent type
(SPAPR_DRC_PHYSICAL) uses SpaprDrcPhysical.

Now, I don't understand why we have so many instance checker
macros that use the same typedef (SpaprDrc).  If the code needs a
valid SpaprDrc pointer, it can just use SPAPR_DR_CONNECTOR().

-- 
Eduardo

Re: Suspicious QOM types without instance/class size

2020-08-21 Thread Eduardo Habkost

On Fri, Aug 21, 2020 at 01:29:38PM -0400, Eduardo Habkost wrote:
> On Fri, Aug 21, 2020 at 01:53:52PM +0300, Roman Bolshakov wrote:
> > On Thu, Aug 20, 2020 at 05:55:29PM -0400, Eduardo Habkost wrote:
> > > While trying to convert TypeInfo declarations to the new
> > > OBJECT_DECLARE* macros, I've stumbled on a few suspicious cases
> > > where instance_size or class_size is not set, despite having type
> > > checker macros that use a specific type.
> > > 
> > > The ones with "WARNING" are abstract types (maybe not serious if
> > > subclasses set the appropriate sizes).  The ones with "ERROR"
> > > don't seem to be abstract types.
> > > 
> > 
> > > ERROR: target/i386/hvf/hvf.c:908:1: instance_size should be set to 
> > > sizeof(HVFState)?
> > 
> > Hi Eduardo,
> > 
> > How do you get the error?
> 
> > My script looks for corresponding type checking macros, and checks
> if instance_size is set to sizeof(T) with the right type from the
> type checking macro.
> 
> The code is here:
> https://github.com/ehabkost/qemu-hacks/blob/920b2c521ad2a29fa663256854e24ed2059ba9cd/scripts/codeconverter/codeconverter/qom_type_info.py#L136
> 
> 
> > 
> > Given your changes, instance size should really be sizeof(HVFState).
> > 
> 
> The changes I've made shouldn't make any difference (if there's
> an issue, it is there before or after my series).
> 
> > BTW, the object definition for hvf seems different from KVM (and perhaps
> > wrong?), e.g. HVFState is allocated within init_machine handler and then
> > assigned to a global variable:
> 
> Interesting.  It looks like hvf_state is _not_ the actual QOM
> object instance.  The actual TYPE_HVF_ACCEL instance is created
> by do_configure_accelerator().  That would explain why the lack
> of instance_init never caused any problems.
> 
> Luckily, no code ever used the HVF_STATE macro.  If
> HVF_STATE(hvf_state) got called, it would crash because of
> uninitialized object instance data.  If HVF_STATE(machine->accel)
> got called, it would return an invalid HVFState pointer (not
> hvf_state).
> 
> I believe the simplest short term solution here is to just delete
> the HVF_STATE macro and HVFState::parent field.  We can worry
> about actually moving hvf_state to the machine->accel QOM object
> later.

Actually, it might be easier to do the full QOM conversion in a
single patch instead of deleting the incomplete code.

Can you check if the following patch works?  I don't have a host
where I can test it.

Signed-off-by: Eduardo Habkost 
---
diff --git a/target/i386/hvf/hvf.c b/target/i386/hvf/hvf.c
index d81f569aed..81d1662d06 100644
--- a/target/i386/hvf/hvf.c
+++ b/target/i386/hvf/hvf.c
@@ -878,13 +878,11 @@ static int hvf_accel_init(MachineState *ms)
 {
 int x;
 hv_return_t ret;
-HVFState *s;
+HVFState *s = HVF_STATE(ms->accelerator);
 
 ret = hv_vm_create(HV_VM_DEFAULT);
 assert_hvf_ok(ret);
 
-s = g_new0(HVFState, 1);
- 
 s->num_slots = 32;
 for (x = 0; x < s->num_slots; ++x) {
 s->slots[x].size = 0;
@@ -908,6 +906,7 @@ static void hvf_accel_class_init(ObjectClass *oc, void 
*data)
 static const TypeInfo hvf_accel_type = {
 .name = TYPE_HVF_ACCEL,
 .parent = TYPE_ACCEL,
+.instance_size = sizeof(HVFState),
 .class_init = hvf_accel_class_init,
 };
 
 
-- 
Eduardo

[PULL 17/23] hw/sd: Rename sdbus_read_data() as sdbus_read_byte()

2020-08-21 Thread Philippe Mathieu-Daudé

The sdbus_read_data() method does a single-byte access on the data
line of a SD bus. Rename it as sdbus_read_byte() and document it.

Signed-off-by: Philippe Mathieu-Daudé 
Reviewed-by: Richard Henderson 
Message-Id: <20200814092346.21825-4-f4...@amsat.org>
---
 include/hw/sd/sd.h| 10 +-
 hw/sd/allwinner-sdhost.c  | 10 +-
 hw/sd/bcm2835_sdhost.c|  2 +-
 hw/sd/core.c  |  2 +-
 hw/sd/milkymist-memcard.c |  8 
 hw/sd/pl181.c |  2 +-
 hw/sd/pxa2xx_mmci.c   |  2 +-
 hw/sd/sdhci.c |  8 
 hw/sd/ssi-sd.c|  2 +-
 9 files changed, 27 insertions(+), 19 deletions(-)

diff --git a/include/hw/sd/sd.h b/include/hw/sd/sd.h
index 1e5ac955d05..14ffc7f4758 100644
--- a/include/hw/sd/sd.h
+++ b/include/hw/sd/sd.h
@@ -166,7 +166,15 @@ int sdbus_do_command(SDBus *sd, SDRequest *req, uint8_t 
*response);
  * Write a byte on the data lines of a SD bus.
  */
 void sdbus_write_byte(SDBus *sd, uint8_t value);
-uint8_t sdbus_read_data(SDBus *sd);
+/**
+ * Read a byte from a SD bus.
+ * @sd: bus
+ *
+ * Read a byte from the data lines of a SD bus.
+ *
+ * Return: byte value read
+ */
+uint8_t sdbus_read_byte(SDBus *sd);
 bool sdbus_data_ready(SDBus *sd);
 bool sdbus_get_inserted(SDBus *sd);
 bool sdbus_get_readonly(SDBus *sd);
diff --git a/hw/sd/allwinner-sdhost.c b/hw/sd/allwinner-sdhost.c
index e05e8a3864c..c004aa39da6 100644
--- a/hw/sd/allwinner-sdhost.c
+++ b/hw/sd/allwinner-sdhost.c
@@ -341,7 +341,7 @@ static uint32_t allwinner_sdhost_process_desc(AwSdHostState 
*s,
 /* Read from SD bus */
 } else {
 for (uint32_t i = 0; i < buf_bytes; i++) {
-buf[i] = sdbus_read_data(&s->sdbus);
+buf[i] = sdbus_read_byte(&s->sdbus);
 }
 cpu_physical_memory_write((desc->addr & DESC_SIZE_MASK) + num_done,
buf, buf_bytes);
@@ -521,10 +521,10 @@ static uint64_t allwinner_sdhost_read(void *opaque, 
hwaddr offset,
 break;
 case REG_SD_FIFO:  /* Read/Write FIFO */
 if (sdbus_data_ready(&s->sdbus)) {
-res = sdbus_read_data(&s->sdbus);
-res |= sdbus_read_data(&s->sdbus) << 8;
-res |= sdbus_read_data(&s->sdbus) << 16;
-res |= sdbus_read_data(&s->sdbus) << 24;
+res = sdbus_read_byte(&s->sdbus);
+res |= sdbus_read_byte(&s->sdbus) << 8;
+res |= sdbus_read_byte(&s->sdbus) << 16;
+res |= sdbus_read_byte(&s->sdbus) << 24;
 allwinner_sdhost_update_transfer_cnt(s, sizeof(uint32_t));
 allwinner_sdhost_auto_stop(s);
 allwinner_sdhost_update_irq(s);
diff --git a/hw/sd/bcm2835_sdhost.c b/hw/sd/bcm2835_sdhost.c
index 16aba7cc92b..2c7a675a2d8 100644
--- a/hw/sd/bcm2835_sdhost.c
+++ b/hw/sd/bcm2835_sdhost.c
@@ -190,7 +190,7 @@ static void bcm2835_sdhost_fifo_run(BCM2835SDHostState *s)
 if (is_read) {
 n = 0;
 while (s->datacnt && s->fifo_len < BCM2835_SDHOST_FIFO_LEN) {
-value |= (uint32_t)sdbus_read_data(&s->sdbus) << (n * 8);
+value |= (uint32_t)sdbus_read_byte(&s->sdbus) << (n * 8);
 s->datacnt--;
 n++;
 if (n == 4) {
diff --git a/hw/sd/core.c b/hw/sd/core.c
index 13b5ca03169..a3b620b802b 100644
--- a/hw/sd/core.c
+++ b/hw/sd/core.c
@@ -114,7 +114,7 @@ void sdbus_write_byte(SDBus *sdbus, uint8_t value)
 }
 }
 
-uint8_t sdbus_read_data(SDBus *sdbus)
+uint8_t sdbus_read_byte(SDBus *sdbus)
 {
 SDState *card = get_card(sdbus);
 uint8_t value = 0;
diff --git a/hw/sd/milkymist-memcard.c b/hw/sd/milkymist-memcard.c
index 4128109c047..e8d055bb895 100644
--- a/hw/sd/milkymist-memcard.c
+++ b/hw/sd/milkymist-memcard.c
@@ -152,10 +152,10 @@ static uint64_t memcard_read(void *opaque, hwaddr addr,
 r = 0x;
 } else {
 r = 0;
-r |= sdbus_read_data(&s->sdbus) << 24;
-r |= sdbus_read_data(&s->sdbus) << 16;
-r |= sdbus_read_data(&s->sdbus) << 8;
-r |= sdbus_read_data(&s->sdbus);
+r |= sdbus_read_byte(&s->sdbus) << 24;
+r |= sdbus_read_byte(&s->sdbus) << 16;
+r |= sdbus_read_byte(&s->sdbus) << 8;
+r |= sdbus_read_byte(&s->sdbus);
 }
 break;
 case R_CLK2XDIV:
diff --git a/hw/sd/pl181.c b/hw/sd/pl181.c
index 771bae193f5..579d68ad83e 100644
--- a/hw/sd/pl181.c
+++ b/hw/sd/pl181.c
@@ -223,7 +223,7 @@ static void pl181_fifo_run(PL181State *s)
 if (is_read) {
 n = 0;
 while (s->datacnt && s->fifo_len < PL181_FIFO_LEN) {
-value |= (uint32_t)sdbus_read_data(&s->sdbus) << (n * 8);
+value |= (uint32_t)sdbus_read_byte(&s->sdbus) << (n * 8);
 s->datacnt--;
 n++;
 if (n == 4) {
diff --git a/hw/sd/pxa2xx_mmci.c b/hw/sd/pxa2xx_mmci.c
index 07ddc2eb

[PULL 21/23] hw/sd: Use sdbus_read_data() instead of sdbus_read_byte() when possible

2020-08-21 Thread Philippe Mathieu-Daudé

Use the recently added sdbus_read_data() to read multiple
bytes at once, instead of calling sdbus_read_byte() in a loop.

Signed-off-by: Philippe Mathieu-Daudé 
Reviewed-by: Richard Henderson 
Message-Id: <20200814092346.21825-8-f4...@amsat.org>
---
 hw/sd/allwinner-sdhost.c  | 10 +++---
 hw/sd/milkymist-memcard.c |  7 ++-
 hw/sd/sdhci.c | 28 
 3 files changed, 13 insertions(+), 32 deletions(-)

diff --git a/hw/sd/allwinner-sdhost.c b/hw/sd/allwinner-sdhost.c
index eea5659c5f1..f9eb92c09ed 100644
--- a/hw/sd/allwinner-sdhost.c
+++ b/hw/sd/allwinner-sdhost.c
@@ -337,9 +337,7 @@ static uint32_t allwinner_sdhost_process_desc(AwSdHostState 
*s,
 
 /* Read from SD bus */
 } else {
-for (uint32_t i = 0; i < buf_bytes; i++) {
-buf[i] = sdbus_read_byte(&s->sdbus);
-}
+sdbus_read_data(&s->sdbus, buf, buf_bytes);
 cpu_physical_memory_write((desc->addr & DESC_SIZE_MASK) + num_done,
buf, buf_bytes);
 }
@@ -518,10 +516,8 @@ static uint64_t allwinner_sdhost_read(void *opaque, hwaddr 
offset,
 break;
 case REG_SD_FIFO:  /* Read/Write FIFO */
 if (sdbus_data_ready(&s->sdbus)) {
-res = sdbus_read_byte(&s->sdbus);
-res |= sdbus_read_byte(&s->sdbus) << 8;
-res |= sdbus_read_byte(&s->sdbus) << 16;
-res |= sdbus_read_byte(&s->sdbus) << 24;
+sdbus_read_data(&s->sdbus, &res, sizeof(uint32_t));
+le32_to_cpus(&res);
 allwinner_sdhost_update_transfer_cnt(s, sizeof(uint32_t));
 allwinner_sdhost_auto_stop(s);
 allwinner_sdhost_update_irq(s);
diff --git a/hw/sd/milkymist-memcard.c b/hw/sd/milkymist-memcard.c
index 12e091a46e7..be89a938763 100644
--- a/hw/sd/milkymist-memcard.c
+++ b/hw/sd/milkymist-memcard.c
@@ -151,11 +151,8 @@ static uint64_t memcard_read(void *opaque, hwaddr addr,
 if (!s->enabled) {
 r = 0x;
 } else {
-r = 0;
-r |= sdbus_read_byte(&s->sdbus) << 24;
-r |= sdbus_read_byte(&s->sdbus) << 16;
-r |= sdbus_read_byte(&s->sdbus) << 8;
-r |= sdbus_read_byte(&s->sdbus);
+sdbus_read_data(&s->sdbus, &r, sizeof(r));
+be32_to_cpus(&r);
 }
 break;
 case R_CLK2XDIV:
diff --git a/hw/sd/sdhci.c b/hw/sd/sdhci.c
index ddf36915619..1785d7e1f79 100644
--- a/hw/sd/sdhci.c
+++ b/hw/sd/sdhci.c
@@ -399,8 +399,6 @@ static void sdhci_end_transfer(SDHCIState *s)
 /* Fill host controller's read buffer with BLKSIZE bytes of data from card */
 static void sdhci_read_block_from_card(SDHCIState *s)
 {
-int index = 0;
-uint8_t data;
 const uint16_t blk_size = s->blksize & BLOCK_SIZE_MASK;
 
 if ((s->trnmod & SDHC_TRNS_MULTI) &&
@@ -408,12 +406,9 @@ static void sdhci_read_block_from_card(SDHCIState *s)
 return;
 }
 
-for (index = 0; index < blk_size; index++) {
-data = sdbus_read_byte(&s->sdbus);
-if (!FIELD_EX32(s->hostctl2, SDHC_HOSTCTL2, EXECUTE_TUNING)) {
-/* Device is not in tuning */
-s->fifo_buffer[index] = data;
-}
+if (!FIELD_EX32(s->hostctl2, SDHC_HOSTCTL2, EXECUTE_TUNING)) {
+/* Device is not in tuning */
+sdbus_read_data(&s->sdbus, s->fifo_buffer, blk_size);
 }
 
 if (FIELD_EX32(s->hostctl2, SDHC_HOSTCTL2, EXECUTE_TUNING)) {
@@ -574,7 +569,7 @@ static void sdhci_write_dataport(SDHCIState *s, uint32_t 
value, unsigned size)
 static void sdhci_sdma_transfer_multi_blocks(SDHCIState *s)
 {
 bool page_aligned = false;
-unsigned int n, begin;
+unsigned int begin;
 const uint16_t block_size = s->blksize & BLOCK_SIZE_MASK;
 uint32_t boundary_chk = 1 << (((s->blksize & ~BLOCK_SIZE_MASK) >> 12) + 
12);
 uint32_t boundary_count = boundary_chk - (s->sdmasysad % boundary_chk);
@@ -596,9 +591,7 @@ static void sdhci_sdma_transfer_multi_blocks(SDHCIState *s)
 SDHC_DAT_LINE_ACTIVE;
 while (s->blkcnt) {
 if (s->data_count == 0) {
-for (n = 0; n < block_size; n++) {
-s->fifo_buffer[n] = sdbus_read_byte(&s->sdbus);
-}
+sdbus_read_data(&s->sdbus, s->fifo_buffer, block_size);
 }
 begin = s->data_count;
 if (((boundary_count + begin) < block_size) && page_aligned) {
@@ -662,13 +655,10 @@ static void sdhci_sdma_transfer_multi_blocks(SDHCIState 
*s)
 /* single block SDMA transfer */
 static void sdhci_sdma_transfer_single_block(SDHCIState *s)
 {
-int n;
 uint32_t datacnt = s->blksize & BLOCK_SIZE_MASK;
 
 if (s->trnmod & SDHC_TRNS_READ) {
-for (n = 0; n < datacnt; n++) {
-s->fifo_buffer[n] = sdbus_read_byte(&s->sdbus);
-}
+sdbus_read_data(&s->sdbus, s->fifo_buffer, datacnt);
 dma_memory_write(s->dma_as,

[PULL 22/23] hw/sd: Fix incorrect populated function switch status data structure

2020-08-21 Thread Philippe Mathieu-Daudé

From: Bin Meng 

At present, bits [399:376] of the function switch status data
structure are wrongly populated. These 3 bytes encode function switch
status for the 6 function groups, with 4 bits per group, starting from
function group 6 at bit 399, then followed by function group 5 at
bit 395, and so on.

However, the code mistakenly fills in the function group 1 status
at bit 399. This fixes the code logic.

Fixes: a1bb27b1e9 ("SD card emulation (initial implementation)")
Signed-off-by: Bin Meng 
Reviewed-by: Philippe Mathieu-Daudé 
Tested-by: Sai Pavan Boddu 
Message-Id: <1598021136-49525-1-git-send-email-bmeng...@gmail.com>
Signed-off-by: Philippe Mathieu-Daudé 
---
 hw/sd/sd.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/hw/sd/sd.c b/hw/sd/sd.c
index 7c9d956f113..805e21fc883 100644
--- a/hw/sd/sd.c
+++ b/hw/sd/sd.c
@@ -807,11 +807,12 @@ static void sd_function_switch(SDState *sd, uint32_t arg)
 sd->data[11] = 0x43;
 sd->data[12] = 0x80;   /* Supported group 1 functions */
 sd->data[13] = 0x03;
+
 for (i = 0; i < 6; i ++) {
 new_func = (arg >> (i * 4)) & 0x0f;
 if (mode && new_func != 0x0f)
 sd->function_group[i] = new_func;
-sd->data[14 + (i >> 1)] = new_func << ((i * 4) & 4);
+sd->data[16 - (i >> 1)] |= new_func << ((i % 2) * 4);
 }
 memset(&sd->data[17], 0, 47);
 stw_be_p(sd->data + 64, sd_crc16(sd->data, 64));
-- 
2.26.2

Re: Suspicious QOM types without instance/class size

2020-08-21 Thread Eduardo Habkost

On Fri, Aug 21, 2020 at 01:53:52PM +0300, Roman Bolshakov wrote:
> On Thu, Aug 20, 2020 at 05:55:29PM -0400, Eduardo Habkost wrote:
> > While trying to convert TypeInfo declarations to the new
> > OBJECT_DECLARE* macros, I've stumbled on a few suspicious cases
> > where instance_size or class_size is not set, despite having type
> > checker macros that use a specific type.
> > 
> > The ones with "WARNING" are abstract types (maybe not serious if
> > subclasses set the appropriate sizes).  The ones with "ERROR"
> > don't seem to be abstract types.
> > 
> 
> > ERROR: target/i386/hvf/hvf.c:908:1: instance_size should be set to 
> > sizeof(HVFState)?
> 
> Hi Eduardo,
> 
> How do you get the error?

My script looks for corresponding type checking macros, and checks
if instance_size is set to sizeof(T) with the right type from the
type checking macro.

The code is here:
https://github.com/ehabkost/qemu-hacks/blob/920b2c521ad2a29fa663256854e24ed2059ba9cd/scripts/codeconverter/codeconverter/qom_type_info.py#L136

> 
> Given your changes, instance size should really be sizeof(HVFState).
> 

The changes I've made shouldn't make any difference (if there's
an issue, it is there before or after my series).

> BTW, the object definition for hvf seems different from KVM (and perhaps
> wrong?), e.g. HVFState is allocated within init_machine handler and then
> assigned to a global variable:

Interesting.  It looks like hvf_state is _not_ the actual QOM
object instance.  The actual TYPE_HVF_ACCEL instance is created
by do_configure_accelerator().  That would explain why the lack
of instance_init never caused any problems.

Luckily, no code ever used the HVF_STATE macro.  If
HVF_STATE(hvf_state) got called, it would crash because of
uninitialized object instance data.  If HVF_STATE(machine->accel)
got called, it would return an invalid HVFState pointer (not
hvf_state).

I believe the simplest short term solution here is to just delete
the HVF_STATE macro and HVFState::parent field.  We can worry
about actually moving hvf_state to the machine->accel QOM object
later.

> 
> static int hvf_accel_init(MachineState *ms)
> {
> int x;
> hv_return_t ret;
> HVFState *s;
> 
> ret = hv_vm_create(HV_VM_DEFAULT);
> assert_hvf_ok(ret);
> 
> s = g_new0(HVFState, 1);
>  
> s->num_slots = 32;
> for (x = 0; x < s->num_slots; ++x) {
> s->slots[x].size = 0;
> s->slots[x].slot_id = x;
> }
>   
> hvf_state = s;
> cpu_interrupt_handler = hvf_handle_interrupt;
> memory_listener_register(&hvf_memory_listener, &address_space_memory);
> return 0;
> }
> 
> static void hvf_accel_class_init(ObjectClass *oc, void *data)
> {
> AccelClass *ac = ACCEL_CLASS(oc);
> ac->name = "HVF";
> ac->init_machine = hvf_accel_init;
> ac->allowed = &hvf_allowed;
> }
> 
> static const TypeInfo hvf_accel_type = {
> .name = TYPE_HVF_ACCEL,
> .parent = TYPE_ACCEL,
> .class_init = hvf_accel_class_init,
> };
> 
> Thanks,
> Roman
> 

-- 
Eduardo

[PULL 19/23] hw/sd: Use sdbus_write_data() instead of sdbus_write_byte when possible

2020-08-21 Thread Philippe Mathieu-Daudé

Use the recently added sdbus_write_data() to write multiple
bytes at once, instead of calling sdbus_write_byte() in a loop.

Signed-off-by: Philippe Mathieu-Daudé 
Reviewed-by: Richard Henderson 
Message-Id: <20200814092346.21825-6-f4...@amsat.org>
---
 hw/sd/allwinner-sdhost.c  | 14 +-
 hw/sd/milkymist-memcard.c |  7 +++
 hw/sd/sdhci.c | 18 --
 3 files changed, 12 insertions(+), 27 deletions(-)

diff --git a/hw/sd/allwinner-sdhost.c b/hw/sd/allwinner-sdhost.c
index c004aa39da6..eea5659c5f1 100644
--- a/hw/sd/allwinner-sdhost.c
+++ b/hw/sd/allwinner-sdhost.c
@@ -333,10 +333,7 @@ static uint32_t 
allwinner_sdhost_process_desc(AwSdHostState *s,
 if (is_write) {
 cpu_physical_memory_read((desc->addr & DESC_SIZE_MASK) + num_done,
   buf, buf_bytes);
-
-for (uint32_t i = 0; i < buf_bytes; i++) {
-sdbus_write_byte(&s->sdbus, buf[i]);
-}
+sdbus_write_data(&s->sdbus, buf, buf_bytes);
 
 /* Read from SD bus */
 } else {
@@ -548,6 +545,7 @@ static void allwinner_sdhost_write(void *opaque, hwaddr 
offset,
uint64_t value, unsigned size)
 {
 AwSdHostState *s = AW_SDHOST(opaque);
+uint32_t u32;
 
 trace_allwinner_sdhost_write(offset, value, size);
 
@@ -654,11 +652,9 @@ static void allwinner_sdhost_write(void *opaque, hwaddr 
offset,
 s->startbit_detect = value;
 break;
 case REG_SD_FIFO:  /* Read/Write FIFO */
-sdbus_write_byte(&s->sdbus, value & 0xff);
-sdbus_write_byte(&s->sdbus, (value >> 8) & 0xff);
-sdbus_write_byte(&s->sdbus, (value >> 16) & 0xff);
-sdbus_write_byte(&s->sdbus, (value >> 24) & 0xff);
-allwinner_sdhost_update_transfer_cnt(s, sizeof(uint32_t));
+u32 = cpu_to_le32(value);
+sdbus_write_data(&s->sdbus, &u32, sizeof(u32));
+allwinner_sdhost_update_transfer_cnt(s, sizeof(u32));
 allwinner_sdhost_auto_stop(s);
 allwinner_sdhost_update_irq(s);
 break;
diff --git a/hw/sd/milkymist-memcard.c b/hw/sd/milkymist-memcard.c
index e8d055bb895..12e091a46e7 100644
--- a/hw/sd/milkymist-memcard.c
+++ b/hw/sd/milkymist-memcard.c
@@ -181,6 +181,7 @@ static void memcard_write(void *opaque, hwaddr addr, 
uint64_t value,
   unsigned size)
 {
 MilkymistMemcardState *s = opaque;
+uint32_t val32;
 
 trace_milkymist_memcard_memory_write(addr, value);
 
@@ -209,10 +210,8 @@ static void memcard_write(void *opaque, hwaddr addr, 
uint64_t value,
 if (!s->enabled) {
 break;
 }
-sdbus_write_byte(&s->sdbus, (value >> 24) & 0xff);
-sdbus_write_byte(&s->sdbus, (value >> 16) & 0xff);
-sdbus_write_byte(&s->sdbus, (value >> 8) & 0xff);
-sdbus_write_byte(&s->sdbus, value & 0xff);
+val32 = cpu_to_be32(value);
+sdbus_write_data(&s->sdbus, &val32, sizeof(val32));
 break;
 case R_ENABLE:
 s->regs[addr] = value;
diff --git a/hw/sd/sdhci.c b/hw/sd/sdhci.c
index b897b1121b8..ddf36915619 100644
--- a/hw/sd/sdhci.c
+++ b/hw/sd/sdhci.c
@@ -496,8 +496,6 @@ static uint32_t sdhci_read_dataport(SDHCIState *s, unsigned 
size)
 /* Write data from host controller FIFO to card */
 static void sdhci_write_block_to_card(SDHCIState *s)
 {
-int index = 0;
-
 if (s->prnsts & SDHC_SPACE_AVAILABLE) {
 if (s->norintstsen & SDHC_NISEN_WBUFRDY) {
 s->norintsts |= SDHC_NIS_WBUFRDY;
@@ -514,9 +512,7 @@ static void sdhci_write_block_to_card(SDHCIState *s)
 }
 }
 
-for (index = 0; index < (s->blksize & BLOCK_SIZE_MASK); index++) {
-sdbus_write_byte(&s->sdbus, s->fifo_buffer[index]);
-}
+sdbus_write_data(&s->sdbus, s->fifo_buffer, s->blksize & BLOCK_SIZE_MASK);
 
 /* Next data can be written through BUFFER DATORT register */
 s->prnsts |= SDHC_SPACE_AVAILABLE;
@@ -641,9 +637,7 @@ static void sdhci_sdma_transfer_multi_blocks(SDHCIState *s)
 &s->fifo_buffer[begin], s->data_count - begin);
 s->sdmasysad += s->data_count - begin;
 if (s->data_count == block_size) {
-for (n = 0; n < block_size; n++) {
-sdbus_write_byte(&s->sdbus, s->fifo_buffer[n]);
-}
+sdbus_write_data(&s->sdbus, s->fifo_buffer, block_size);
 s->data_count = 0;
 if (s->trnmod & SDHC_TRNS_BLK_CNT_EN) {
 s->blkcnt--;
@@ -678,9 +672,7 @@ static void sdhci_sdma_transfer_single_block(SDHCIState *s)
 dma_memory_write(s->dma_as, s->sdmasysad, s->fifo_buffer, datacnt);
 } else {
 dma_memory_read(s->dma_as, s->sdmasysad, s->fifo_buffer, datacnt);
-for (n = 0; n < datacnt; n++) {
-sdbus_write_byte(&s->sdbus, s->fifo_buffer[n]);
-}
+sdbus_write_data(&s->sdbus, s->fifo_buffer, dat

[PULL 12/23] hw/sd/pl181: Replace disabled fprintf()s by trace events

2020-08-21 Thread Philippe Mathieu-Daudé

Convert disabled DPRINTF() to trace events and remove ifdef'ry.

Signed-off-by: Philippe Mathieu-Daudé 
Reviewed-by: Alistair Francis 
Acked-by: Peter Maydell 
Message-Id: <20200705204630.4133-9-f4...@amsat.org>
---
 hw/sd/pl181.c  | 26 +-
 hw/sd/trace-events | 10 ++
 2 files changed, 19 insertions(+), 17 deletions(-)

diff --git a/hw/sd/pl181.c b/hw/sd/pl181.c
index f69488ebac3..574500ce600 100644
--- a/hw/sd/pl181.c
+++ b/hw/sd/pl181.c
@@ -17,15 +17,7 @@
 #include "qemu/module.h"
 #include "qemu/error-report.h"
 #include "qapi/error.h"
-
-//#define DEBUG_PL181 1
-
-#ifdef DEBUG_PL181
-#define DPRINTF(fmt, ...) \
-do { printf("pl181: " fmt , ## __VA_ARGS__); } while (0)
-#else
-#define DPRINTF(fmt, ...) do {} while(0)
-#endif
+#include "trace.h"
 
 #define PL181_FIFO_LEN 16
 
@@ -158,7 +150,7 @@ static void pl181_fifo_push(PL181State *s, uint32_t value)
 n = (s->fifo_pos + s->fifo_len) & (PL181_FIFO_LEN - 1);
 s->fifo_len++;
 s->fifo[n] = value;
-DPRINTF("FIFO push %08x\n", (int)value);
+trace_pl181_fifo_push(value);
 }
 
 static uint32_t pl181_fifo_pop(PL181State *s)
@@ -172,7 +164,7 @@ static uint32_t pl181_fifo_pop(PL181State *s)
 value = s->fifo[s->fifo_pos];
 s->fifo_len--;
 s->fifo_pos = (s->fifo_pos + 1) & (PL181_FIFO_LEN - 1);
-DPRINTF("FIFO pop %08x\n", (int)value);
+trace_pl181_fifo_pop(value);
 return value;
 }
 
@@ -184,7 +176,7 @@ static void pl181_do_command(PL181State *s)
 
 request.cmd = s->cmd & PL181_CMD_INDEX;
 request.arg = s->cmdarg;
-DPRINTF("Command %d %08x\n", request.cmd, request.arg);
+trace_pl181_command_send(request.cmd, request.arg);
 rlen = sdbus_do_command(&s->sdbus, &request, response);
 if (rlen < 0)
 goto error;
@@ -201,16 +193,16 @@ static void pl181_do_command(PL181State *s)
 s->response[2] = ldl_be_p(&response[8]);
 s->response[3] = ldl_be_p(&response[12]) & ~1;
 }
-DPRINTF("Response received\n");
+trace_pl181_command_response_pending();
 s->status |= PL181_STATUS_CMDRESPEND;
 } else {
-DPRINTF("Command sent\n");
+trace_pl181_command_sent();
 s->status |= PL181_STATUS_CMDSENT;
 }
 return;
 
 error:
-DPRINTF("Timeout\n");
+trace_pl181_command_timeout();
 s->status |= PL181_STATUS_CMDTIMEOUT;
 }
 
@@ -262,11 +254,11 @@ static void pl181_fifo_run(PL181State *s)
 s->status |= PL181_STATUS_DATAEND;
 /* HACK: */
 s->status |= PL181_STATUS_DATABLOCKEND;
-DPRINTF("Transfer Complete\n");
+trace_pl181_fifo_transfer_complete();
 }
 if (s->datacnt == 0 && s->fifo_len == 0) {
 s->datactrl &= ~PL181_DATA_ENABLE;
-DPRINTF("Data engine idle\n");
+trace_pl181_data_engine_idle();
 } else {
 /* Update FIFO bits.  */
 bits = PL181_STATUS_TXACTIVE | PL181_STATUS_RXACTIVE;
diff --git a/hw/sd/trace-events b/hw/sd/trace-events
index 5f09d32eb2c..a87d7355fb8 100644
--- a/hw/sd/trace-events
+++ b/hw/sd/trace-events
@@ -62,3 +62,13 @@ milkymist_memcard_memory_write(uint32_t addr, uint32_t 
value) "addr 0x%08x value
 # pxa2xx_mmci.c
 pxa2xx_mmci_read(uint8_t size, uint32_t addr, uint32_t value) "size %d addr 
0x%02x value 0x%08x"
 pxa2xx_mmci_write(uint8_t size, uint32_t addr, uint32_t value) "size %d addr 
0x%02x value 0x%08x"
+
+# pl181.c
+pl181_command_send(uint8_t cmd, uint32_t arg) "sending CMD%02d arg 0x%08" 
PRIx32
+pl181_command_sent(void) "command sent"
+pl181_command_response_pending(void) "response received"
+pl181_command_timeout(void) "command timeouted"
+pl181_fifo_push(uint32_t data) "FIFO push 0x%08" PRIx32
+pl181_fifo_pop(uint32_t data) "FIFO pop 0x%08" PRIx32
+pl181_fifo_transfer_complete(void) "FIFO transfer complete"
+pl181_data_engine_idle(void) "data engine idle"
-- 
2.26.2

[PULL 20/23] hw/sd: Add sdbus_read_data() to read multiples bytes on the data line

2020-08-21 Thread Philippe Mathieu-Daudé

Add a sdbus_read_data() method to read multiple bytes on the
data line of a SD bus.
We might improve the tracing later, for now keep logging each
byte individually.

Signed-off-by: Philippe Mathieu-Daudé 
Reviewed-by: Richard Henderson 
Message-Id: <20200814092346.21825-7-f4...@amsat.org>
---
 include/hw/sd/sd.h |  9 +
 hw/sd/core.c   | 15 +++
 2 files changed, 24 insertions(+)

diff --git a/include/hw/sd/sd.h b/include/hw/sd/sd.h
index 3ae3e8939b3..ac02d61a7a0 100644
--- a/include/hw/sd/sd.h
+++ b/include/hw/sd/sd.h
@@ -184,6 +184,15 @@ uint8_t sdbus_read_byte(SDBus *sd);
  * Write multiple bytes of data on the data lines of a SD bus.
  */
 void sdbus_write_data(SDBus *sdbus, const void *buf, size_t length);
+/**
+ * Read data from a SD bus.
+ * @sdbus: bus
+ * @buf: buffer to read data into
+ * @length: number of bytes to read
+ *
+ * Read multiple bytes of data on the data lines of a SD bus.
+ */
+void sdbus_read_data(SDBus *sdbus, void *buf, size_t length);
 bool sdbus_data_ready(SDBus *sd);
 bool sdbus_get_inserted(SDBus *sd);
 bool sdbus_get_readonly(SDBus *sd);
diff --git a/hw/sd/core.c b/hw/sd/core.c
index 9c2781ebf96..957d116f1a7 100644
--- a/hw/sd/core.c
+++ b/hw/sd/core.c
@@ -144,6 +144,21 @@ uint8_t sdbus_read_byte(SDBus *sdbus)
 return value;
 }
 
+void sdbus_read_data(SDBus *sdbus, void *buf, size_t length)
+{
+SDState *card = get_card(sdbus);
+uint8_t *data = buf;
+
+if (card) {
+SDCardClass *sc = SD_CARD_GET_CLASS(card);
+
+for (size_t i = 0; i < length; i++) {
+data[i] = sc->read_byte(card);
+trace_sdbus_read(sdbus_name(sdbus), data[i]);
+}
+}
+}
+
 bool sdbus_data_ready(SDBus *sdbus)
 {
 SDState *card = get_card(sdbus);
-- 
2.26.2

[PULL 15/23] hw/sd: Rename read/write_data() as read/write_byte()

2020-08-21 Thread Philippe Mathieu-Daudé

The read/write_data() methods do a single byte access
on the data line of a SD card. Rename them as read/write_byte().
Add some documentation (not in "hw/sd/sdcard_legacy.h" which we
are going to remove soon).

Signed-off-by: Philippe Mathieu-Daudé 
Reviewed-by: Richard Henderson 
Message-Id: <20200814092346.21825-2-f4...@amsat.org>
---
 include/hw/sd/sd.h| 19 +--
 include/hw/sd/sdcard_legacy.h |  4 ++--
 hw/sd/core.c  |  4 ++--
 hw/sd/omap_mmc.c  |  8 
 hw/sd/sd.c| 16 
 5 files changed, 33 insertions(+), 18 deletions(-)

diff --git a/include/hw/sd/sd.h b/include/hw/sd/sd.h
index 8767ab817c1..b58b5a19afe 100644
--- a/include/hw/sd/sd.h
+++ b/include/hw/sd/sd.h
@@ -104,8 +104,23 @@ typedef struct {
 /*< public >*/
 
 int (*do_command)(SDState *sd, SDRequest *req, uint8_t *response);
-void (*write_data)(SDState *sd, uint8_t value);
-uint8_t (*read_data)(SDState *sd);
+/**
+ * Write a byte to a SD card.
+ * @sd: card
+ * @value: byte to write
+ *
+ * Write a byte on the data lines of a SD card.
+ */
+void (*write_byte)(SDState *sd, uint8_t value);
+/**
+ * Read a byte from a SD card.
+ * @sd: card
+ *
+ * Read a byte from the data lines of a SD card.
+ *
+ * Return: byte value read
+ */
+uint8_t (*read_byte)(SDState *sd);
 bool (*data_ready)(SDState *sd);
 void (*set_voltage)(SDState *sd, uint16_t millivolts);
 uint8_t (*get_dat_lines)(SDState *sd);
diff --git a/include/hw/sd/sdcard_legacy.h b/include/hw/sd/sdcard_legacy.h
index 8681f8089ba..0dc38895551 100644
--- a/include/hw/sd/sdcard_legacy.h
+++ b/include/hw/sd/sdcard_legacy.h
@@ -34,8 +34,8 @@
 /* Legacy functions to be used only by non-qdevified callers */
 SDState *sd_init(BlockBackend *blk, bool is_spi);
 int sd_do_command(SDState *card, SDRequest *request, uint8_t *response);
-void sd_write_data(SDState *card, uint8_t value);
-uint8_t sd_read_data(SDState *card);
+void sd_write_byte(SDState *card, uint8_t value);
+uint8_t sd_read_byte(SDState *card);
 void sd_set_cb(SDState *card, qemu_irq readonly, qemu_irq insert);
 
 /* sd_enable should not be used -- it is only used on the nseries boards,
diff --git a/hw/sd/core.c b/hw/sd/core.c
index abec48bccb8..79d96576ead 100644
--- a/hw/sd/core.c
+++ b/hw/sd/core.c
@@ -110,7 +110,7 @@ void sdbus_write_data(SDBus *sdbus, uint8_t value)
 if (card) {
 SDCardClass *sc = SD_CARD_GET_CLASS(card);
 
-sc->write_data(card, value);
+sc->write_byte(card, value);
 }
 }
 
@@ -122,7 +122,7 @@ uint8_t sdbus_read_data(SDBus *sdbus)
 if (card) {
 SDCardClass *sc = SD_CARD_GET_CLASS(card);
 
-value = sc->read_data(card);
+value = sc->read_byte(card);
 }
 trace_sdbus_read(sdbus_name(sdbus), value);
 
diff --git a/hw/sd/omap_mmc.c b/hw/sd/omap_mmc.c
index 7d33c59226a..1f946908fe1 100644
--- a/hw/sd/omap_mmc.c
+++ b/hw/sd/omap_mmc.c
@@ -232,10 +232,10 @@ static void omap_mmc_transfer(struct omap_mmc_s *host)
 if (host->fifo_len > host->af_level)
 break;
 
-value = sd_read_data(host->card);
+value = sd_read_byte(host->card);
 host->fifo[(host->fifo_start + host->fifo_len) & 31] = value;
 if (-- host->blen_counter) {
-value = sd_read_data(host->card);
+value = sd_read_byte(host->card);
 host->fifo[(host->fifo_start + host->fifo_len) & 31] |=
 value << 8;
 host->blen_counter --;
@@ -247,10 +247,10 @@ static void omap_mmc_transfer(struct omap_mmc_s *host)
 break;
 
 value = host->fifo[host->fifo_start] & 0xff;
-sd_write_data(host->card, value);
+sd_write_byte(host->card, value);
 if (-- host->blen_counter) {
 value = host->fifo[host->fifo_start] >> 8;
-sd_write_data(host->card, value);
+sd_write_byte(host->card, value);
 host->blen_counter --;
 }
 
diff --git a/hw/sd/sd.c b/hw/sd/sd.c
index 5c6f5c94f3d..7c9d956f113 100644
--- a/hw/sd/sd.c
+++ b/hw/sd/sd.c
@@ -1809,7 +1809,7 @@ static void sd_blk_write(SDState *sd, uint64_t addr, 
uint32_t len)
 #define APP_READ_BLOCK(a, len) memset(sd->data, 0xec, len)
 #define APP_WRITE_BLOCK(a, len)
 
-void sd_write_data(SDState *sd, uint8_t value)
+void sd_write_byte(SDState *sd, uint8_t value)
 {
 int i;
 
@@ -1818,7 +1818,7 @@ void sd_write_data(SDState *sd, uint8_t value)
 
 if (sd->state != sd_receivingdata_state) {
 qemu_log_mask(LOG_GUEST_ERROR,
-  "sd_write_data: not in Receiving-Data state\n");
+  "%s: not in Receiving-Data state\n", __func__);
 return;
 }
 
@@ -1940,7 +1940,7 @@ void sd_write_data(SDState *sd, uint8_t value)
 break;
 
 default:
-

[PULL 18/23] hw/sd: Add sdbus_write_data() to write multiples bytes on the data line

2020-08-21 Thread Philippe Mathieu-Daudé

Add a sdbus_write_data() method to write multiple bytes on the
data line of a SD bus.
We might improve the tracing later, for now keep logging each
byte individually.

Signed-off-by: Philippe Mathieu-Daudé 
Reviewed-by: Richard Henderson 
Message-Id: <20200814092346.21825-5-f4...@amsat.org>
---
 include/hw/sd/sd.h |  9 +
 hw/sd/core.c   | 15 +++
 2 files changed, 24 insertions(+)

diff --git a/include/hw/sd/sd.h b/include/hw/sd/sd.h
index 14ffc7f4758..3ae3e8939b3 100644
--- a/include/hw/sd/sd.h
+++ b/include/hw/sd/sd.h
@@ -175,6 +175,15 @@ void sdbus_write_byte(SDBus *sd, uint8_t value);
  * Return: byte value read
  */
 uint8_t sdbus_read_byte(SDBus *sd);
+/**
+ * Write data to a SD bus.
+ * @sdbus: bus
+ * @buf: data to write
+ * @length: number of bytes to write
+ *
+ * Write multiple bytes of data on the data lines of a SD bus.
+ */
+void sdbus_write_data(SDBus *sdbus, const void *buf, size_t length);
 bool sdbus_data_ready(SDBus *sd);
 bool sdbus_get_inserted(SDBus *sd);
 bool sdbus_get_readonly(SDBus *sd);
diff --git a/hw/sd/core.c b/hw/sd/core.c
index a3b620b802b..9c2781ebf96 100644
--- a/hw/sd/core.c
+++ b/hw/sd/core.c
@@ -114,6 +114,21 @@ void sdbus_write_byte(SDBus *sdbus, uint8_t value)
 }
 }
 
+void sdbus_write_data(SDBus *sdbus, const void *buf, size_t length)
+{
+SDState *card = get_card(sdbus);
+const uint8_t *data = buf;
+
+if (card) {
+SDCardClass *sc = SD_CARD_GET_CLASS(card);
+
+for (size_t i = 0; i < length; i++) {
+trace_sdbus_write(sdbus_name(sdbus), data[i]);
+sc->write_byte(card, data[i]);
+}
+}
+}
+
 uint8_t sdbus_read_byte(SDBus *sdbus)
 {
 SDState *card = get_card(sdbus);
-- 
2.26.2

[PULL 16/23] hw/sd: Rename sdbus_write_data() as sdbus_write_byte()

2020-08-21 Thread Philippe Mathieu-Daudé

The sdbus_write_data() method does a single byte access on the data
line of a SD bus. Rename it as sdbus_write_byte() and document it.

Signed-off-by: Philippe Mathieu-Daudé 
Reviewed-by: Richard Henderson 
Message-Id: <20200814092346.21825-3-f4...@amsat.org>
---
 include/hw/sd/sd.h|  9 -
 hw/sd/allwinner-sdhost.c  | 10 +-
 hw/sd/bcm2835_sdhost.c|  2 +-
 hw/sd/core.c  |  2 +-
 hw/sd/milkymist-memcard.c |  8 
 hw/sd/pl181.c |  2 +-
 hw/sd/pxa2xx_mmci.c   |  2 +-
 hw/sd/sdhci.c |  8 
 8 files changed, 25 insertions(+), 18 deletions(-)

diff --git a/include/hw/sd/sd.h b/include/hw/sd/sd.h
index b58b5a19afe..1e5ac955d05 100644
--- a/include/hw/sd/sd.h
+++ b/include/hw/sd/sd.h
@@ -158,7 +158,14 @@ void sdbus_set_voltage(SDBus *sdbus, uint16_t millivolts);
 uint8_t sdbus_get_dat_lines(SDBus *sdbus);
 bool sdbus_get_cmd_line(SDBus *sdbus);
 int sdbus_do_command(SDBus *sd, SDRequest *req, uint8_t *response);
-void sdbus_write_data(SDBus *sd, uint8_t value);
+/**
+ * Write a byte to a SD bus.
+ * @sd: bus
+ * @value: byte to write
+ *
+ * Write a byte on the data lines of a SD bus.
+ */
+void sdbus_write_byte(SDBus *sd, uint8_t value);
 uint8_t sdbus_read_data(SDBus *sd);
 bool sdbus_data_ready(SDBus *sd);
 bool sdbus_get_inserted(SDBus *sd);
diff --git a/hw/sd/allwinner-sdhost.c b/hw/sd/allwinner-sdhost.c
index f404e1fdb45..e05e8a3864c 100644
--- a/hw/sd/allwinner-sdhost.c
+++ b/hw/sd/allwinner-sdhost.c
@@ -335,7 +335,7 @@ static uint32_t allwinner_sdhost_process_desc(AwSdHostState 
*s,
   buf, buf_bytes);
 
 for (uint32_t i = 0; i < buf_bytes; i++) {
-sdbus_write_data(&s->sdbus, buf[i]);
+sdbus_write_byte(&s->sdbus, buf[i]);
 }
 
 /* Read from SD bus */
@@ -654,10 +654,10 @@ static void allwinner_sdhost_write(void *opaque, hwaddr 
offset,
 s->startbit_detect = value;
 break;
 case REG_SD_FIFO:  /* Read/Write FIFO */
-sdbus_write_data(&s->sdbus, value & 0xff);
-sdbus_write_data(&s->sdbus, (value >> 8) & 0xff);
-sdbus_write_data(&s->sdbus, (value >> 16) & 0xff);
-sdbus_write_data(&s->sdbus, (value >> 24) & 0xff);
+sdbus_write_byte(&s->sdbus, value & 0xff);
+sdbus_write_byte(&s->sdbus, (value >> 8) & 0xff);
+sdbus_write_byte(&s->sdbus, (value >> 16) & 0xff);
+sdbus_write_byte(&s->sdbus, (value >> 24) & 0xff);
 allwinner_sdhost_update_transfer_cnt(s, sizeof(uint32_t));
 allwinner_sdhost_auto_stop(s);
 allwinner_sdhost_update_irq(s);
diff --git a/hw/sd/bcm2835_sdhost.c b/hw/sd/bcm2835_sdhost.c
index 4a80fbcc861..16aba7cc92b 100644
--- a/hw/sd/bcm2835_sdhost.c
+++ b/hw/sd/bcm2835_sdhost.c
@@ -223,7 +223,7 @@ static void bcm2835_sdhost_fifo_run(BCM2835SDHostState *s)
 }
 n--;
 s->datacnt--;
-sdbus_write_data(&s->sdbus, value & 0xff);
+sdbus_write_byte(&s->sdbus, value & 0xff);
 value >>= 8;
 }
 }
diff --git a/hw/sd/core.c b/hw/sd/core.c
index 79d96576ead..13b5ca03169 100644
--- a/hw/sd/core.c
+++ b/hw/sd/core.c
@@ -102,7 +102,7 @@ int sdbus_do_command(SDBus *sdbus, SDRequest *req, uint8_t 
*response)
 return 0;
 }
 
-void sdbus_write_data(SDBus *sdbus, uint8_t value)
+void sdbus_write_byte(SDBus *sdbus, uint8_t value)
 {
 SDState *card = get_card(sdbus);
 
diff --git a/hw/sd/milkymist-memcard.c b/hw/sd/milkymist-memcard.c
index e9f5db5e22d..4128109c047 100644
--- a/hw/sd/milkymist-memcard.c
+++ b/hw/sd/milkymist-memcard.c
@@ -209,10 +209,10 @@ static void memcard_write(void *opaque, hwaddr addr, 
uint64_t value,
 if (!s->enabled) {
 break;
 }
-sdbus_write_data(&s->sdbus, (value >> 24) & 0xff);
-sdbus_write_data(&s->sdbus, (value >> 16) & 0xff);
-sdbus_write_data(&s->sdbus, (value >> 8) & 0xff);
-sdbus_write_data(&s->sdbus, value & 0xff);
+sdbus_write_byte(&s->sdbus, (value >> 24) & 0xff);
+sdbus_write_byte(&s->sdbus, (value >> 16) & 0xff);
+sdbus_write_byte(&s->sdbus, (value >> 8) & 0xff);
+sdbus_write_byte(&s->sdbus, value & 0xff);
 break;
 case R_ENABLE:
 s->regs[addr] = value;
diff --git a/hw/sd/pl181.c b/hw/sd/pl181.c
index 574500ce600..771bae193f5 100644
--- a/hw/sd/pl181.c
+++ b/hw/sd/pl181.c
@@ -244,7 +244,7 @@ static void pl181_fifo_run(PL181State *s)
 }
 n--;
 s->datacnt--;
-sdbus_write_data(&s->sdbus, value & 0xff);
+sdbus_write_byte(&s->sdbus, value & 0xff);
 value >>= 8;
 }
 }
diff --git a/hw/sd/pxa2xx_mmci.c b/hw/sd/pxa2xx_mmci.c
index 2996a2ef177..07ddc2eba3e 100644
--- a/hw/sd/pxa2xx_mmci.c
+++ b/hw/sd/pxa2xx_mmci.c
@@ -184,7 +184,7 @@ static void pxa2xx_mmci_fifo_update(

[PULL 08/23] hw/sd/pl181: Add TODO to use Fifo32 API

2020-08-21 Thread Philippe Mathieu-Daudé

Add TODO to use Fifo32 API from "qemu/fifo32.h".

Signed-off-by: Philippe Mathieu-Daudé 
Acked-by: Peter Maydell 
Message-Id: <20200705204630.4133-4-f4...@amsat.org>
---
 hw/sd/pl181.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/hw/sd/pl181.c b/hw/sd/pl181.c
index 3fc2cdd71a1..86219c851d3 100644
--- a/hw/sd/pl181.c
+++ b/hw/sd/pl181.c
@@ -57,7 +57,7 @@ typedef struct PL181State {
http://www.arm.linux.org.uk/developer/patches/viewpatch.php?id=4446/1
  */
 int32_t linux_hack;
-uint32_t fifo[PL181_FIFO_LEN];
+uint32_t fifo[PL181_FIFO_LEN]; /* TODO use Fifo32 */
 qemu_irq irq[2];
 /* GPIO outputs for 'card is readonly' and 'card inserted' */
 qemu_irq cardstatus[2];
-- 
2.26.2

[PULL 07/23] hw/sd/pl181: Rename pl181_send_command() as pl181_do_command()

2020-08-21 Thread Philippe Mathieu-Daudé

pl181_send_command() does a bus transaction (send or receive),
rename it as pl181_do_command().

Signed-off-by: Philippe Mathieu-Daudé 
Reviewed-by: Alistair Francis 
Acked-by: Peter Maydell 
Message-Id: <20200705204630.4133-3-f4...@amsat.org>
---
 hw/sd/pl181.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/hw/sd/pl181.c b/hw/sd/pl181.c
index 649386ec3d1..3fc2cdd71a1 100644
--- a/hw/sd/pl181.c
+++ b/hw/sd/pl181.c
@@ -173,7 +173,7 @@ static uint32_t pl181_fifo_pop(PL181State *s)
 return value;
 }
 
-static void pl181_send_command(PL181State *s)
+static void pl181_do_command(PL181State *s)
 {
 SDRequest request;
 uint8_t response[16];
@@ -402,7 +402,7 @@ static void pl181_write(void *opaque, hwaddr offset,
 qemu_log_mask(LOG_UNIMP,
   "pl181: Pending commands not implemented\n");
 } else {
-pl181_send_command(s);
+pl181_do_command(s);
 pl181_fifo_run(s);
 }
 /* The command has completed one way or the other.  */
-- 
2.26.2

[PULL 09/23] hw/sd/pl181: Use named GPIOs

2020-08-21 Thread Philippe Mathieu-Daudé

To make the code easier to manage/review/use, rename the
cardstatus[0] variable as 'card_readonly' and name the GPIO
"card-read-only".
Similarly with cardstatus[1], renamed as 'card_inserted' and
name its GPIO "card-inserted".

Adapt the users accordingly by using the qdev_init_gpio_out_named()
function.

Signed-off-by: Philippe Mathieu-Daudé 
Reviewed-by: Alistair Francis 
Acked-by: Peter Maydell 
Message-Id: <20200705204630.4133-6-f4...@amsat.org>
---
 hw/arm/integratorcp.c | 4 ++--
 hw/arm/realview.c | 4 ++--
 hw/arm/vexpress.c | 4 ++--
 hw/sd/pl181.c | 8 +---
 4 files changed, 11 insertions(+), 9 deletions(-)

diff --git a/hw/arm/integratorcp.c b/hw/arm/integratorcp.c
index f304c2b4f03..16c4d750a4f 100644
--- a/hw/arm/integratorcp.c
+++ b/hw/arm/integratorcp.c
@@ -645,9 +645,9 @@ static void integratorcp_init(MachineState *machine)
 sysbus_create_simple(TYPE_INTEGRATOR_DEBUG, 0x1a00, 0);
 
 dev = sysbus_create_varargs("pl181", 0x1c00, pic[23], pic[24], NULL);
-qdev_connect_gpio_out(dev, 0,
+qdev_connect_gpio_out_named(dev, "card-read-only", 0,
   qdev_get_gpio_in_named(icp, ICP_GPIO_MMC_WPROT, 0));
-qdev_connect_gpio_out(dev, 1,
+qdev_connect_gpio_out_named(dev, "card-inserted", 0,
   qdev_get_gpio_in_named(icp, ICP_GPIO_MMC_CARDIN, 0));
 sysbus_create_varargs("pl041", 0x1d00, pic[25], NULL);
 
diff --git a/hw/arm/realview.c b/hw/arm/realview.c
index c1ff172b136..3e2360c261f 100644
--- a/hw/arm/realview.c
+++ b/hw/arm/realview.c
@@ -234,8 +234,8 @@ static void realview_init(MachineState *machine,
 mmc_irq[1] = qemu_irq_split(
 qdev_get_gpio_in(sysctl, ARM_SYSCTL_GPIO_MMC_CARDIN),
 qemu_irq_invert(qdev_get_gpio_in(gpio2, 0)));
-qdev_connect_gpio_out(dev, 0, mmc_irq[0]);
-qdev_connect_gpio_out(dev, 1, mmc_irq[1]);
+qdev_connect_gpio_out_named(dev, "card-read-only", 0, mmc_irq[0]);
+qdev_connect_gpio_out_named(dev, "card-inserted", 0, mmc_irq[1]);
 
 sysbus_create_simple("pl031", 0x10017000, pic[10]);
 
diff --git a/hw/arm/vexpress.c b/hw/arm/vexpress.c
index 1dc971c34f2..049a0ec2c73 100644
--- a/hw/arm/vexpress.c
+++ b/hw/arm/vexpress.c
@@ -624,9 +624,9 @@ static void vexpress_common_init(MachineState *machine)
 
 dev = sysbus_create_varargs("pl181", map[VE_MMCI], pic[9], pic[10], NULL);
 /* Wire up MMC card detect and read-only signals */
-qdev_connect_gpio_out(dev, 0,
+qdev_connect_gpio_out_named(dev, "card-read-only", 0,
   qdev_get_gpio_in(sysctl, ARM_SYSCTL_GPIO_MMC_WPROT));
-qdev_connect_gpio_out(dev, 1,
+qdev_connect_gpio_out_named(dev, "card-inserted", 0,
   qdev_get_gpio_in(sysctl, 
ARM_SYSCTL_GPIO_MMC_CARDIN));
 
 sysbus_create_simple("pl050_keyboard", map[VE_KMI0], pic[12]);
diff --git a/hw/sd/pl181.c b/hw/sd/pl181.c
index 86219c851d3..ab4cd733a4d 100644
--- a/hw/sd/pl181.c
+++ b/hw/sd/pl181.c
@@ -60,7 +60,8 @@ typedef struct PL181State {
 uint32_t fifo[PL181_FIFO_LEN]; /* TODO use Fifo32 */
 qemu_irq irq[2];
 /* GPIO outputs for 'card is readonly' and 'card inserted' */
-qemu_irq cardstatus[2];
+qemu_irq card_readonly;
+qemu_irq card_inserted;
 } PL181State;
 
 static const VMStateDescription vmstate_pl181 = {
@@ -479,7 +480,7 @@ static void pl181_reset(DeviceState *d)
 s->mask[1] = 0;
 
 /* We can assume our GPIO outputs have been wired up now */
-sd_set_cb(s->card, s->cardstatus[0], s->cardstatus[1]);
+sd_set_cb(s->card, s->card_readonly, s->card_inserted);
 /* Since we're still using the legacy SD API the card is not plugged
  * into any bus, and we must reset it manually.
  */
@@ -496,7 +497,8 @@ static void pl181_init(Object *obj)
 sysbus_init_mmio(sbd, &s->iomem);
 sysbus_init_irq(sbd, &s->irq[0]);
 sysbus_init_irq(sbd, &s->irq[1]);
-qdev_init_gpio_out(dev, s->cardstatus, 2);
+qdev_init_gpio_out_named(dev, &s->card_readonly, "card-read-only", 1);
+qdev_init_gpio_out_named(dev, &s->card_inserted, "card-inserted", 1);
 }
 
 static void pl181_realize(DeviceState *dev, Error **errp)
-- 
2.26.2

[PULL 05/23] hw/sd/milkymist: Do not create SD card within the SD host controller

2020-08-21 Thread Philippe Mathieu-Daudé

SD/MMC host controllers provide a SD Bus to plug SD cards,
but don't come with a SD card plugged in :) Let the machine/board
model create and plug the SD cards when required.

Signed-off-by: Philippe Mathieu-Daudé 
Reviewed-by: Alistair Francis 
Message-Id: <20200705211016.15241-5-f4...@amsat.org>
---
 hw/lm32/milkymist.c   | 13 +
 hw/sd/milkymist-memcard.c | 55 +++
 2 files changed, 45 insertions(+), 23 deletions(-)

diff --git a/hw/lm32/milkymist.c b/hw/lm32/milkymist.c
index 469e3c43225..9f8fe9fef15 100644
--- a/hw/lm32/milkymist.c
+++ b/hw/lm32/milkymist.c
@@ -34,6 +34,7 @@
 #include "elf.h"
 #include "milkymist-hw.h"
 #include "hw/display/milkymist_tmu2.h"
+#include "hw/sd/sd.h"
 #include "lm32.h"
 #include "exec/address-spaces.h"
 #include "qemu/cutils.h"
@@ -83,11 +84,23 @@ static void main_cpu_reset(void *opaque)
 static DeviceState *milkymist_memcard_create(hwaddr base)
 {
 DeviceState *dev;
+DriveInfo *dinfo;
 
 dev = qdev_new("milkymist-memcard");
 sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal);
 sysbus_mmio_map(SYS_BUS_DEVICE(dev), 0, base);
 
+dinfo = drive_get_next(IF_SD);
+if (dinfo) {
+DeviceState *card;
+
+card = qdev_new(TYPE_SD_CARD);
+qdev_prop_set_drive_err(card, "drive", blk_by_legacy_dinfo(dinfo),
+&error_fatal);
+qdev_realize_and_unref(card, qdev_get_child_bus(dev, "sd-bus"),
+   &error_fatal);
+}
+
 return dev;
 }
 
diff --git a/hw/sd/milkymist-memcard.c b/hw/sd/milkymist-memcard.c
index 747c5c6136b..e9f5db5e22d 100644
--- a/hw/sd/milkymist-memcard.c
+++ b/hw/sd/milkymist-memcard.c
@@ -66,6 +66,8 @@ enum {
 #define MILKYMIST_MEMCARD(obj) \
 OBJECT_CHECK(MilkymistMemcardState, (obj), TYPE_MILKYMIST_MEMCARD)
 
+#define TYPE_MILKYMIST_SDBUS "milkymist-sdbus"
+
 struct MilkymistMemcardState {
 SysBusDevice parent_obj;
 
@@ -253,6 +255,19 @@ static void milkymist_memcard_reset(DeviceState *d)
 }
 }
 
+static void milkymist_memcard_set_readonly(DeviceState *dev, bool level)
+{
+qemu_log_mask(LOG_UNIMP,
+  "milkymist_memcard: read-only mode not supported\n");
+}
+
+static void milkymist_memcard_set_inserted(DeviceState *dev, bool level)
+{
+MilkymistMemcardState *s = MILKYMIST_MEMCARD(dev);
+
+s->enabled = !!level;
+}
+
 static void milkymist_memcard_init(Object *obj)
 {
 MilkymistMemcardState *s = MILKYMIST_MEMCARD(obj);
@@ -266,27 +281,6 @@ static void milkymist_memcard_init(Object *obj)
 DEVICE(obj), "sd-bus");
 }
 
-static void milkymist_memcard_realize(DeviceState *dev, Error **errp)
-{
-MilkymistMemcardState *s = MILKYMIST_MEMCARD(dev);
-DeviceState *carddev;
-BlockBackend *blk;
-DriveInfo *dinfo;
-Error *err = NULL;
-
-/* Create and plug in the sd card */
-/* FIXME use a qdev drive property instead of drive_get_next() */
-dinfo = drive_get_next(IF_SD);
-blk = dinfo ? blk_by_legacy_dinfo(dinfo) : NULL;
-carddev = qdev_new(TYPE_SD_CARD);
-qdev_prop_set_drive(carddev, "drive", blk);
-if (!qdev_realize_and_unref(carddev, BUS(&s->sdbus), &err)) {
-error_propagate_prepend(errp, err, "failed to init SD card");
-return;
-}
-s->enabled = blk && blk_is_inserted(blk);
-}
-
 static const VMStateDescription vmstate_milkymist_memcard = {
 .name = "milkymist-memcard",
 .version_id = 1,
@@ -308,10 +302,9 @@ static void milkymist_memcard_class_init(ObjectClass 
*klass, void *data)
 {
 DeviceClass *dc = DEVICE_CLASS(klass);
 
-dc->realize = milkymist_memcard_realize;
 dc->reset = milkymist_memcard_reset;
 dc->vmsd = &vmstate_milkymist_memcard;
-/* Reason: init() method uses drive_get_next() */
+/* Reason: output IRQs should be wired up */
 dc->user_creatable = false;
 }
 
@@ -323,9 +316,25 @@ static const TypeInfo milkymist_memcard_info = {
 .class_init= milkymist_memcard_class_init,
 };
 
+static void milkymist_sdbus_class_init(ObjectClass *klass, void *data)
+{
+SDBusClass *sbc = SD_BUS_CLASS(klass);
+
+sbc->set_inserted = milkymist_memcard_set_inserted;
+sbc->set_readonly = milkymist_memcard_set_readonly;
+}
+
+static const TypeInfo milkymist_sdbus_info = {
+.name = TYPE_MILKYMIST_SDBUS,
+.parent = TYPE_SD_BUS,
+.instance_size = sizeof(SDBus),
+.class_init = milkymist_sdbus_class_init,
+};
+
 static void milkymist_memcard_register_types(void)
 {
 type_register_static(&milkymist_memcard_info);
+type_register_static(&milkymist_sdbus_info);
 }
 
 type_init(milkymist_memcard_register_types)
-- 
2.26.2

[PULL 06/23] hw/sd/pl181: Replace fprintf(stderr, "*\n") with error_report()

2020-08-21 Thread Philippe Mathieu-Daudé

From: Alistair Francis 

Replace a large number of the fprintf(stderr, "*\n") calls with
error_report(). The functions were renamed with these commands and then
compiler issues were manually fixed.

find ./* -type f -exec sed -i \
'N;N;N;N;N;N;N;N;N;N;N;N; {s|fprintf(stderr, 
"\(.*\)\\n"\(.*\));|error_report("\1"\2);|Ig}' \
{} +
find ./* -type f -exec sed -i \
'N;N;N;N;N;N;N;N;N;N;N; {s|fprintf(stderr, 
"\(.*\)\\n"\(.*\));|error_report("\1"\2);|Ig}' \
{} +
find ./* -type f -exec sed -i \
'N;N;N;N;N;N;N;N;N; {s|fprintf(stderr, 
"\(.*\)\\n"\(.*\));|error_report("\1"\2);|Ig}' \
{} +
find ./* -type f -exec sed -i \
'N;N;N;N;N;N;N;N; {s|fprintf(stderr, 
"\(.*\)\\n"\(.*\));|error_report("\1"\2);|Ig}' \
{} +
find ./* -type f -exec sed -i \
'N;N;N;N;N;N;N; {s|fprintf(stderr, 
"\(.*\)\\n"\(.*\));|error_report("\1"\2);|Ig}' \
{} +
find ./* -type f -exec sed -i \
'N;N;N;N;N;N; {s|fprintf(stderr, 
"\(.*\)\\n"\(.*\));|error_report("\1"\2);|Ig}' \
{} +
find ./* -type f -exec sed -i \
'N;N;N;N;N; {s|fprintf(stderr, 
"\(.*\)\\n"\(.*\));|error_report("\1"\2);|Ig}' \
{} +
find ./* -type f -exec sed -i \
'N;N;N;N; {s|fprintf(stderr, "\(.*\)\\n"\(.*\));|error_report("\1"\2);|Ig}' 
\
{} +
find ./* -type f -exec sed -i \
'N;N;N; {s|fprintf(stderr, "\(.*\)\\n"\(.*\));|error_report("\1"\2);|Ig}' \
{} +
find ./* -type f -exec sed -i \
'N;N; {s|fprintf(stderr, "\(.*\)\\n"\(.*\));|error_report("\1"\2);|Ig}' \
{} +
find ./* -type f -exec sed -i \
'N; {s|fprintf(stderr, "\(.*\)\\n"\(.*\));|error_report("\1"\2);|Ig}' \
{} +

Some lines were then manually tweaked to pass checkpatch.

Reviewed-by: Philippe Mathieu-Daudé 
Signed-off-by: Alistair Francis 
Message-Id: 
<488ba8d4c562ea44119de8ea0f385a898bd8fa1e.1513790495.git.alistair.fran...@xilinx.com>
Signed-off-by: Philippe Mathieu-Daudé 
Acked-by: Peter Maydell 
---
 hw/sd/pl181.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/hw/sd/pl181.c b/hw/sd/pl181.c
index 2b3776a6a0f..649386ec3d1 100644
--- a/hw/sd/pl181.c
+++ b/hw/sd/pl181.c
@@ -15,6 +15,7 @@
 #include "hw/sd/sd.h"
 #include "qemu/log.h"
 #include "qemu/module.h"
+#include "qemu/error-report.h"
 #include "qapi/error.h"
 
 //#define DEBUG_PL181 1
@@ -148,7 +149,7 @@ static void pl181_fifo_push(PL181State *s, uint32_t value)
 int n;
 
 if (s->fifo_len == PL181_FIFO_LEN) {
-fprintf(stderr, "pl181: FIFO overflow\n");
+error_report("%s: FIFO overflow", __func__);
 return;
 }
 n = (s->fifo_pos + s->fifo_len) & (PL181_FIFO_LEN - 1);
@@ -162,7 +163,7 @@ static uint32_t pl181_fifo_pop(PL181State *s)
 uint32_t value;
 
 if (s->fifo_len == 0) {
-fprintf(stderr, "pl181: FIFO underflow\n");
+error_report("%s: FIFO underflow", __func__);
 return 0;
 }
 value = s->fifo[s->fifo_pos];
-- 
2.26.2

[PULL 04/23] hw/sd/milkymist: Create the SDBus at init()

2020-08-21 Thread Philippe Mathieu-Daudé

We don't need to wait until realize() to create the SDBus,
create it in init() directly.

Signed-off-by: Philippe Mathieu-Daudé 
Reviewed-by: Alistair Francis 
Message-Id: <20200705211016.15241-4-f4...@amsat.org>
---
 hw/sd/milkymist-memcard.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/hw/sd/milkymist-memcard.c b/hw/sd/milkymist-memcard.c
index 11f61294fcf..747c5c6136b 100644
--- a/hw/sd/milkymist-memcard.c
+++ b/hw/sd/milkymist-memcard.c
@@ -261,6 +261,9 @@ static void milkymist_memcard_init(Object *obj)
 memory_region_init_io(&s->regs_region, OBJECT(s), &memcard_mmio_ops, s,
 "milkymist-memcard", R_MAX * 4);
 sysbus_init_mmio(dev, &s->regs_region);
+
+qbus_create_inplace(&s->sdbus, sizeof(s->sdbus), TYPE_SD_BUS,
+DEVICE(obj), "sd-bus");
 }
 
 static void milkymist_memcard_realize(DeviceState *dev, Error **errp)
@@ -271,9 +274,6 @@ static void milkymist_memcard_realize(DeviceState *dev, 
Error **errp)
 DriveInfo *dinfo;
 Error *err = NULL;
 
-qbus_create_inplace(&s->sdbus, sizeof(s->sdbus), TYPE_SD_BUS,
-dev, "sd-bus");
-
 /* Create and plug in the sd card */
 /* FIXME use a qdev drive property instead of drive_get_next() */
 dinfo = drive_get_next(IF_SD);
-- 
2.26.2

[PULL 03/23] hw/lm32/milkymist: Un-inline milkymist_memcard_create()

2020-08-21 Thread Philippe Mathieu-Daudé

As we will modify milkymist_memcard_create(), move it first
to the source file where it is used.

Signed-off-by: Philippe Mathieu-Daudé 
Reviewed-by: Alistair Francis 
Message-Id: <20200705211016.15241-2-f4...@amsat.org>
---
 hw/lm32/milkymist-hw.h | 11 ---
 hw/lm32/milkymist.c| 11 +++
 2 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/hw/lm32/milkymist-hw.h b/hw/lm32/milkymist-hw.h
index 05e2c2a5a75..5dca5d52f57 100644
--- a/hw/lm32/milkymist-hw.h
+++ b/hw/lm32/milkymist-hw.h
@@ -31,17 +31,6 @@ static inline DeviceState *milkymist_hpdmc_create(hwaddr 
base)
 return dev;
 }
 
-static inline DeviceState *milkymist_memcard_create(hwaddr base)
-{
-DeviceState *dev;
-
-dev = qdev_new("milkymist-memcard");
-sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal);
-sysbus_mmio_map(SYS_BUS_DEVICE(dev), 0, base);
-
-return dev;
-}
-
 static inline DeviceState *milkymist_vgafb_create(hwaddr base,
 uint32_t fb_offset, uint32_t fb_mask)
 {
diff --git a/hw/lm32/milkymist.c b/hw/lm32/milkymist.c
index 85913bb68b6..469e3c43225 100644
--- a/hw/lm32/milkymist.c
+++ b/hw/lm32/milkymist.c
@@ -80,6 +80,17 @@ static void main_cpu_reset(void *opaque)
 env->deba = reset_info->flash_base;
 }
 
+static DeviceState *milkymist_memcard_create(hwaddr base)
+{
+DeviceState *dev;
+
+dev = qdev_new("milkymist-memcard");
+sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal);
+sysbus_mmio_map(SYS_BUS_DEVICE(dev), 0, base);
+
+return dev;
+}
+
 static void
 milkymist_init(MachineState *machine)
 {
-- 
2.26.2

[PULL 02/23] hw/sd/pxa2xx_mmci: Trivial simplification

2020-08-21 Thread Philippe Mathieu-Daudé

Avoid declaring PXA2xxMMCIState local variable, return it directly.

Signed-off-by: Philippe Mathieu-Daudé 
Reviewed-by: Alistair Francis 
Reviewed-by: Laurent Vivier 
Acked-by: Peter Maydell 
Message-Id: <20200705213350.24725-3-f4...@amsat.org>
---
 hw/sd/pxa2xx_mmci.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/hw/sd/pxa2xx_mmci.c b/hw/sd/pxa2xx_mmci.c
index 9482b9212dd..2996a2ef177 100644
--- a/hw/sd/pxa2xx_mmci.c
+++ b/hw/sd/pxa2xx_mmci.c
@@ -480,10 +480,8 @@ PXA2xxMMCIState *pxa2xx_mmci_init(MemoryRegion *sysmem,
 {
 DeviceState *dev;
 SysBusDevice *sbd;
-PXA2xxMMCIState *s;
 
 dev = qdev_new(TYPE_PXA2XX_MMCI);
-s = PXA2XX_MMCI(dev);
 sbd = SYS_BUS_DEVICE(dev);
 sysbus_mmio_map(sbd, 0, base);
 sysbus_connect_irq(sbd, 0, irq);
@@ -491,7 +489,7 @@ PXA2xxMMCIState *pxa2xx_mmci_init(MemoryRegion *sysmem,
 qdev_connect_gpio_out_named(dev, "tx-dma", 0, tx_dma);
 sysbus_realize_and_unref(sbd, &error_fatal);
 
-return s;
+return PXA2XX_MMCI(dev);
 }
 
 static void pxa2xx_mmci_set_inserted(DeviceState *dev, bool inserted)
-- 
2.26.2

[PULL 23/23] hw/sd: Correct the maximum size of a Standard Capacity SD Memory Card

2020-08-21 Thread Philippe Mathieu-Daudé

From: Bin Meng 

Per the SD spec, Standard Capacity SD Memory Card (SDSC) supports
capacity up to and including 2 GiB.

Fixes: 2d7adea4fe ("hw/sd: Support SDHC size cards")
Signed-off-by: Bin Meng 
Reviewed-by: Philippe Mathieu-Daudé 
Tested-by: Sai Pavan Boddu 
Message-Id: <1598021136-49525-2-git-send-email-bmeng...@gmail.com>
Signed-off-by: Philippe Mathieu-Daudé 
---
 hw/sd/sd.c | 6 --
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/hw/sd/sd.c b/hw/sd/sd.c
index 805e21fc883..483c4f17204 100644
--- a/hw/sd/sd.c
+++ b/hw/sd/sd.c
@@ -51,6 +51,8 @@
 
 //#define DEBUG_SD 1
 
+#define SDSC_MAX_CAPACITY   (2 * GiB)
+
 typedef enum {
 sd_r0 = 0,/* no response */
 sd_r1,/* normal response command */
@@ -314,7 +316,7 @@ static void sd_ocr_powerup(void *opaque)
 /* card power-up OK */
 sd->ocr = FIELD_DP32(sd->ocr, OCR, CARD_POWER_UP, 1);
 
-if (sd->size > 1 * GiB) {
+if (sd->size > SDSC_MAX_CAPACITY) {
 sd->ocr = FIELD_DP32(sd->ocr, OCR, CARD_CAPACITY, 1);
 }
 }
@@ -386,7 +388,7 @@ static void sd_set_csd(SDState *sd, uint64_t size)
 uint32_t sectsize = (1 << (SECTOR_SHIFT + 1)) - 1;
 uint32_t wpsize = (1 << (WPGROUP_SHIFT + 1)) - 1;
 
-if (size <= 1 * GiB) { /* Standard Capacity SD */
+if (size <= SDSC_MAX_CAPACITY) { /* Standard Capacity SD */
 sd->csd[0] = 0x00; /* CSD structure */
 sd->csd[1] = 0x26; /* Data read access-time-1 */
 sd->csd[2] = 0x00; /* Data read access-time-2 */
-- 
2.26.2

[PULL 14/23] hw/sd: Move sdcard legacy API to 'hw/sd/sdcard_legacy.h'

2020-08-21 Thread Philippe Mathieu-Daudé

omap_mmc.c is the last device left using the legacy sdcard API.
Move the prototype declarations into a separate header, to
make it clear this is a legacy API.

Reviewed-by: Alistair Francis 
Message-Id: <20180216022933.10945-8-f4...@amsat.org>
Signed-off-by: Philippe Mathieu-Daudé 
Acked-by: Peter Maydell 
---
 include/hw/sd/sd.h| 16 ---
 include/hw/sd/sdcard_legacy.h | 50 +++
 hw/sd/omap_mmc.c  |  2 +-
 hw/sd/sd.c|  1 +
 4 files changed, 52 insertions(+), 17 deletions(-)
 create mode 100644 include/hw/sd/sdcard_legacy.h

diff --git a/include/hw/sd/sd.h b/include/hw/sd/sd.h
index ace350e0e83..8767ab817c1 100644
--- a/include/hw/sd/sd.h
+++ b/include/hw/sd/sd.h
@@ -136,22 +136,6 @@ typedef struct {
 void (*set_readonly)(DeviceState *dev, bool readonly);
 } SDBusClass;
 
-/* Legacy functions to be used only by non-qdevified callers */
-SDState *sd_init(BlockBackend *bs, bool is_spi);
-int sd_do_command(SDState *sd, SDRequest *req,
-  uint8_t *response);
-void sd_write_data(SDState *sd, uint8_t value);
-uint8_t sd_read_data(SDState *sd);
-void sd_set_cb(SDState *sd, qemu_irq readonly, qemu_irq insert);
-/* sd_enable should not be used -- it is only used on the nseries boards,
- * where it is part of a broken implementation of the MMC card slot switch
- * (there should be two card slots which are multiplexed to a single MMC
- * controller, but instead we model it with one card and controller and
- * disable the card when the second slot is selected, so it looks like the
- * second slot is always empty).
- */
-void sd_enable(SDState *sd, bool enable);
-
 /* Functions to be used by qdevified callers (working via
  * an SDBus rather than directly with SDState)
  */
diff --git a/include/hw/sd/sdcard_legacy.h b/include/hw/sd/sdcard_legacy.h
new file mode 100644
index 000..8681f8089ba
--- /dev/null
+++ b/include/hw/sd/sdcard_legacy.h
@@ -0,0 +1,50 @@
+/*
+ * SD Memory Card emulation (deprecated legacy API)
+ *
+ * Copyright (c) 2006 Andrzej Zaborowski  
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *notice, this list of conditions and the following disclaimer in
+ *the documentation and/or other materials provided with the
+ *distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS''
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+ * PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#ifndef HW_SDCARD_LEGACY_H
+#define HW_SDCARD_LEGACY_H
+
+#include "hw/sd/sd.h"
+
+/* Legacy functions to be used only by non-qdevified callers */
+SDState *sd_init(BlockBackend *blk, bool is_spi);
+int sd_do_command(SDState *card, SDRequest *request, uint8_t *response);
+void sd_write_data(SDState *card, uint8_t value);
+uint8_t sd_read_data(SDState *card);
+void sd_set_cb(SDState *card, qemu_irq readonly, qemu_irq insert);
+
+/* sd_enable should not be used -- it is only used on the nseries boards,
+ * where it is part of a broken implementation of the MMC card slot switch
+ * (there should be two card slots which are multiplexed to a single MMC
+ * controller, but instead we model it with one card and controller and
+ * disable the card when the second slot is selected, so it looks like the
+ * second slot is always empty).
+ */
+void sd_enable(SDState *card, bool enable);
+
+#endif /* HW_SDCARD_LEGACY_H */
diff --git a/hw/sd/omap_mmc.c b/hw/sd/omap_mmc.c
index 4088a8a80bc..7d33c59226a 100644
--- a/hw/sd/omap_mmc.c
+++ b/hw/sd/omap_mmc.c
@@ -23,7 +23,7 @@
 #include "qemu/log.h"
 #include "hw/irq.h"
 #include "hw/arm/omap.h"
-#include "hw/sd/sd.h"
+#include "hw/sd/sdcard_legacy.h"
 
 struct omap_mmc_s {
 qemu_irq irq;
diff --git a/hw/sd/sd.c b/hw/sd/sd.c
index a5ae5dccbe5..5c6f5c94f3d 100644
--- a/hw/sd/sd.c
+++ b/hw/sd/sd.c
@@ -37,6 +37,7 @@
 #include "hw/registerfields.h"
 #include "sysemu/block-backend.h"
 #include "hw/sd/sd.h"
+#include "hw/sd/sdcard_legacy.h"
 #include "migration/vmstate.h"
 #include "qapi/error.h"
 #include "qemu/bitmap.h"
-- 
2.

[PULL 13/23] hw/sd/sdcard: Make sd_data_ready() static

2020-08-21 Thread Philippe Mathieu-Daudé

sd_data_ready() belongs to the legacy API. As its last user has
been converted to the SDBus API, make it static.

Reviewed-by: Alistair Francis 
Message-Id: <20180216022933.10945-7-f4...@amsat.org>
Signed-off-by: Philippe Mathieu-Daudé 
Acked-by: Peter Maydell 
---
 include/hw/sd/sd.h | 1 -
 hw/sd/sd.c | 2 +-
 2 files changed, 1 insertion(+), 2 deletions(-)

diff --git a/include/hw/sd/sd.h b/include/hw/sd/sd.h
index a84b8e274a3..ace350e0e83 100644
--- a/include/hw/sd/sd.h
+++ b/include/hw/sd/sd.h
@@ -143,7 +143,6 @@ int sd_do_command(SDState *sd, SDRequest *req,
 void sd_write_data(SDState *sd, uint8_t value);
 uint8_t sd_read_data(SDState *sd);
 void sd_set_cb(SDState *sd, qemu_irq readonly, qemu_irq insert);
-bool sd_data_ready(SDState *sd);
 /* sd_enable should not be used -- it is only used on the nseries boards,
  * where it is part of a broken implementation of the MMC card slot switch
  * (there should be two card slots which are multiplexed to a single MMC
diff --git a/hw/sd/sd.c b/hw/sd/sd.c
index fad9cf1ee7a..a5ae5dccbe5 100644
--- a/hw/sd/sd.c
+++ b/hw/sd/sd.c
@@ -2082,7 +2082,7 @@ uint8_t sd_read_data(SDState *sd)
 return ret;
 }
 
-bool sd_data_ready(SDState *sd)
+static bool sd_data_ready(SDState *sd)
 {
 return sd->state == sd_sendingdata_state;
 }
-- 
2.26.2

[PULL 00/23] SD/MMC patches for 2020-08-21

2020-08-21 Thread Philippe Mathieu-Daudé

The following changes since commit d6f83a72a7db94a3ede9f5cc4fb39f9c8e89f954:

  Merge remote-tracking branch 'remotes/philmd-gitlab/tags/acceptance-testing-20200812' into staging (2020-08-21 14:51:43 +0100)

are available in the Git repository at:

  https://gitlab.com/philmd/qemu.git tags/sd-next-20200821

for you to fetch changes up to 6d2d4069c47e23b9e3913f9c8204fd0edcb99fb3:

  hw/sd: Correct the maximum size of a Standard Capacity SD Memory Card (2020-08-21 16:49:22 +0200)


SD/MMC patches

- Convert legacy SD host controller to the SDBus API
- Move legacy API to a separate "sdcard_legacy.h" header
- Introduce methods to access multiple bytes on SDBus data lines
- Fix 'switch function' group location
- Fix SDSC maximum card size (2GB)

CI jobs result:
  https://gitlab.com/philmd/qemu/-/pipelines/180605963


Alistair Francis (1):
  hw/sd/pl181: Replace fprintf(stderr, "*\n") with error_report()

Bin Meng (2):
  hw/sd: Fix incorrect populated function switch status data structure
  hw/sd: Correct the maximum size of a Standard Capacity SD Memory Card

Philippe Mathieu-Daudé (20):
  hw/sd/pxa2xx_mmci: Do not create SD card within the SD host controller
  hw/sd/pxa2xx_mmci: Trivial simplification
  hw/lm32/milkymist: Un-inline milkymist_memcard_create()
  hw/sd/milkymist: Create the SDBus at init()
  hw/sd/milkymist: Do not create SD card within the SD host controller
  hw/sd/pl181: Rename pl181_send_command() as pl181_do_command()
  hw/sd/pl181: Add TODO to use Fifo32 API
  hw/sd/pl181: Use named GPIOs
  hw/sd/pl181: Expose a SDBus and connect the SDCard to it
  hw/sd/pl181: Do not create SD card within the SD host controller
  hw/sd/pl181: Replace disabled fprintf()s by trace events
  hw/sd/sdcard: Make sd_data_ready() static
  hw/sd: Move sdcard legacy API to 'hw/sd/sdcard_legacy.h'
  hw/sd: Rename read/write_data() as read/write_byte()
  hw/sd: Rename sdbus_write_data() as sdbus_write_byte()
  hw/sd: Rename sdbus_read_data() as sdbus_read_byte()
  hw/sd: Add sdbus_write_data() to write multiples bytes on the data
line
  hw/sd: Use sdbus_write_data() instead of sdbus_write_byte when
possible
  hw/sd: Add sdbus_read_data() to read multiples bytes on the data line
  hw/sd: Use sdbus_read_data() instead of sdbus_read_byte() when
possible

 hw/lm32/milkymist-hw.h|  11 
 include/hw/arm/pxa.h  |   3 +-
 include/hw/sd/sd.h|  73 +++---
 include/hw/sd/sdcard_legacy.h |  50 +++
 hw/arm/integratorcp.c |  17 +-
 hw/arm/pxa2xx.c   |  39 +---
 hw/arm/realview.c |  16 -
 hw/arm/versatilepb.c  |  26 +++-
 hw/arm/vexpress.c |  15 -
 hw/lm32/milkymist.c   |  24 
 hw/sd/allwinner-sdhost.c  |  24 +++-
 hw/sd/bcm2835_sdhost.c|   4 +-
 hw/sd/core.c  |  38 ++--
 hw/sd/milkymist-memcard.c |  71 --
 hw/sd/omap_mmc.c  |  10 +--
 hw/sd/pl181.c | 111 +++---
 hw/sd/pxa2xx_mmci.c   |  19 ++
 hw/sd/sd.c|  28 +
 hw/sd/sdhci.c |  46 --
 hw/sd/ssi-sd.c|   2 +-
 hw/sd/trace-events|  10 +++
 21 files changed, 415 insertions(+), 222 deletions(-)
 create mode 100644 include/hw/sd/sdcard_legacy.h

-- 
2.26.2

[PULL 11/23] hw/sd/pl181: Do not create SD card within the SD host controller

2020-08-21 Thread Philippe Mathieu-Daudé

SD/MMC host controllers provide a SD Bus to plug SD cards,
but don't come with SD card plugged in :) Let the machine/board
model create and plug the SD cards when required.

Signed-off-by: Philippe Mathieu-Daudé 
Reviewed-by: Alistair Francis 
Reviewed-by: Peter Maydell 
Acked-by: Peter Maydell 
Message-Id: <20200705204630.4133-8-f4...@amsat.org>
---
 hw/arm/integratorcp.c | 13 +
 hw/arm/realview.c | 12 
 hw/arm/versatilepb.c  | 26 --
 hw/arm/vexpress.c | 11 +++
 hw/sd/pl181.c | 19 +--
 5 files changed, 61 insertions(+), 20 deletions(-)

diff --git a/hw/arm/integratorcp.c b/hw/arm/integratorcp.c
index 16c4d750a4f..fe7c2b9d4b1 100644
--- a/hw/arm/integratorcp.c
+++ b/hw/arm/integratorcp.c
@@ -25,6 +25,7 @@
 #include "hw/char/pl011.h"
 #include "hw/hw.h"
 #include "hw/irq.h"
+#include "hw/sd/sd.h"
 
 #define TYPE_INTEGRATOR_CM "integrator_core"
 #define INTEGRATOR_CM(obj) \
@@ -595,6 +596,7 @@ static void integratorcp_init(MachineState *machine)
 MemoryRegion *ram_alias = g_new(MemoryRegion, 1);
 qemu_irq pic[32];
 DeviceState *dev, *sic, *icp;
+DriveInfo *dinfo;
 int i;
 
 cpuobj = object_new(machine->cpu_type);
@@ -649,6 +651,17 @@ static void integratorcp_init(MachineState *machine)
   qdev_get_gpio_in_named(icp, ICP_GPIO_MMC_WPROT, 0));
 qdev_connect_gpio_out_named(dev, "card-inserted", 0,
   qdev_get_gpio_in_named(icp, ICP_GPIO_MMC_CARDIN, 0));
+dinfo = drive_get_next(IF_SD);
+if (dinfo) {
+DeviceState *card;
+
+card = qdev_new(TYPE_SD_CARD);
+qdev_prop_set_drive_err(card, "drive", blk_by_legacy_dinfo(dinfo),
+&error_fatal);
+qdev_realize_and_unref(card, qdev_get_child_bus(dev, "sd-bus"),
+   &error_fatal);
+}
+
 sysbus_create_varargs("pl041", 0x1d00, pic[25], NULL);
 
 if (nd_table[0].used)
diff --git a/hw/arm/realview.c b/hw/arm/realview.c
index 3e2360c261f..5f1f36b15cd 100644
--- a/hw/arm/realview.c
+++ b/hw/arm/realview.c
@@ -27,6 +27,7 @@
 #include "hw/intc/realview_gic.h"
 #include "hw/irq.h"
 #include "hw/i2c/arm_sbcon_i2c.h"
+#include "hw/sd/sd.h"
 
 #define SMP_BOOT_ADDR 0xe000
 #define SMP_BOOTREG_ADDR 0x1030
@@ -69,6 +70,7 @@ static void realview_init(MachineState *machine,
 qemu_irq mmc_irq[2];
 PCIBus *pci_bus = NULL;
 NICInfo *nd;
+DriveInfo *dinfo;
 I2CBus *i2c;
 int n;
 unsigned int smp_cpus = machine->smp.cpus;
@@ -236,6 +238,16 @@ static void realview_init(MachineState *machine,
 qemu_irq_invert(qdev_get_gpio_in(gpio2, 0)));
 qdev_connect_gpio_out_named(dev, "card-read-only", 0, mmc_irq[0]);
 qdev_connect_gpio_out_named(dev, "card-inserted", 0, mmc_irq[1]);
+dinfo = drive_get_next(IF_SD);
+if (dinfo) {
+DeviceState *card;
+
+card = qdev_new(TYPE_SD_CARD);
+qdev_prop_set_drive_err(card, "drive", blk_by_legacy_dinfo(dinfo),
+&error_fatal);
+qdev_realize_and_unref(card, qdev_get_child_bus(dev, "sd-bus"),
+   &error_fatal);
+}
 
 sysbus_create_simple("pl031", 0x10017000, pic[10]);
 
diff --git a/hw/arm/versatilepb.c b/hw/arm/versatilepb.c
index 9dc93182b6b..9127579984f 100644
--- a/hw/arm/versatilepb.c
+++ b/hw/arm/versatilepb.c
@@ -25,6 +25,7 @@
 #include "hw/block/flash.h"
 #include "qemu/error-report.h"
 #include "hw/char/pl011.h"
+#include "hw/sd/sd.h"
 
 #define VERSATILE_FLASH_ADDR 0x3400
 #define VERSATILE_FLASH_SIZE (64 * 1024 * 1024)
@@ -309,8 +310,29 @@ static void versatile_init(MachineState *machine, int 
board_id)
 /* Wire up the mux control signals from the SYS_CLCD register */
 qdev_connect_gpio_out(sysctl, 0, qdev_get_gpio_in(dev, 0));
 
-sysbus_create_varargs("pl181", 0x10005000, sic[22], sic[1], NULL);
-sysbus_create_varargs("pl181", 0x1000b000, sic[23], sic[2], NULL);
+dev = sysbus_create_varargs("pl181", 0x10005000, sic[22], sic[1], NULL);
+dinfo = drive_get_next(IF_SD);
+if (dinfo) {
+DeviceState *card;
+
+card = qdev_new(TYPE_SD_CARD);
+qdev_prop_set_drive_err(card, "drive", blk_by_legacy_dinfo(dinfo),
+&error_fatal);
+qdev_realize_and_unref(card, qdev_get_child_bus(dev, "sd-bus"),
+   &error_fatal);
+}
+
+dev = sysbus_create_varargs("pl181", 0x1000b000, sic[23], sic[2], NULL);
+dinfo = drive_get_next(IF_SD);
+if (dinfo) {
+DeviceState *card;
+
+card = qdev_new(TYPE_SD_CARD);
+qdev_prop_set_drive_err(card, "drive", blk_by_legacy_dinfo(dinfo),
+&error_fatal);
+qdev_realize_and_unref(card, qdev_get_child_bus(dev, "sd-bus"),
+   &error_fatal);
+}
 
 /* Add PL031 Real Time Clock. */
 sysbus_create_sim

[PULL 10/23] hw/sd/pl181: Expose a SDBus and connect the SDCard to it

2020-08-21 Thread Philippe Mathieu-Daudé

Convert the controller to the SDBus API:
- add a TYPE_PL181_BUS object of type TYPE_SD_BUS,
- adapt the SDBusClass set_inserted/set_readonly handlers
- create the bus in the PL181 controller
- switch legacy sd_*() API to the sdbus_*() API.

Signed-off-by: Philippe Mathieu-Daudé 
Reviewed-by: Alistair Francis 
Acked-by: Peter Maydell 
Message-Id: <20200705204630.4133-7-f4...@amsat.org>
---
 hw/sd/pl181.c | 67 +++
 1 file changed, 51 insertions(+), 16 deletions(-)

diff --git a/hw/sd/pl181.c b/hw/sd/pl181.c
index ab4cd733a4d..f6de06ece82 100644
--- a/hw/sd/pl181.c
+++ b/hw/sd/pl181.c
@@ -17,6 +17,7 @@
 #include "qemu/module.h"
 #include "qemu/error-report.h"
 #include "qapi/error.h"
+#include "hw/qdev-properties.h"
 
 //#define DEBUG_PL181 1
 
@@ -32,11 +33,13 @@ do { printf("pl181: " fmt , ## __VA_ARGS__); } while (0)
 #define TYPE_PL181 "pl181"
 #define PL181(obj) OBJECT_CHECK(PL181State, (obj), TYPE_PL181)
 
+#define TYPE_PL181_BUS "pl181-bus"
+
 typedef struct PL181State {
 SysBusDevice parent_obj;
 
 MemoryRegion iomem;
-SDState *card;
+SDBus sdbus;
 uint32_t clock;
 uint32_t power;
 uint32_t cmdarg;
@@ -183,7 +186,7 @@ static void pl181_do_command(PL181State *s)
 request.cmd = s->cmd & PL181_CMD_INDEX;
 request.arg = s->cmdarg;
 DPRINTF("Command %d %08x\n", request.cmd, request.arg);
-rlen = sd_do_command(s->card, &request, response);
+rlen = sdbus_do_command(&s->sdbus, &request, response);
 if (rlen < 0)
 goto error;
 if (s->cmd & PL181_CMD_RESPONSE) {
@@ -224,12 +227,12 @@ static void pl181_fifo_run(PL181State *s)
 int is_read;
 
 is_read = (s->datactrl & PL181_DATA_DIRECTION) != 0;
-if (s->datacnt != 0 && (!is_read || sd_data_ready(s->card))
+if (s->datacnt != 0 && (!is_read || sdbus_data_ready(&s->sdbus))
 && !s->linux_hack) {
 if (is_read) {
 n = 0;
 while (s->datacnt && s->fifo_len < PL181_FIFO_LEN) {
-value |= (uint32_t)sd_read_data(s->card) << (n * 8);
+value |= (uint32_t)sdbus_read_data(&s->sdbus) << (n * 8);
 s->datacnt--;
 n++;
 if (n == 4) {
@@ -250,7 +253,7 @@ static void pl181_fifo_run(PL181State *s)
 }
 n--;
 s->datacnt--;
-sd_write_data(s->card, value & 0xff);
+sdbus_write_data(&s->sdbus, value & 0xff);
 value >>= 8;
 }
 }
@@ -456,6 +459,20 @@ static const MemoryRegionOps pl181_ops = {
 .endianness = DEVICE_NATIVE_ENDIAN,
 };
 
+static void pl181_set_readonly(DeviceState *dev, bool level)
+{
+PL181State *s = (PL181State *)dev;
+
+qemu_set_irq(s->card_readonly, level);
+}
+
+static void pl181_set_inserted(DeviceState *dev, bool level)
+{
+PL181State *s = (PL181State *)dev;
+
+qemu_set_irq(s->card_inserted, level);
+}
+
 static void pl181_reset(DeviceState *d)
 {
 PL181State *s = PL181(d);
@@ -479,12 +496,9 @@ static void pl181_reset(DeviceState *d)
 s->mask[0] = 0;
 s->mask[1] = 0;
 
-/* We can assume our GPIO outputs have been wired up now */
-sd_set_cb(s->card, s->card_readonly, s->card_inserted);
-/* Since we're still using the legacy SD API the card is not plugged
- * into any bus, and we must reset it manually.
- */
-device_legacy_reset(DEVICE(s->card));
+/* Reset other state based on current card insertion/readonly status */
+pl181_set_inserted(DEVICE(s), sdbus_get_inserted(&s->sdbus));
+pl181_set_readonly(DEVICE(s), sdbus_get_readonly(&s->sdbus));
 }
 
 static void pl181_init(Object *obj)
@@ -499,19 +513,24 @@ static void pl181_init(Object *obj)
 sysbus_init_irq(sbd, &s->irq[1]);
 qdev_init_gpio_out_named(dev, &s->card_readonly, "card-read-only", 1);
 qdev_init_gpio_out_named(dev, &s->card_inserted, "card-inserted", 1);
+
+qbus_create_inplace(&s->sdbus, sizeof(s->sdbus),
+TYPE_PL181_BUS, dev, "sd-bus");
 }
 
 static void pl181_realize(DeviceState *dev, Error **errp)
 {
-PL181State *s = PL181(dev);
+DeviceState *card;
 DriveInfo *dinfo;
 
 /* FIXME use a qdev drive property instead of drive_get_next() */
+card = qdev_new(TYPE_SD_CARD);
 dinfo = drive_get_next(IF_SD);
-s->card = sd_init(dinfo ? blk_by_legacy_dinfo(dinfo) : NULL, false);
-if (s->card == NULL) {
-error_setg(errp, "sd_init failed");
-}
+qdev_prop_set_drive_err(card, "drive", blk_by_legacy_dinfo(dinfo),
+&error_fatal);
+qdev_realize_and_unref(card,
+   qdev_get_child_bus(dev, "sd-bus"),
+   &error_fatal);
 }
 
 static void pl181_class_init(ObjectClass *klass, void *data)
@@ -533,9 +552,25 @@ static const TypeInfo pl181_info = {
 .class_init= pl181_class_init,
 };
 
+static void pl181_bus_class_init(ObjectClass *klas

Re: [PATCH v5 08/10] iotests.py: add verify_o_direct helper

2020-08-21 Thread Nir Soffer

On Fri, Aug 21, 2020 at 5:12 PM Vladimir Sementsov-Ogievskiy
 wrote:
>
> Add python notrun-helper similar to _check_o_direct for bash tests.
> To be used in the following commit.
>
> Suggested-by: Nir Soffer 
> Signed-off-by: Vladimir Sementsov-Ogievskiy 
> ---
>  tests/qemu-iotests/iotests.py | 12 
>  1 file changed, 12 insertions(+)
>
> diff --git a/tests/qemu-iotests/iotests.py b/tests/qemu-iotests/iotests.py
> index 717b5b652c..7f1aa187a9 100644
> --- a/tests/qemu-iotests/iotests.py
> +++ b/tests/qemu-iotests/iotests.py
> @@ -29,6 +29,7 @@ import struct
>  import subprocess
>  import sys
>  import time
> +import errno
>  from typing import (Any, Callable, Dict, Iterable,
>  List, Optional, Sequence, Tuple, TypeVar)
>  import unittest
> @@ -1083,6 +1084,17 @@ def _verify_aio_mode(supported_aio_modes: 
> Sequence[str] = ()) -> None:
>  if supported_aio_modes and (aiomode not in supported_aio_modes):
>  notrun('not suitable for this aio mode: %s' % aiomode)
>
> +def verify_o_direct() -> None:
> +with FilePath('test_o_direct') as f:
> +try:
> +fd = os.open(f, os.O_DIRECT | os.O_CREAT | os.O_RDWR)
> +except OSError as e:
> +if e.errno != errno.EINVAL:
> +raise
> +notrun(f'file system at {test_dir} does not support O_DIRECT')
> +else:
> +os.close(fd)
> +
>  def supports_quorum():
>  return 'quorum' in qemu_img_pipe('--help')
>
> --
> 2.21.3
>

Reviewed-by: Nir Soffer

[PULL 01/23] hw/sd/pxa2xx_mmci: Do not create SD card within the SD host controller

2020-08-21 Thread Philippe Mathieu-Daudé

SD/MMC host controllers provide a SD Bus to plug SD cards,
but don't come with SD card plugged in :)

The machine/board object is where the SD cards are created.
Since the PXA2xx is not qdevified, for now create the cards
in pxa270_init() which is the SoC model.
In the future we will move this to the board model.

Signed-off-by: Philippe Mathieu-Daudé 
Reviewed-by: Alistair Francis 
Reviewed-by: Peter Maydell 
Acked-by: Peter Maydell 
Message-Id: <20200705213350.24725-2-f4...@amsat.org>
---
 include/hw/arm/pxa.h |  3 +--
 hw/arm/pxa2xx.c  | 39 +--
 hw/sd/pxa2xx_mmci.c  | 11 ++-
 3 files changed, 32 insertions(+), 21 deletions(-)

diff --git a/include/hw/arm/pxa.h b/include/hw/arm/pxa.h
index 8843e5f9107..d99b6192daf 100644
--- a/include/hw/arm/pxa.h
+++ b/include/hw/arm/pxa.h
@@ -89,8 +89,7 @@ void pxa2xx_lcd_vsync_notifier(PXA2xxLCDState *s, qemu_irq 
handler);
 typedef struct PXA2xxMMCIState PXA2xxMMCIState;
 PXA2xxMMCIState *pxa2xx_mmci_init(MemoryRegion *sysmem,
 hwaddr base,
-BlockBackend *blk, qemu_irq irq,
-qemu_irq rx_dma, qemu_irq tx_dma);
+qemu_irq irq, qemu_irq rx_dma, qemu_irq tx_dma);
 void pxa2xx_mmci_handlers(PXA2xxMMCIState *s, qemu_irq readonly,
 qemu_irq coverswitch);
 
diff --git a/hw/arm/pxa2xx.c b/hw/arm/pxa2xx.c
index 6203c4cfe0b..20fa201dd57 100644
--- a/hw/arm/pxa2xx.c
+++ b/hw/arm/pxa2xx.c
@@ -22,6 +22,7 @@
 #include "hw/irq.h"
 #include "hw/qdev-properties.h"
 #include "hw/ssi/ssi.h"
+#include "hw/sd/sd.h"
 #include "chardev/char-fe.h"
 #include "sysemu/blockdev.h"
 #include "sysemu/qtest.h"
@@ -2136,15 +2137,24 @@ PXA2xxState *pxa270_init(MemoryRegion *address_space,
 
 s->gpio = pxa2xx_gpio_init(0x40e0, s->cpu, s->pic, 121);
 
-dinfo = drive_get(IF_SD, 0, 0);
-if (!dinfo && !qtest_enabled()) {
-warn_report("missing SecureDigital device");
-}
 s->mmc = pxa2xx_mmci_init(address_space, 0x4110,
-dinfo ? blk_by_legacy_dinfo(dinfo) : NULL,
 qdev_get_gpio_in(s->pic, PXA2XX_PIC_MMC),
 qdev_get_gpio_in(s->dma, PXA2XX_RX_RQ_MMCI),
 qdev_get_gpio_in(s->dma, PXA2XX_TX_RQ_MMCI));
+dinfo = drive_get(IF_SD, 0, 0);
+if (dinfo) {
+DeviceState *carddev;
+
+/* Create and plug in the sd card */
+carddev = qdev_new(TYPE_SD_CARD);
+qdev_prop_set_drive_err(carddev, "drive",
+blk_by_legacy_dinfo(dinfo), &error_fatal);
+qdev_realize_and_unref(carddev, qdev_get_child_bus(DEVICE(s->mmc),
+   "sd-bus"),
+   &error_fatal);
+} else if (!qtest_enabled()) {
+warn_report("missing SecureDigital device");
+}
 
 for (i = 0; pxa270_serial[i].io_base; i++) {
 if (serial_hd(i)) {
@@ -2260,15 +2270,24 @@ PXA2xxState *pxa255_init(MemoryRegion *address_space, 
unsigned int sdram_size)
 
 s->gpio = pxa2xx_gpio_init(0x40e0, s->cpu, s->pic, 85);
 
-dinfo = drive_get(IF_SD, 0, 0);
-if (!dinfo && !qtest_enabled()) {
-warn_report("missing SecureDigital device");
-}
 s->mmc = pxa2xx_mmci_init(address_space, 0x4110,
-dinfo ? blk_by_legacy_dinfo(dinfo) : NULL,
 qdev_get_gpio_in(s->pic, PXA2XX_PIC_MMC),
 qdev_get_gpio_in(s->dma, PXA2XX_RX_RQ_MMCI),
 qdev_get_gpio_in(s->dma, PXA2XX_TX_RQ_MMCI));
+dinfo = drive_get(IF_SD, 0, 0);
+if (dinfo) {
+DeviceState *carddev;
+
+/* Create and plug in the sd card */
+carddev = qdev_new(TYPE_SD_CARD);
+qdev_prop_set_drive_err(carddev, "drive",
+blk_by_legacy_dinfo(dinfo), &error_fatal);
+qdev_realize_and_unref(carddev, qdev_get_child_bus(DEVICE(s->mmc),
+   "sd-bus"),
+   &error_fatal);
+} else if (!qtest_enabled()) {
+warn_report("missing SecureDigital device");
+}
 
 for (i = 0; pxa255_serial[i].io_base; i++) {
 if (serial_hd(i)) {
diff --git a/hw/sd/pxa2xx_mmci.c b/hw/sd/pxa2xx_mmci.c
index 68bed24480e..9482b9212dd 100644
--- a/hw/sd/pxa2xx_mmci.c
+++ b/hw/sd/pxa2xx_mmci.c
@@ -476,10 +476,9 @@ static const MemoryRegionOps pxa2xx_mmci_ops = {
 
 PXA2xxMMCIState *pxa2xx_mmci_init(MemoryRegion *sysmem,
 hwaddr base,
-BlockBackend *blk, qemu_irq irq,
-qemu_irq rx_dma, qemu_irq tx_dma)
+qemu_irq irq, qemu_irq rx_dma, qemu_irq tx_dma)
 {
-DeviceState *dev, *carddev;
+DeviceState *dev;
 SysBusDevice *sbd;
 PXA2xxMMCIState *s;
 
@@ -492,12 +491,6 @@ PXA2xxMMCIState *pxa2xx_mmci_init(MemoryRegion *sysmem,
 qdev_connect_gpio_out_named(dev, "tx-dma", 0, tx_dma);
 sysbus_realize_and_unre

[PATCH v4 4/6] util: introduce qemu_open and qemu_create with error reporting

2020-08-21 Thread Daniel P . Berrangé

qemu_open_old() works like open(): set errno and return -1 on failure.
It has even more failure modes, though.  Reporting the error clearly
to users is basically impossible for many of them.

Our standard cure for "errno is too coarse" is the Error object.
Introduce two new helper methods:

  int qemu_open(const char *name, int flags, Error **errp);
  int qemu_create(const char *name, int flags, mode_t mode, Error **errp);

Note that with this design we no longer require or even accept the
O_CREAT flag. Avoiding overloading the two distinct operations
means we can avoid variable arguments which would prevent 'errp' from
being the last argument. It also gives us a guarantee that the 'mode' is
given when creating files, avoiding a latent security bug.

Signed-off-by: Daniel P. Berrangé 
---
 include/qemu/osdep.h |  6 ++
 util/osdep.c | 21 +
 2 files changed, 23 insertions(+), 4 deletions(-)

diff --git a/include/qemu/osdep.h b/include/qemu/osdep.h
index 18333e9006..13a821845b 100644
--- a/include/qemu/osdep.h
+++ b/include/qemu/osdep.h
@@ -497,7 +497,13 @@ int qemu_madvise(void *addr, size_t len, int advice);
 int qemu_mprotect_rwx(void *addr, size_t size);
 int qemu_mprotect_none(void *addr, size_t size);
 
+/*
+ * Don't introduce new usage of this function, prefer the following
+ * qemu_open/qemu_create that take an "Error **errp"
+ */
 int qemu_open_old(const char *name, int flags, ...);
+int qemu_open(const char *name, int flags, Error **errp);
+int qemu_create(const char *name, int flags, mode_t mode, Error **errp);
 int qemu_close(int fd);
 int qemu_unlink(const char *name);
 #ifndef _WIN32
diff --git a/util/osdep.c b/util/osdep.c
index 9c7118d3cb..a4956fbf6b 100644
--- a/util/osdep.c
+++ b/util/osdep.c
@@ -344,10 +344,7 @@ qemu_open_internal(const char *name, int flags, mode_t 
mode, Error **errp)
 #endif /* ! O_CLOEXEC */
 
 if (ret == -1) {
-const char *action = "open";
-if (flags & O_CREAT) {
-action = "create";
-}
+const char *action = flags & O_CREAT ? "create" : "open";
 error_setg_errno(errp, errno, "Could not %s '%s' flags 0x%x",
  action, name, flags);
 }
@@ -357,6 +354,22 @@ qemu_open_internal(const char *name, int flags, mode_t 
mode, Error **errp)
 }
 
 
+int qemu_open(const char *name, int flags, Error **errp)
+{
+assert(!(flags & O_CREAT));
+
+return qemu_open_internal(name, flags, 0, errp);
+}
+
+
+int qemu_create(const char *name, int flags, mode_t mode, Error **errp)
+{
+assert(!(flags & O_CREAT));
+
+return qemu_open_internal(name, flags | O_CREAT, mode, errp);
+}
+
+
 int qemu_open_old(const char *name, int flags, ...)
 {
 va_list ap;
-- 
2.26.2

[PATCH v4 5/6] util: give a specific error message when O_DIRECT doesn't work

2020-08-21 Thread Daniel P . Berrangé

A common error scenario is to tell QEMU to use O_DIRECT in combination
with a filesystem that doesn't support it. To aid users in diagnosing
their mistake, we want to provide a clear error message when this happens.

Reviewed-by: Eric Blake 
Signed-off-by: Daniel P. Berrangé 
---
 util/osdep.c | 13 +
 1 file changed, 13 insertions(+)

diff --git a/util/osdep.c b/util/osdep.c
index a4956fbf6b..6c24985f7a 100644
--- a/util/osdep.c
+++ b/util/osdep.c
@@ -345,6 +345,19 @@ qemu_open_internal(const char *name, int flags, mode_t 
mode, Error **errp)
 
 if (ret == -1) {
 const char *action = flags & O_CREAT ? "create" : "open";
+#ifdef O_DIRECT
+if (errno == EINVAL && (flags & O_DIRECT)) {
+ret = open(name, flags & ~O_DIRECT, mode);
+if (ret != -1) {
+close(ret);
+error_setg(errp, "Could not %s '%s' flags 0x%x: "
+   "filesystem does not support O_DIRECT",
+   action, name, flags);
+errno = EINVAL; /* close() clobbered earlier errno */
+return -1;
+}
+}
+#endif /* O_DIRECT */
 error_setg_errno(errp, errno, "Could not %s '%s' flags 0x%x",
  action, name, flags);
 }
-- 
2.26.2

[PATCH v4 6/6] block/file: switch to use qemu_open/qemu_create for improved errors

2020-08-21 Thread Daniel P . Berrangé

Currently, when using cache=none on a filesystem lacking O_DIRECT
support, such as tmpfs, at startup QEMU prints

qemu-system-x86_64: -drive file=/tmp/foo.img,cache=none: file system may not 
support O_DIRECT
qemu-system-x86_64: -drive file=/tmp/foo.img,cache=none: Could not open 
'/tmp/foo.img': Invalid argument

while at QMP level the hint is missing, so QEMU reports just

  "error": {
  "class": "GenericError",
  "desc": "Could not open '/tmp/foo.img': Invalid argument"
  }

which is close to useless for the end user trying to figure out what
they did wrong.

With this change at startup QEMU prints

qemu-system-x86_64: -drive file=/tmp/foo.img,cache=none: Unable to open 
'/tmp/foo.img' flags 0x4000: filesystem does not support O_DIRECT

while at the QMP level QEMU reports a massively more informative

  "error": {
 "class": "GenericError",
 "desc": "Unable to open '/tmp/foo.img' flags 0x4002: filesystem does not 
support O_DIRECT"
  }

Reviewed-by: Eric Blake 
Reviewed-by: Philippe Mathieu-Daudé 
Signed-off-by: Daniel P. Berrangé 
---
 block/file-posix.c| 18 +++---
 block/file-win32.c|  6 ++
 tests/qemu-iotests/051.out|  4 ++--
 tests/qemu-iotests/051.pc.out |  4 ++--
 tests/qemu-iotests/061.out|  2 +-
 tests/qemu-iotests/069.out|  2 +-
 tests/qemu-iotests/082.out|  4 ++--
 tests/qemu-iotests/111.out|  2 +-
 tests/qemu-iotests/226.out|  6 +++---
 tests/qemu-iotests/232.out| 12 ++--
 tests/qemu-iotests/244.out|  6 +++---
 11 files changed, 30 insertions(+), 36 deletions(-)

diff --git a/block/file-posix.c b/block/file-posix.c
index bac2566f10..c63926d592 100644
--- a/block/file-posix.c
+++ b/block/file-posix.c
@@ -630,11 +630,10 @@ static int raw_open_common(BlockDriverState *bs, QDict 
*options,
 raw_parse_flags(bdrv_flags, &s->open_flags, false);
 
 s->fd = -1;
-fd = qemu_open_old(filename, s->open_flags, 0644);
+fd = qemu_open(filename, s->open_flags, errp);
 ret = fd < 0 ? -errno : 0;
 
 if (ret < 0) {
-error_setg_file_open(errp, -ret, filename);
 if (ret == -EROFS) {
 ret = -EACCES;
 }
@@ -1032,15 +1031,13 @@ static int raw_reconfigure_getfd(BlockDriverState *bs, 
int flags,
 }
 }
 
-/* If we cannot use fcntl, or fcntl failed, fall back to qemu_open_old() */
+/* If we cannot use fcntl, or fcntl failed, fall back to qemu_open() */
 if (fd == -1) {
 const char *normalized_filename = bs->filename;
 ret = raw_normalize_devicepath(&normalized_filename, errp);
 if (ret >= 0) {
-assert(!(*open_flags & O_CREAT));
-fd = qemu_open_old(normalized_filename, *open_flags);
+fd = qemu_open(normalized_filename, *open_flags, errp);
 if (fd == -1) {
-error_setg_errno(errp, errno, "Could not reopen file");
 return -1;
 }
 }
@@ -2411,10 +2408,9 @@ raw_co_create(BlockdevCreateOptions *options, Error 
**errp)
 }
 
 /* Create file */
-fd = qemu_open_old(file_opts->filename, O_RDWR | O_CREAT | O_BINARY, 0644);
+fd = qemu_create(file_opts->filename, O_RDWR | O_BINARY, 0644, errp);
 if (fd < 0) {
 result = -errno;
-error_setg_errno(errp, -result, "Could not create file");
 goto out;
 }
 
@@ -3335,7 +3331,7 @@ static bool setup_cdrom(char *bsd_path, Error **errp)
 for (index = 0; index < num_of_test_partitions; index++) {
 snprintf(test_partition, sizeof(test_partition), "%ss%d", bsd_path,
  index);
-fd = qemu_open_old(test_partition, O_RDONLY | O_BINARY | O_LARGEFILE);
+fd = qemu_open(test_partition, O_RDONLY | O_BINARY | O_LARGEFILE, 
NULL);
 if (fd >= 0) {
 partition_found = true;
 qemu_close(fd);
@@ -3653,7 +3649,7 @@ static int cdrom_probe_device(const char *filename)
 int prio = 0;
 struct stat st;
 
-fd = qemu_open_old(filename, O_RDONLY | O_NONBLOCK);
+fd = qemu_open(filename, O_RDONLY | O_NONBLOCK, NULL);
 if (fd < 0) {
 goto out;
 }
@@ -3787,7 +3783,7 @@ static int cdrom_reopen(BlockDriverState *bs)
  */
 if (s->fd >= 0)
 qemu_close(s->fd);
-fd = qemu_open_old(bs->filename, s->open_flags, 0644);
+fd = qemu_open(bs->filename, s->open_flags, NULL);
 if (fd < 0) {
 s->fd = -1;
 return -EIO;
diff --git a/block/file-win32.c b/block/file-win32.c
index 8c1845830e..1a31f8a5ba 100644
--- a/block/file-win32.c
+++ b/block/file-win32.c
@@ -576,11 +576,9 @@ static int raw_co_create(BlockdevCreateOptions *options, 
Error **errp)
 return -EINVAL;
 }
 
-fd = qemu_open_old(file_opts->filename,
-   O_WRONLY | O_CREAT | O_TRUNC | O_BINARY,
-   0644);
+fd = qemu_create(file_opts->filename, O_WRONLY | O_TRUNC | O_BINARY,
+ 0644, errp);
 if (fd < 0) {
-e

[PATCH v4 1/6] util: rename qemu_open() to qemu_open_old()

2020-08-21 Thread Daniel P . Berrangé

We want to introduce a new version of qemu_open() that uses an Error
object for reporting problems and make it the preferred interface.
Rename the existing method to release the namespace for the new impl.

Reviewed-by: Eric Blake 
Reviewed-by: Philippe Mathieu-Daudé 
Signed-off-by: Daniel P. Berrangé 
---
 accel/kvm/kvm-all.c|  2 +-
 backends/rng-random.c  |  2 +-
 backends/tpm/tpm_passthrough.c |  8 
 block/file-posix.c | 14 +++---
 block/file-win32.c |  5 +++--
 block/vvfat.c  |  5 +++--
 chardev/char-fd.c  |  2 +-
 chardev/char-pipe.c|  6 +++---
 chardev/char.c |  2 +-
 dump/dump.c|  2 +-
 hw/s390x/s390-skeys.c  |  2 +-
 hw/usb/host-libusb.c   |  2 +-
 hw/vfio/common.c   |  4 ++--
 include/qemu/osdep.h   |  2 +-
 io/channel-file.c  |  2 +-
 net/vhost-vdpa.c   |  2 +-
 os-posix.c |  2 +-
 qga/channel-posix.c|  4 ++--
 qga/commands-posix.c   |  6 +++---
 target/arm/kvm.c   |  2 +-
 ui/console.c   |  2 +-
 util/osdep.c   |  2 +-
 util/oslib-posix.c |  2 +-
 23 files changed, 42 insertions(+), 40 deletions(-)

diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c
index 63ef6af9a1..ad8b315b35 100644
--- a/accel/kvm/kvm-all.c
+++ b/accel/kvm/kvm-all.c
@@ -2013,7 +2013,7 @@ static int kvm_init(MachineState *ms)
 #endif
 QLIST_INIT(&s->kvm_parked_vcpus);
 s->vmfd = -1;
-s->fd = qemu_open("/dev/kvm", O_RDWR);
+s->fd = qemu_open_old("/dev/kvm", O_RDWR);
 if (s->fd == -1) {
 fprintf(stderr, "Could not access KVM kernel module: %m\n");
 ret = -errno;
diff --git a/backends/rng-random.c b/backends/rng-random.c
index 32998d8ee7..245b12ab24 100644
--- a/backends/rng-random.c
+++ b/backends/rng-random.c
@@ -75,7 +75,7 @@ static void rng_random_opened(RngBackend *b, Error **errp)
 error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
"filename", "a valid filename");
 } else {
-s->fd = qemu_open(s->filename, O_RDONLY | O_NONBLOCK);
+s->fd = qemu_open_old(s->filename, O_RDONLY | O_NONBLOCK);
 if (s->fd == -1) {
 error_setg_file_open(errp, errno, s->filename);
 }
diff --git a/backends/tpm/tpm_passthrough.c b/backends/tpm/tpm_passthrough.c
index 7403807ec4..81e2d8f531 100644
--- a/backends/tpm/tpm_passthrough.c
+++ b/backends/tpm/tpm_passthrough.c
@@ -217,7 +217,7 @@ static int 
tpm_passthrough_open_sysfs_cancel(TPMPassthruState *tpm_pt)
 char path[PATH_MAX];
 
 if (tpm_pt->options->cancel_path) {
-fd = qemu_open(tpm_pt->options->cancel_path, O_WRONLY);
+fd = qemu_open_old(tpm_pt->options->cancel_path, O_WRONLY);
 if (fd < 0) {
 error_report("tpm_passthrough: Could not open TPM cancel path: %s",
  strerror(errno));
@@ -235,11 +235,11 @@ static int 
tpm_passthrough_open_sysfs_cancel(TPMPassthruState *tpm_pt)
 dev++;
 if (snprintf(path, sizeof(path), "/sys/class/tpm/%s/device/cancel",
  dev) < sizeof(path)) {
-fd = qemu_open(path, O_WRONLY);
+fd = qemu_open_old(path, O_WRONLY);
 if (fd < 0) {
 if (snprintf(path, sizeof(path), 
"/sys/class/misc/%s/device/cancel",
  dev) < sizeof(path)) {
-fd = qemu_open(path, O_WRONLY);
+fd = qemu_open_old(path, O_WRONLY);
 }
 }
 }
@@ -271,7 +271,7 @@ tpm_passthrough_handle_device_opts(TPMPassthruState 
*tpm_pt, QemuOpts *opts)
 }
 
 tpm_pt->tpm_dev = value ? value : TPM_PASSTHROUGH_DEFAULT_DEVICE;
-tpm_pt->tpm_fd = qemu_open(tpm_pt->tpm_dev, O_RDWR);
+tpm_pt->tpm_fd = qemu_open_old(tpm_pt->tpm_dev, O_RDWR);
 if (tpm_pt->tpm_fd < 0) {
 error_report("Cannot access TPM device using '%s': %s",
  tpm_pt->tpm_dev, strerror(errno));
diff --git a/block/file-posix.c b/block/file-posix.c
index 9a00d4190a..bac2566f10 100644
--- a/block/file-posix.c
+++ b/block/file-posix.c
@@ -630,7 +630,7 @@ static int raw_open_common(BlockDriverState *bs, QDict 
*options,
 raw_parse_flags(bdrv_flags, &s->open_flags, false);
 
 s->fd = -1;
-fd = qemu_open(filename, s->open_flags, 0644);
+fd = qemu_open_old(filename, s->open_flags, 0644);
 ret = fd < 0 ? -errno : 0;
 
 if (ret < 0) {
@@ -1032,13 +1032,13 @@ static int raw_reconfigure_getfd(BlockDriverState *bs, 
int flags,
 }
 }
 
-/* If we cannot use fcntl, or fcntl failed, fall back to qemu_open() */
+/* If we cannot use fcntl, or fcntl failed, fall back to qemu_open_old() */
 if (fd == -1) {
 const char *normalized_filename = bs->filename;
 ret = raw_normalize_devicepath(&normalized_filename, errp);
 if (ret >= 0) {
 assert(!(*open_flags

[PATCH v4 2/6] util: refactor qemu_open_old to split off variadic args handling

2020-08-21 Thread Daniel P . Berrangé

This simple refactoring prepares for future patches. The variadic args
handling is split from the main bulk of the open logic. The duplicated
calls to open() are removed in favour of updating the "flags" variable
to have O_CLOEXEC.

Signed-off-by: Daniel P. Berrangé 
---
 util/osdep.c | 40 +++-
 1 file changed, 27 insertions(+), 13 deletions(-)

diff --git a/util/osdep.c b/util/osdep.c
index 9df1b6adec..9ff92551e7 100644
--- a/util/osdep.c
+++ b/util/osdep.c
@@ -22,6 +22,7 @@
  * THE SOFTWARE.
  */
 #include "qemu/osdep.h"
+#include "qapi/error.h"
 
 /* Needed early for CONFIG_BSD etc. */
 
@@ -282,10 +283,10 @@ int qemu_lock_fd_test(int fd, int64_t start, int64_t len, 
bool exclusive)
 /*
  * Opens a file with FD_CLOEXEC set
  */
-int qemu_open_old(const char *name, int flags, ...)
+static int
+qemu_open_internal(const char *name, int flags, mode_t mode)
 {
 int ret;
-int mode = 0;
 
 #ifndef _WIN32
 const char *fdset_id_str;
@@ -323,22 +324,35 @@ int qemu_open_old(const char *name, int flags, ...)
 }
 #endif
 
-if (flags & O_CREAT) {
-va_list ap;
-
-va_start(ap, flags);
-mode = va_arg(ap, int);
-va_end(ap);
-}
-
 #ifdef O_CLOEXEC
-ret = open(name, flags | O_CLOEXEC, mode);
-#else
+flags |= O_CLOEXEC;
+#endif /* O_CLOEXEC */
+
 ret = open(name, flags, mode);
+
+#ifndef O_CLOEXEC
 if (ret >= 0) {
 qemu_set_cloexec(ret);
 }
-#endif
+#endif /* ! O_CLOEXEC */
+
+return ret;
+}
+
+
+int qemu_open_old(const char *name, int flags, ...)
+{
+va_list ap;
+mode_t mode = 0;
+int ret;
+
+va_start(ap, flags);
+if (flags & O_CREAT) {
+mode = va_arg(ap, int);
+}
+va_end(ap);
+
+ret = qemu_open_internal(name, flags, mode);
 
 #ifdef O_DIRECT
 if (ret == -1 && errno == EINVAL && (flags & O_DIRECT)) {
-- 
2.26.2

[PATCH v4 0/6] block: improve error reporting for unsupported O_DIRECT

2020-08-21 Thread Daniel P . Berrangé

v1: https://lists.gnu.org/archive/html/qemu-devel/2020-07/msg00269.html
v2: https://lists.gnu.org/archive/html/qemu-devel/2020-07/msg00589.html
v3: https://lists.gnu.org/archive/html/qemu-devel/2020-07/msg07098.html

See patch commit messages for rationale

Ideally we would convert other callers of qemu_open to use
qemu_open_err, and eventually remove qemu_open, renaming
qemu_open_err back to qemu_open.  Given soft freeze is just
days away though, I'm hoping this series is simple enough
to get into this release, leaving bigger cleanup for later.

Improved in v4:

 - Use assert() for programmer mistakes
 - Split second patch into three distinct parts
 - Misc typos
 - Improve commit message

Improved in v3:

 - Re-arrange the patches series, so that the conversion to Error
   takes place first, then the improve O_DIRECT reporting
 - Rename existing method to qemu_open_old
 - Use a pair of new methods qemu_open + qemu_create to improve
   arg checking

Improved in v2:

 - Mention that qemu_open_err is preferred over qemu_open
 - Get rid of obsolete error_report call
 - Simplify O_DIRECT handling
 - Fixup iotests for changed error message text

Daniel P. Berrangé (6):
  util: rename qemu_open() to qemu_open_old()
  util: refactor qemu_open_old to split off variadic args handling
  util: add Error object for qemu_open_internal error reporting
  util: introduce qemu_open and qemu_create with error reporting
  util: give a specific error message when O_DIRECT doesn't work
  block/file: switch to use qemu_open/qemu_create for improved errors

 accel/kvm/kvm-all.c|  2 +-
 backends/rng-random.c  |  2 +-
 backends/tpm/tpm_passthrough.c |  8 ++--
 block/file-posix.c | 16 +++
 block/file-win32.c |  5 +-
 block/vvfat.c  |  5 +-
 chardev/char-fd.c  |  2 +-
 chardev/char-pipe.c|  6 +--
 chardev/char.c |  2 +-
 dump/dump.c|  2 +-
 hw/s390x/s390-skeys.c  |  2 +-
 hw/usb/host-libusb.c   |  2 +-
 hw/vfio/common.c   |  4 +-
 include/qemu/osdep.h   |  8 +++-
 io/channel-file.c  |  2 +-
 net/vhost-vdpa.c   |  2 +-
 os-posix.c |  2 +-
 qga/channel-posix.c|  4 +-
 qga/commands-posix.c   |  6 +--
 target/arm/kvm.c   |  2 +-
 tests/qemu-iotests/051.out |  4 +-
 tests/qemu-iotests/051.pc.out  |  4 +-
 tests/qemu-iotests/061.out |  2 +-
 tests/qemu-iotests/069.out |  2 +-
 tests/qemu-iotests/082.out |  4 +-
 tests/qemu-iotests/111.out |  2 +-
 tests/qemu-iotests/226.out |  6 +--
 tests/qemu-iotests/232.out | 12 ++---
 tests/qemu-iotests/244.out |  6 +--
 ui/console.c   |  2 +-
 util/osdep.c   | 83 --
 util/oslib-posix.c |  2 +-
 32 files changed, 136 insertions(+), 77 deletions(-)

-- 
2.26.2

[PATCH v4 3/6] util: add Error object for qemu_open_internal error reporting

2020-08-21 Thread Daniel P . Berrangé

Instead of relying on the limited information from errno, we can now
also provide detailed error messages.

Signed-off-by: Daniel P. Berrangé 
---
 util/osdep.c | 21 +++--
 1 file changed, 19 insertions(+), 2 deletions(-)

diff --git a/util/osdep.c b/util/osdep.c
index 9ff92551e7..9c7118d3cb 100644
--- a/util/osdep.c
+++ b/util/osdep.c
@@ -284,7 +284,7 @@ int qemu_lock_fd_test(int fd, int64_t start, int64_t len, 
bool exclusive)
  * Opens a file with FD_CLOEXEC set
  */
 static int
-qemu_open_internal(const char *name, int flags, mode_t mode)
+qemu_open_internal(const char *name, int flags, mode_t mode, Error **errp)
 {
 int ret;
 
@@ -298,24 +298,31 @@ qemu_open_internal(const char *name, int flags, mode_t 
mode)
 
 fdset_id = qemu_parse_fdset(fdset_id_str);
 if (fdset_id == -1) {
+error_setg(errp, "Could not parse fdset %s", name);
 errno = EINVAL;
 return -1;
 }
 
 fd = monitor_fdset_get_fd(fdset_id, flags);
 if (fd < 0) {
+error_setg_errno(errp, -fd, "Could not acquire FD for %s flags %x",
+ name, flags);
 errno = -fd;
 return -1;
 }
 
 dupfd = qemu_dup_flags(fd, flags);
 if (dupfd == -1) {
+error_setg_errno(errp, errno, "Could not dup FD for %s flags %x",
+ name, flags);
 return -1;
 }
 
 ret = monitor_fdset_dup_fd_add(fdset_id, dupfd);
 if (ret == -1) {
 close(dupfd);
+error_setg(errp, "Could not save FD for %s flags %x",
+   name, flags);
 errno = EINVAL;
 return -1;
 }
@@ -336,6 +343,16 @@ qemu_open_internal(const char *name, int flags, mode_t 
mode)
 }
 #endif /* ! O_CLOEXEC */
 
+if (ret == -1) {
+const char *action = "open";
+if (flags & O_CREAT) {
+action = "create";
+}
+error_setg_errno(errp, errno, "Could not %s '%s' flags 0x%x",
+ action, name, flags);
+}
+
+
 return ret;
 }
 
@@ -352,7 +369,7 @@ int qemu_open_old(const char *name, int flags, ...)
 }
 va_end(ap);
 
-ret = qemu_open_internal(name, flags, mode);
+ret = qemu_open_internal(name, flags, mode, NULL);
 
 #ifdef O_DIRECT
 if (ret == -1 && errno == EINVAL && (flags & O_DIRECT)) {
-- 
2.26.2

Re: [PATCH 0/1] qcow2: Skip copy-on-write when allocating a zero cluster

2020-08-21 Thread Brian Foster

On Fri, Aug 21, 2020 at 02:12:32PM +0200, Alberto Garcia wrote:
> On Fri 21 Aug 2020 01:42:52 PM CEST, Alberto Garcia wrote:
> > On Fri 21 Aug 2020 01:05:06 PM CEST, Brian Foster  
> > wrote:
> >>> > 1) off: for every write request QEMU initializes the cluster (64KB)
> >>> > with fallocate(ZERO_RANGE) and then writes the 4KB of data.
> >>> > 
> >>> > 2) off w/o ZERO_RANGE: QEMU writes the 4KB of data and fills the rest
> >>> > of the cluster with zeroes.
> >>> > 
> >>> > 3) metadata: all clusters were allocated when the image was created
> >>> > but they are sparse, QEMU only writes the 4KB of data.
> >>> > 
> >>> > 4) falloc: all clusters were allocated with fallocate() when the image
> >>> > was created, QEMU only writes 4KB of data.
> >>> > 
> >>> > 5) full: all clusters were allocated by writing zeroes to all of them
> >>> > when the image was created, QEMU only writes 4KB of data.
> >>> > 
> >>> > As I said in a previous message I'm not familiar with xfs, but the
> >>> > parts that I don't understand are
> >>> > 
> >>> >- Why is (4) slower than (1)?
> >>> 
> >>> Because fallocate() is a full IO serialisation barrier at the
> >>> filesystem level. If you do:
> >>> 
> >>> fallocate(whole file)
> >>> 
> >>> 
> >>> 
> >>> .
> >>> 
> >>> The IO can run concurrent and does not serialise against anything in
> >>> the filesystem except unwritten extent conversions at IO completion
> >>> (see answer to next question!)
> >>> 
> >>> However, if you just use (4) you get:
> >>> 
> >>> falloc(64k)
> >>>   
> >>>   
> >>> <4k io>
> >>>   
> >>> falloc(64k)
> >>>   
> >>>   
> >>>   <4k IO completes, converts 4k to written>
> >>>   
> >>> <4k io>
> >>> falloc(64k)
> >>>   
> >>>   
> >>>   <4k IO completes, converts 4k to written>
> >>>   
> >>> <4k io>
> >>>   
> >>> 
> >>
> >> Option 4 is described above as initial file preallocation whereas
> >> option 1 is per 64k cluster prealloc. Prealloc mode mixup aside, Berto
> >> is reporting that the initial file preallocation mode is slower than
> >> the per cluster prealloc mode. Berto, am I following that right?
> 
> After looking more closely at the data I can see that there is a peak of
> ~30K IOPS during the first 5 or 6 seconds and then it suddenly drops to
> ~7K for the rest of the test.
> 
> I was running fio with --ramp_time=5 which ignores the first 5 seconds
> of data in order to let performance settle, but if I remove that I can
> see the effect more clearly. I can observe it with raw files (in 'off'
> and 'prealloc' modes) and qcow2 files in 'prealloc' mode. With qcow2 and
> preallocation=off the performance is stable during the whole test.
> 

That's interesting. I ran your fio command (without --ramp_time and with
--runtime=5m) against a file on XFS (so no qcow2, no zero_range) once
with sparse file with a 64k extent size hint and again with a fully
preallocated 25GB file and I saw similar results in terms of the delta.
This was just against an SSD backed vdisk in my local dev VM, but I saw
~5800 iops for the full preallocation test and ~6200 iops with the
extent size hint.

I do notice an initial iops burst as described for both tests, so I
switched to use a 60s ramp time and 60s runtime. With that longer ramp
up time, I see ~5000 iops with the 64k extent size hint and ~5500 iops
with the full 25GB prealloc. Perhaps the unexpected performance delta
with qcow2 is similarly transient towards the start of the test and the
runtime is short enough that it skews the final results..?

Brian

> Berto
>

Re: [PATCH] util/meson.build: fix fdmon-io_uring build

2020-08-21 Thread Stefano Garzarella

On Fri, Aug 21, 2020 at 06:46:15PM +0200, Philippe Mathieu-Daudé wrote:
> On 8/21/20 6:23 PM, Stefano Garzarella wrote:
> > On Fri, Aug 21, 2020 at 06:12:45PM +0200, Philippe Mathieu-Daudé wrote:
> >> Hi Stefano,
> >>
> >> On 8/21/20 5:48 PM, Stefano Garzarella wrote:
> >>> libqemuutil.a build fails with this error:
> >>>
> >>>   /usr/bin/ld: libqemuutil.a(util_fdmon-io_uring.c.o): in function 
> >>> `get_sqe':
> >>>   qemu/build/../util/fdmon-io_uring.c:83: undefined reference to 
> >>> `io_uring_get_sqe'
> >>>   /usr/bin/ld: qemu/build/../util/fdmon-io_uring.c:92: undefined 
> >>> reference to `io_uring_submit'
> >>>   /usr/bin/ld: qemu/build/../util/fdmon-io_uring.c:96: undefined 
> >>> reference to `io_uring_get_sqe'
> >>>   /usr/bin/ld: libqemuutil.a(util_fdmon-io_uring.c.o): in function 
> >>> `fdmon_io_uring_wait':
> >>>   qemu/build/../util/fdmon-io_uring.c:289: undefined reference to 
> >>> `io_uring_submit_and_wait'
> >>>   /usr/bin/ld: libqemuutil.a(util_fdmon-io_uring.c.o): in function 
> >>> `fdmon_io_uring_setup':
> >>>   qemu/build/../util/fdmon-io_uring.c:328: undefined reference to 
> >>> `io_uring_queue_init'
> >>>   /usr/bin/ld: libqemuutil.a(util_fdmon-io_uring.c.o): in function 
> >>> `fdmon_io_uring_destroy':
> >>>   qemu/build/../util/fdmon-io_uring.c:343: undefined reference to 
> >>> `io_uring_queue_exit'
> >>>   collect2: error: ld returned 1 exit status
> >>
> >> Can you add a gitlab job to reproduce this? (Or at least explain
> >> how to reproduce, so we add that job later). Thanks!
> > 
> > I think the only prerequisite is to install 'liburing-devel' package
> > (Fedora 32) on the build system, and then fdmon-io_uring.c will be built.
> 
> Oh easy then, simply add it to PACKAGES in
> tests/docker/dockerfiles/fedora.docker :)

Yeah, I just found it ;-)

I'll send a patch.

Thanks,
Stefano

Re: [PATCH] util/meson.build: fix fdmon-io_uring build

2020-08-21 Thread Philippe Mathieu-Daudé

On 8/21/20 6:23 PM, Stefano Garzarella wrote:
> On Fri, Aug 21, 2020 at 06:12:45PM +0200, Philippe Mathieu-Daudé wrote:
>> Hi Stefano,
>>
>> On 8/21/20 5:48 PM, Stefano Garzarella wrote:
>>> libqemuutil.a build fails with this error:
>>>
>>>   /usr/bin/ld: libqemuutil.a(util_fdmon-io_uring.c.o): in function 
>>> `get_sqe':
>>>   qemu/build/../util/fdmon-io_uring.c:83: undefined reference to 
>>> `io_uring_get_sqe'
>>>   /usr/bin/ld: qemu/build/../util/fdmon-io_uring.c:92: undefined reference 
>>> to `io_uring_submit'
>>>   /usr/bin/ld: qemu/build/../util/fdmon-io_uring.c:96: undefined reference 
>>> to `io_uring_get_sqe'
>>>   /usr/bin/ld: libqemuutil.a(util_fdmon-io_uring.c.o): in function 
>>> `fdmon_io_uring_wait':
>>>   qemu/build/../util/fdmon-io_uring.c:289: undefined reference to 
>>> `io_uring_submit_and_wait'
>>>   /usr/bin/ld: libqemuutil.a(util_fdmon-io_uring.c.o): in function 
>>> `fdmon_io_uring_setup':
>>>   qemu/build/../util/fdmon-io_uring.c:328: undefined reference to 
>>> `io_uring_queue_init'
>>>   /usr/bin/ld: libqemuutil.a(util_fdmon-io_uring.c.o): in function 
>>> `fdmon_io_uring_destroy':
>>>   qemu/build/../util/fdmon-io_uring.c:343: undefined reference to 
>>> `io_uring_queue_exit'
>>>   collect2: error: ld returned 1 exit status
>>
>> Can you add a gitlab job to reproduce this? (Or at least explain
>> how to reproduce, so we add that job later). Thanks!
> 
> I think the only prerequisite is to install 'liburing-devel' package
> (Fedora 32) on the build system, and then fdmon-io_uring.c will be built.

Oh easy then, simply add it to PACKAGES in
tests/docker/dockerfiles/fedora.docker :)

> 
> I'll try to add a gitlab job ;-)
> 
> Thanks,
> Stefano
> 
>>
>>>
>>> This patch fixes the issue by adding the 'linux_io_uring' dependency for
>>> fdmon-io_uring.c
>>>
>>> Fixes: a81df1b68b ("libqemuutil, qapi, trace: convert to meson")
>>> Cc: pbonz...@redhat.com
>>> Signed-off-by: Stefano Garzarella 
>>> ---
>>>  util/meson.build | 2 +-
>>>  1 file changed, 1 insertion(+), 1 deletion(-)
>>>
>>> diff --git a/util/meson.build b/util/meson.build
>>> index 23b8ad459b..e6b207a99e 100644
>>> --- a/util/meson.build
>>> +++ b/util/meson.build
>>> @@ -4,7 +4,7 @@ util_ss.add(when: 'CONFIG_ATOMIC64', if_false: 
>>> files('atomic64.c'))
>>>  util_ss.add(when: 'CONFIG_POSIX', if_true: files('aio-posix.c'))
>>>  util_ss.add(when: 'CONFIG_POSIX', if_true: files('fdmon-poll.c'))
>>>  util_ss.add(when: 'CONFIG_EPOLL_CREATE1', if_true: files('fdmon-epoll.c'))
>>> -util_ss.add(when: 'CONFIG_LINUX_IO_URING', if_true: 
>>> files('fdmon-io_uring.c'))
>>> +util_ss.add(when: ['CONFIG_LINUX_IO_URING', linux_io_uring], if_true: 
>>> files('fdmon-io_uring.c'))
>>>  util_ss.add(when: 'CONFIG_POSIX', if_true: files('compatfd.c'))
>>>  util_ss.add(when: 'CONFIG_POSIX', if_true: files('event_notifier-posix.c'))
>>>  util_ss.add(when: 'CONFIG_POSIX', if_true: files('mmap-alloc.c'))
>>>
>>
>

Re: Suspicious QOM types without instance/class size

2020-08-21 Thread Eduardo Habkost

On Fri, Aug 21, 2020 at 09:06:51AM -0700, Alistair Francis wrote:
> On Thu, Aug 20, 2020 at 2:56 PM Eduardo Habkost  wrote:
> >
> > While trying to convert TypeInfo declarations to the new
> > OBJECT_DECLARE* macros, I've stumbled on a few suspicious cases
> > where instance_size or class_size is not set, despite having type
> > checker macros that use a specific type.
> >
> > The ones with "WARNING" are abstract types (maybe not serious if
> > subclasses set the appropriate sizes).  The ones with "ERROR"
> > don't seem to be abstract types.
> >
> 
> > ERROR: hw/core/register.c:328:1: instance_size should be set to 
> > sizeof(RegisterInfo)?
> 
> I'll send a patch out for this one today.
> 
> If you are fixing all of these as part of a series I'm also happy to
> just let you do that.

Feel free to send the fix, and I will include it as part of my
series if necessary.

Note that register_init_block() relies on the fact that
register_init() won't touch any RegisterInfo field except
parent_obj, so this won't be a one line patch.

-- 
Eduardo

Re: [PULL 0/9] Tracing patches

2020-08-21 Thread Peter Maydell

On Thu, 13 Aug 2020 at 08:44, Stefan Hajnoczi  wrote:
>
> The following changes since commit d0ed6a69d399ae193959225cdeaa9382746c91cc:
>
>   Update version for v5.1.0 release (2020-08-11 17:07:03 +0100)
>
> are available in the Git repository at:
>
>   https://github.com/stefanha/qemu.git tags/tracing-pull-request
>
> for you to fetch changes up to bd6c9e56aba2e1b9a307642c72375386afbcf1f2:
>
>   trace-events: Fix attribution of trace points to source (2020-08-12 20:28:54 +0100)
>
> 
> Pull request
>
> macOS dtrace support and cleanups.

Hi; this conflicts with the meson buildsystem merge, I'm
afraid -- can you rebase and resend, please?

thanks
-- PMM

Re: [PATCH v2 (BROKEN) 0/6] migration: bring improved savevm/loadvm/delvm to QMP

2020-08-21 Thread Daniel P . Berrangé

On Mon, Jul 27, 2020 at 04:08:37PM +0100, Daniel P. Berrangé wrote:
> A followup to:
> 
>  v1: https://lists.gnu.org/archive/html/qemu-devel/2020-07/msg00866.html

snip

> HELP NEEDED:  this series starts to implement the approach that Kevin
> suggested wrt use of generic jobs.
> 
> When I try to actually run the code though it crashes:
> 
> ERROR:/home/berrange/src/virt/qemu/softmmu/cpus.c:1788:qemu_mutex_unlock_iothread: assertion failed: (qemu_mutex_iothread_locked())
> Bail out! ERROR:/home/berrange/src/virt/qemu/softmmu/cpus.c:1788:qemu_mutex_unlock_iothread: assertion failed: (qemu_mutex_iothread_locked())
> 
> Obviously I've missed something related to locking, but I've no idea
> what, so I'm sending this v2 simply as a way to solicit suggestions
> for what I've messed up.

What I've found is

qmp_snapshot_save() is the QMP handler and runs in the main thread, so iothread
lock is held.


This calls job_create() which ends up invoking  snapshot_save_job_run
in a background coroutine, but IIUC  iothread lock is still held when
the coroutine starts.

This then invokes save_snapshot() which invokes qemu_savevm_state


This calls   qemu_mutex_unlock_iothread() and then 
qemu_savevm_state_setup().

Eventually something in the qcow2 code triggers qemu_coroutine_yield()
so control goes back to the main event loop thread.


The problem is that the iothread lock has been released, but the main
event loop thread is still expecting it to be held.

I've no idea how to go about solving this problem.


The save_snapshot() code, as written today, needs to run serialized with
everything else, but because the job framework has used a coroutine to
run it, we can switch back to the main event thread at any time.

I don't know how to force save_snapshot() to be serialized when using
the generic job framework.


> 
> You can reproduce with I/O tests using "check -qcow2 310"  and it
> gave a stack:
> 
>   Thread 5 (Thread 0x7fffe6e4c700 (LWP 3399011)):
>   #0  futex_wait_cancelable (private=0, expected=0, 
> futex_word=0x566a9fd8) at ../sysdeps/nptl/futex-internal.h:183
>   #1  __pthread_cond_wait_common (abstime=0x0, clockid=0, 
> mutex=0x56227160 , cond=0x566a9fb0) at 
> pthread_cond_wait.c:508
>   #2  __pthread_cond_wait (cond=cond@entry=0x566a9fb0, 
> mutex=mutex@entry=0x56227160 ) at 
> pthread_cond_wait.c:638
>   #3  0x55ceb6cb in qemu_cond_wait_impl (cond=0x566a9fb0, 
> mutex=0x56227160 , file=0x55d44198 
> "/home/berrange/src/virt/qemu/softmmu/cpus.c", line=1145) at 
> /home/berrange/src/virt/qemu/util/qemu-thread-posix.c:174
>   #4  0x55931974 in qemu_wait_io_event (cpu=cpu@entry=0x56685050) 
> at /home/berrange/src/virt/qemu/softmmu/cpus.c:1145
>   #5  0x55933a89 in qemu_dummy_cpu_thread_fn 
> (arg=arg@entry=0x56685050) at 
> /home/berrange/src/virt/qemu/softmmu/cpus.c:1241
>   #6  0x55ceb049 in qemu_thread_start (args=0x7fffe6e476f0) at 
> /home/berrange/src/virt/qemu/util/qemu-thread-posix.c:521
>   #7  0x74fdc432 in start_thread (arg=<optimized out>) at 
> pthread_create.c:477
>   #8  0x74f0a9d3 in clone () at 
> ../sysdeps/unix/sysv/linux/x86_64/clone.S:95
>   
>   Thread 4 (Thread 0x7fffe764d700 (LWP 3399010)):
>   #0  0x74effb6f in __GI___poll (fds=0x7fffdc006ec0, nfds=3, 
> timeout=-1) at ../sysdeps/unix/sysv/linux/poll.c:29
>   #1  0x77c1aace in g_main_context_iterate.constprop () at 
> /lib64/libglib-2.0.so.0
>   #2  0x77c1ae53 in g_main_loop_run () at /lib64/libglib-2.0.so.0
>   #3  0x559a9d81 in iothread_run (opaque=opaque@entry=0x5632f200) 
> at /home/berrange/src/virt/qemu/iothread.c:82
>   #4  0x55ceb049 in qemu_thread_start (args=0x7fffe76486f0) at 
> /home/berrange/src/virt/qemu/util/qemu-thread-posix.c:521
>   #5  0x74fdc432 in start_thread (arg=<optimized out>) at 
> pthread_create.c:477
>   #6  0x74f0a9d3 in clone () at 
> ../sysdeps/unix/sysv/linux/x86_64/clone.S:95
>   
>   Thread 3 (Thread 0x7fffe7e4e700 (LWP 3399009)):
>   #0  0x74fe5c58 in futex_abstimed_wait_cancelable (private=0, 
> abstime=0x7fffe7e49650, clockid=0, expected=0, futex_word=0x562bf888) at 
> ../sysdeps/nptl/futex-internal.h:320
>   #1  do_futex_wait (sem=sem@entry=0x562bf888, 
> abstime=abstime@entry=0x7fffe7e49650, clockid=0) at sem_waitcommon.c:112
>   #2  0x74fe5d83 in __new_sem_wait_slow 
> (sem=sem@entry=0x562bf888, abstime=abstime@entry=0x7fffe7e49650, 
> clockid=0) at sem_waitcommon.c:184
>   #3  0x74fe5e12 in sem_timedwait (sem=sem@entry=0x562bf888, 
> abstime=abstime@entry=0x7fffe7e49650) at sem_timedwait.c:40
>   #4  0x55cebbdf in qemu_sem_timedwait (sem=sem@entry=0x562bf888, 
> ms=ms@entry=1) at 
> /home/berrange/src/virt/qemu/util/qemu-thread-posix.c:307
>   #5  0x55d03fa4 in worker_thread 
> (opaque=opaque@entry=0x562bf810) at 
> /home/berrange/src/virt/qemu/util/thread-pool.c:91
>   #6  0x55ceb049 in qemu_th

Re: [PATCH] util/meson.build: fix fdmon-io_uring build

2020-08-21 Thread Stefano Garzarella

On Fri, Aug 21, 2020 at 06:12:45PM +0200, Philippe Mathieu-Daudé wrote:
> Hi Stefano,
> 
> On 8/21/20 5:48 PM, Stefano Garzarella wrote:
> > libqemuutil.a build fails with this error:
> > 
> >   /usr/bin/ld: libqemuutil.a(util_fdmon-io_uring.c.o): in function 
> > `get_sqe':
> >   qemu/build/../util/fdmon-io_uring.c:83: undefined reference to 
> > `io_uring_get_sqe'
> >   /usr/bin/ld: qemu/build/../util/fdmon-io_uring.c:92: undefined reference 
> > to `io_uring_submit'
> >   /usr/bin/ld: qemu/build/../util/fdmon-io_uring.c:96: undefined reference 
> > to `io_uring_get_sqe'
> >   /usr/bin/ld: libqemuutil.a(util_fdmon-io_uring.c.o): in function 
> > `fdmon_io_uring_wait':
> >   qemu/build/../util/fdmon-io_uring.c:289: undefined reference to 
> > `io_uring_submit_and_wait'
> >   /usr/bin/ld: libqemuutil.a(util_fdmon-io_uring.c.o): in function 
> > `fdmon_io_uring_setup':
> >   qemu/build/../util/fdmon-io_uring.c:328: undefined reference to 
> > `io_uring_queue_init'
> >   /usr/bin/ld: libqemuutil.a(util_fdmon-io_uring.c.o): in function 
> > `fdmon_io_uring_destroy':
> >   qemu/build/../util/fdmon-io_uring.c:343: undefined reference to 
> > `io_uring_queue_exit'
> >   collect2: error: ld returned 1 exit status
> 
> Can you add a gitlab job to reproduce this? (Or at least explain
> how to reproduce, so we add that job later). Thanks!

I think the only prerequisite is to install 'liburing-devel' package
(Fedora 32) on the build system, and then fdmon-io_uring.c will be built.

I'll try to add a gitlab job ;-)

Thanks,
Stefano

> 
> > 
> > This patch fixes the issue by adding the 'linux_io_uring' dependency for
> > fdmon-io_uring.c
> > 
> > Fixes: a81df1b68b ("libqemuutil, qapi, trace: convert to meson")
> > Cc: pbonz...@redhat.com
> > Signed-off-by: Stefano Garzarella 
> > ---
> >  util/meson.build | 2 +-
> >  1 file changed, 1 insertion(+), 1 deletion(-)
> > 
> > diff --git a/util/meson.build b/util/meson.build
> > index 23b8ad459b..e6b207a99e 100644
> > --- a/util/meson.build
> > +++ b/util/meson.build
> > @@ -4,7 +4,7 @@ util_ss.add(when: 'CONFIG_ATOMIC64', if_false: 
> > files('atomic64.c'))
> >  util_ss.add(when: 'CONFIG_POSIX', if_true: files('aio-posix.c'))
> >  util_ss.add(when: 'CONFIG_POSIX', if_true: files('fdmon-poll.c'))
> >  util_ss.add(when: 'CONFIG_EPOLL_CREATE1', if_true: files('fdmon-epoll.c'))
> > -util_ss.add(when: 'CONFIG_LINUX_IO_URING', if_true: 
> > files('fdmon-io_uring.c'))
> > +util_ss.add(when: ['CONFIG_LINUX_IO_URING', linux_io_uring], if_true: 
> > files('fdmon-io_uring.c'))
> >  util_ss.add(when: 'CONFIG_POSIX', if_true: files('compatfd.c'))
> >  util_ss.add(when: 'CONFIG_POSIX', if_true: files('event_notifier-posix.c'))
> >  util_ss.add(when: 'CONFIG_POSIX', if_true: files('mmap-alloc.c'))
> > 
>

Re: Suspicious QOM types without instance/class size

2020-08-21 Thread Alistair Francis

On Thu, Aug 20, 2020 at 2:56 PM Eduardo Habkost  wrote:
>
> While trying to convert TypeInfo declarations to the new
> OBJECT_DECLARE* macros, I've stumbled on a few suspicious cases
> where instance_size or class_size is not set, despite having type
> checker macros that use a specific type.
>
> The ones with "WARNING" are abstract types (maybe not serious if
> subclasses set the appropriate sizes).  The ones with "ERROR"
> don't seem to be abstract types.
>

> ERROR: hw/core/register.c:328:1: instance_size should be set to 
> sizeof(RegisterInfo)?

I'll send a patch out for this one today.

If you are fixing all of these as part of a series I'm also happy to
just let you do that.

Alistair

>
> --
> Eduardo
>
>

Re: [PATCH] util/meson.build: fix fdmon-io_uring build

2020-08-21 Thread Philippe Mathieu-Daudé

Hi Stefano,

On 8/21/20 5:48 PM, Stefano Garzarella wrote:
> libqemuutil.a build fails with this error:
> 
>   /usr/bin/ld: libqemuutil.a(util_fdmon-io_uring.c.o): in function `get_sqe':
>   qemu/build/../util/fdmon-io_uring.c:83: undefined reference to 
> `io_uring_get_sqe'
>   /usr/bin/ld: qemu/build/../util/fdmon-io_uring.c:92: undefined reference to 
> `io_uring_submit'
>   /usr/bin/ld: qemu/build/../util/fdmon-io_uring.c:96: undefined reference to 
> `io_uring_get_sqe'
>   /usr/bin/ld: libqemuutil.a(util_fdmon-io_uring.c.o): in function 
> `fdmon_io_uring_wait':
>   qemu/build/../util/fdmon-io_uring.c:289: undefined reference to 
> `io_uring_submit_and_wait'
>   /usr/bin/ld: libqemuutil.a(util_fdmon-io_uring.c.o): in function 
> `fdmon_io_uring_setup':
>   qemu/build/../util/fdmon-io_uring.c:328: undefined reference to 
> `io_uring_queue_init'
>   /usr/bin/ld: libqemuutil.a(util_fdmon-io_uring.c.o): in function 
> `fdmon_io_uring_destroy':
>   qemu/build/../util/fdmon-io_uring.c:343: undefined reference to 
> `io_uring_queue_exit'
>   collect2: error: ld returned 1 exit status

Can you add a gitlab job to reproduce this? (Or at least explain
how to reproduce, so we add that job later). Thanks!

> 
> This patch fixes the issue by adding the 'linux_io_uring' dependency for
> fdmon-io_uring.c
> 
> Fixes: a81df1b68b ("libqemuutil, qapi, trace: convert to meson")
> Cc: pbonz...@redhat.com
> Signed-off-by: Stefano Garzarella 
> ---
>  util/meson.build | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/util/meson.build b/util/meson.build
> index 23b8ad459b..e6b207a99e 100644
> --- a/util/meson.build
> +++ b/util/meson.build
> @@ -4,7 +4,7 @@ util_ss.add(when: 'CONFIG_ATOMIC64', if_false: 
> files('atomic64.c'))
>  util_ss.add(when: 'CONFIG_POSIX', if_true: files('aio-posix.c'))
>  util_ss.add(when: 'CONFIG_POSIX', if_true: files('fdmon-poll.c'))
>  util_ss.add(when: 'CONFIG_EPOLL_CREATE1', if_true: files('fdmon-epoll.c'))
> -util_ss.add(when: 'CONFIG_LINUX_IO_URING', if_true: 
> files('fdmon-io_uring.c'))
> +util_ss.add(when: ['CONFIG_LINUX_IO_URING', linux_io_uring], if_true: 
> files('fdmon-io_uring.c'))
>  util_ss.add(when: 'CONFIG_POSIX', if_true: files('compatfd.c'))
>  util_ss.add(when: 'CONFIG_POSIX', if_true: files('event_notifier-posix.c'))
>  util_ss.add(when: 'CONFIG_POSIX', if_true: files('mmap-alloc.c'))
>

Re: [PATCH 0/1] qcow2: Skip copy-on-write when allocating a zero cluster

2020-08-21 Thread Alberto Garcia

On Thu 20 Aug 2020 11:58:11 PM CEST, Dave Chinner wrote:
>> The virtual drive (/dev/vdb) is a freshly created qcow2 file stored on
>> the host (on an xfs or ext4 filesystem as the table above shows), and
>> it is attached to QEMU using a virtio-blk-pci device:
>> 
>>-drive if=virtio,file=image.qcow2,cache=none,l2-cache-size=200M
>
> You're not using AIO on this image file, so it can't do
> concurrent IO? what happens when you add "aio=native" to this?

I sent the results on a reply to Brian.

>> cache=none means that the image is opened with O_DIRECT and
>> l2-cache-size is large enough so QEMU is able to cache all the
>> relevant qcow2 metadata in memory.
>
> What happens when you just use a sparse file (i.e. a raw image) with
> aio=native instead of using qcow2? XFS, ext4, btrfs, etc all support
> sparse files so using qcow2 to provide sparse image file support is
> largely an unnecessary layer of indirection and overhead...
>
> And with XFS, you don't need qcow2 for snapshots either because you
> can use reflink copies to take an atomic copy-on-write snapshot of the
> raw image file... (assuming you made the xfs filesystem with reflink
> support (which is the TOT default now)).

To be clear, I'm not trying to advocate for or against qcow2 on xfs, we
were just analyzing different allocation strategies for qcow2 and we
came across these results which we don't quite understand.

>> 1) off: for every write request QEMU initializes the cluster (64KB)
>> with fallocate(ZERO_RANGE) and then writes the 4KB of data.
>> 
>> 2) off w/o ZERO_RANGE: QEMU writes the 4KB of data and fills the rest
>> of the cluster with zeroes.
>> 
>> 3) metadata: all clusters were allocated when the image was created
>> but they are sparse, QEMU only writes the 4KB of data.
>> 
>> 4) falloc: all clusters were allocated with fallocate() when the image
>> was created, QEMU only writes 4KB of data.
>> 
>> 5) full: all clusters were allocated by writing zeroes to all of them
>> when the image was created, QEMU only writes 4KB of data.
>> 
>> As I said in a previous message I'm not familiar with xfs, but the
>> parts that I don't understand are
>> 
>>- Why is (4) slower than (1)?
>
> Because fallocate() is a full IO serialisation barrier at the
> filesystem level. If you do:
>
> fallocate(whole file)
> 
> 
> 
> .
>
> The IO can run concurrent and does not serialise against anything in
> the filesystem except unwritten extent conversions at IO completion
> (see answer to next question!)
>
> However, if you just use (4) you get:
>
> falloc(64k)
>   
>   
> <4k io>
>   
> falloc(64k)
>   
>   
>   <4k IO completes, converts 4k to written>
>   
> <4k io>

I think Brian pointed it out already, but scenario (4) is rather
falloc(25GB), then QEMU is launched and the actual 4k IO requests start
to happen.

So I would expect that after falloc(25GB) all clusters are initialized
and the end result would be closer to a full preallocation (i.e. writing
25GB worth of zeroes to disk).

> IOWs, typical "write once" benchmark testing indicates the *worst*
> performance you are going to see. As the guest filesystem ages and
> initialises more of the underlying image file, it will get faster, not
> slower.

Yes, that's clear, once everything is allocated then it is fast (and
really much faster in the case of xfs vs ext4), what we try to optimize
in qcow2 is precisely the allocation of new clusters.

Berto

Re: [PATCH 0/1] qcow2: Skip copy-on-write when allocating a zero cluster

2020-08-21 Thread Alberto Garcia

On Fri 21 Aug 2020 02:59:44 PM CEST, Brian Foster wrote:
>> > Option 4 is described above as initial file preallocation whereas
>> > option 1 is per 64k cluster prealloc. Prealloc mode mixup aside, Berto
>> > is reporting that the initial file preallocation mode is slower than
>> > the per cluster prealloc mode. Berto, am I following that right?
>> 
>> Option (1) means that no qcow2 cluster is allocated at the beginning of
>> the test so, apart from updating the relevant qcow2 metadata, each write
>> request clears the cluster first (with fallocate(ZERO_RANGE)) then
>> writes the requested 4KB of data. Further writes to the same cluster
>> don't need changes on the qcow2 metadata so they go directly to the area
>> that was cleared with fallocate().
>> 
>> Option (4) means that all clusters are allocated when the image is
>> created and they are initialized with fallocate() (actually with
>> posix_fallocate() now that I read the code, I suppose it's the same for
>> xfs?). Only after that the test starts. All write requests are simply
>> forwarded to the disk, there is no need to touch any qcow2 metadata nor
>> do anything else.
>> 
>
> Ok, I think that's consistent with what I described above (sorry, I find
> the preallocation mode names rather confusing so I was trying to avoid
> using them). Have you confirmed that posix_fallocate() in this case
> translates directly to fallocate()? I suppose that's most likely the
> case, otherwise you'd see numbers more like with preallocation=full
> (file preallocated via writing zeroes).

Yes, it seems to be:

   
https://sourceware.org/git/?p=glibc.git;a=blob;f=sysdeps/unix/sysv/linux/posix_fallocate.c;h=7238b000383af2f3878a9daf8528819645b6aa31;hb=HEAD

And that's also what the posix_fallocate() manual page says.

>> And yes, (4) is a bit slower than (1) in my tests. On ext4 I get 10%
>> more IOPS.
>> 
>> I just ran the tests with aio=native and with a raw image instead of
>> qcow2, here are the results:
>> 
>> qcow2:
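>> |----------------------+-------------+------------|
>> | preallocation        | aio=threads | aio=native |
>> |----------------------+-------------+------------|
>> | off                  |        8139 |       7649 |
>> | off (w/o ZERO_RANGE) |        2965 |       2779 |
>> | metadata             |        7768 |       8265 |
>> | falloc               |        7742 |       7956 |
>> | full                 |       41389 |      56668 |
>> |----------------------+-------------+------------|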
>> 
>
> So this seems like Dave's suggestion to use native aio produced more
> predictable results with full file prealloc being a bit faster than per
> cluster prealloc. Not sure why that isn't the case with aio=threads. I
> was wondering if perhaps the threading affects something indirectly like
> the qcow2 metadata allocation itself, but I guess that would be
> inconsistent with ext4 showing a notable jump from (1) to (4) (assuming
> the previous ext4 numbers were with aio=threads).

Yes, I took the ext4 numbers with aio=threads

>> raw:
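>> |---------------+-------------+------------|
>> | preallocation | aio=threads | aio=native |
>> |---------------+-------------+------------|
>> | off           |        7647 |       7928 |
>> | falloc        |        7662 |       7856 |
>> | full          |       45224 |      58627 |
>> |---------------+-------------+------------|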
>> 
>> A qcow2 file with preallocation=metadata is more or less similar to a
>> sparse raw file (and the numbers are indeed similar).
>> 
>> preallocation=off on qcow2 does not have an equivalent on raw files.
>
> It sounds like preallocation=off for qcow2 would be roughly equivalent
> to a raw file with a 64k extent size hint (on XFS).

There's the overhead of handling the qcow2 metadata but QEMU keeps a
memory cache so it should not be too big.

Berto

Re: [PATCH v7 39/47] blockdev: Fix active commit choice

2020-08-21 Thread Kevin Wolf

Am 25.06.2020 um 17:22 hat Max Reitz geschrieben:
> We have to perform an active commit whenever the top node has a parent
> that has taken the WRITE permission on it.
> 
> Signed-off-by: Max Reitz 
> Reviewed-by: Vladimir Sementsov-Ogievskiy 
> ---
>  blockdev.c | 24 +---
>  1 file changed, 21 insertions(+), 3 deletions(-)
> 
> diff --git a/blockdev.c b/blockdev.c
> index 402f1d1df1..237fffbe53 100644
> --- a/blockdev.c
> +++ b/blockdev.c
> @@ -2589,6 +2589,7 @@ void qmp_block_commit(bool has_job_id, const char 
> *job_id, const char *device,
>  AioContext *aio_context;
>  Error *local_err = NULL;
>  int job_flags = JOB_DEFAULT;
> +uint64_t top_perm, top_shared;
>  
>  if (!has_speed) {
>  speed = 0;
> @@ -2704,14 +2705,31 @@ void qmp_block_commit(bool has_job_id, const char 
> *job_id, const char *device,
>  goto out;
>  }
>  
> -if (top_bs == bs) {
> +/*
> + * Active commit is required if and only if someone has taken a
> + * WRITE permission on the top node.

...or if someone wants to take a WRITE permission while the job is
running.

Future intentions of the user are something that we can't know, so maybe
this should become an option in the future (not in this series, of
course).

>Historically, we have always
> + * used active commit for top nodes, so continue that practice.
> + * (Active commit is never really wrong.)
> + */

Changing the practice would break compatibility with clients that start
an active commit job and then attach it to a read-write device, so we
must continue the practice. I think the comment should be clearer about
this, it sounds more like "no reason, but why not".

This is even more problematic because the commit job doesn't unshare
BLK_PERM_WRITE yet, so it would lead to silent corruption rather than an
error.

> +bdrv_get_cumulative_perm(top_bs, &top_perm, &top_shared);
> +if (top_perm & BLK_PERM_WRITE ||
> +bdrv_skip_filters(top_bs) == bdrv_skip_filters(bs))
> +{
>  if (has_backing_file) {
>  error_setg(errp, "'backing-file' specified,"
>   " but 'top' is the active layer");

Hm, this error message isn't accurate any more.

In fact, the implementation isn't consistent with the QAPI documentation
any more, because backing-file is only an error for the top level.

>  goto out;
>  }
> -commit_active_start(has_job_id ? job_id : NULL, bs, base_bs,
> -job_flags, speed, on_error,
> +if (!has_job_id) {
> +/*
> + * Emulate here what block_job_create() does, because it
> + * is possible that @bs != @top_bs (the block job should
> + * be named after @bs, even if @top_bs is the actual
> + * source)
> + */

Should it? Oh, yes, looks like it. block-commit is weird. :-)

> +job_id = bdrv_get_device_name(bs);
> +}
> +commit_active_start(job_id, top_bs, base_bs, job_flags, speed, 
> on_error,
>  filter_node_name, NULL, NULL, false, &local_err);
>  } else {
>  BlockDriverState *overlay_bs = bdrv_find_overlay(bs, top_bs);

Kevin

[PATCH] util/meson.build: fix fdmon-io_uring build

2020-08-21 Thread Stefano Garzarella

libqemuutil.a build fails with this error:

  /usr/bin/ld: libqemuutil.a(util_fdmon-io_uring.c.o): in function `get_sqe':
  qemu/build/../util/fdmon-io_uring.c:83: undefined reference to 
`io_uring_get_sqe'
  /usr/bin/ld: qemu/build/../util/fdmon-io_uring.c:92: undefined reference to 
`io_uring_submit'
  /usr/bin/ld: qemu/build/../util/fdmon-io_uring.c:96: undefined reference to 
`io_uring_get_sqe'
  /usr/bin/ld: libqemuutil.a(util_fdmon-io_uring.c.o): in function 
`fdmon_io_uring_wait':
  qemu/build/../util/fdmon-io_uring.c:289: undefined reference to 
`io_uring_submit_and_wait'
  /usr/bin/ld: libqemuutil.a(util_fdmon-io_uring.c.o): in function 
`fdmon_io_uring_setup':
  qemu/build/../util/fdmon-io_uring.c:328: undefined reference to 
`io_uring_queue_init'
  /usr/bin/ld: libqemuutil.a(util_fdmon-io_uring.c.o): in function 
`fdmon_io_uring_destroy':
  qemu/build/../util/fdmon-io_uring.c:343: undefined reference to 
`io_uring_queue_exit'
  collect2: error: ld returned 1 exit status

This patch fixes the issue by adding the 'linux_io_uring' dependency for
fdmon-io_uring.c

Fixes: a81df1b68b ("libqemuutil, qapi, trace: convert to meson")
Cc: pbonz...@redhat.com
Signed-off-by: Stefano Garzarella 
---
 util/meson.build | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/util/meson.build b/util/meson.build
index 23b8ad459b..e6b207a99e 100644
--- a/util/meson.build
+++ b/util/meson.build
@@ -4,7 +4,7 @@ util_ss.add(when: 'CONFIG_ATOMIC64', if_false: 
files('atomic64.c'))
 util_ss.add(when: 'CONFIG_POSIX', if_true: files('aio-posix.c'))
 util_ss.add(when: 'CONFIG_POSIX', if_true: files('fdmon-poll.c'))
 util_ss.add(when: 'CONFIG_EPOLL_CREATE1', if_true: files('fdmon-epoll.c'))
-util_ss.add(when: 'CONFIG_LINUX_IO_URING', if_true: files('fdmon-io_uring.c'))
+util_ss.add(when: ['CONFIG_LINUX_IO_URING', linux_io_uring], if_true: 
files('fdmon-io_uring.c'))
 util_ss.add(when: 'CONFIG_POSIX', if_true: files('compatfd.c'))
 util_ss.add(when: 'CONFIG_POSIX', if_true: files('event_notifier-posix.c'))
 util_ss.add(when: 'CONFIG_POSIX', if_true: files('mmap-alloc.c'))
-- 
2.26.2

Re: [PATCH v7 37/47] qemu-img: Use child access functions

2020-08-21 Thread Kevin Wolf

Am 25.06.2020 um 17:22 hat Max Reitz geschrieben:
> This changes iotest 204's output, because blkdebug on top of a COW node
> used to make qemu-img map disregard the rest of the backing chain (the
> backing chain was broken by the filter).  With this patch, the
> allocation in the base image is reported correctly.
> 
> Signed-off-by: Max Reitz 

> @@ -3437,6 +3441,7 @@ static int img_rebase(int argc, char **argv)
>  uint8_t *buf_old = NULL;
>  uint8_t *buf_new = NULL;
>  BlockDriverState *bs = NULL, *prefix_chain_bs = NULL;
> +BlockDriverState *unfiltered_bs;
>  char *filename;
>  const char *fmt, *cache, *src_cache, *out_basefmt, *out_baseimg;
>  int c, flags, src_flags, ret;
> @@ -3571,6 +3576,8 @@ static int img_rebase(int argc, char **argv)
>  }
>  bs = blk_bs(blk);
>  
> +unfiltered_bs = bdrv_skip_filters(bs);
> +
>  if (out_basefmt != NULL) {
>  if (bdrv_find_format(out_basefmt) == NULL) {
>  error_report("Invalid format name: '%s'", out_basefmt);
> @@ -3582,7 +3589,7 @@ static int img_rebase(int argc, char **argv)
>  /* For safe rebasing we need to compare old and new backing file */
>  if (!unsafe) {
>  QDict *options = NULL;
> -BlockDriverState *base_bs = backing_bs(bs);
> +BlockDriverState *base_bs = bdrv_cow_bs(unfiltered_bs);
>  
>  if (base_bs) {
>  blk_old_backing = blk_new(qemu_get_aio_context(),
> @@ -3738,8 +3745,9 @@ static int img_rebase(int argc, char **argv)
>   * If cluster wasn't changed since prefix_chain, we don't 
> need
>   * to take action
>   */
> -ret = bdrv_is_allocated_above(backing_bs(bs), 
> prefix_chain_bs,
> -  false, offset, n, &n);
> +ret = bdrv_is_allocated_above(bdrv_cow_bs(unfiltered_bs),
> +  prefix_chain_bs, false,
> +  offset, n, &n);
>  if (ret < 0) {
>  error_report("error while reading image metadata: %s",
>   strerror(-ret));

img_rebase() has these additional calls:

/* If the cluster is allocated, we don't need to take action */
ret = bdrv_is_allocated(bs, offset, n, &n);

And:

if (out_baseimg && *out_baseimg) {
ret = bdrv_change_backing_file(bs, out_baseimg, out_basefmt);
} else {
ret = bdrv_change_backing_file(bs, NULL, NULL);
}

Shouldn't they use unfiltered_bs?

(Not that it's likely that anyone would use 'qemu-img rebase' with a
filter, but while you're touching it...)

Kevin

[PATCH v3 2/2] hw/sd: sd: Correct the maximum size of a Standard Capacity SD Memory Card

2020-08-21 Thread Bin Meng

From: Bin Meng 

Per the SD spec, Standard Capacity SD Memory Card (SDSC) supports
capacity up to and including 2 GiB.

Fixes: 2d7adea4fe ("hw/sd: Support SDHC size cards")
Signed-off-by: Bin Meng 
Reviewed-by: Philippe Mathieu-Daudé 
Tested-by: Sai Pavan Boddu 

---

Changes in v3:
- add Reviewed-by tag from Philippe Mathieu-Daudé
- add Tested-by tag from Sai Pavan Boddu

Changes in v2:
- fix SDSC size check in sd_set_csd() too

 hw/sd/sd.c | 6 --
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/hw/sd/sd.c b/hw/sd/sd.c
index 3226404..254d713 100644
--- a/hw/sd/sd.c
+++ b/hw/sd/sd.c
@@ -50,6 +50,8 @@
 
 //#define DEBUG_SD 1
 
+#define SDSC_MAX_CAPACITY   (2 * GiB)
+
 typedef enum {
 sd_r0 = 0,/* no response */
 sd_r1,/* normal response command */
@@ -313,7 +315,7 @@ static void sd_ocr_powerup(void *opaque)
 /* card power-up OK */
 sd->ocr = FIELD_DP32(sd->ocr, OCR, CARD_POWER_UP, 1);
 
-if (sd->size > 1 * GiB) {
+if (sd->size > SDSC_MAX_CAPACITY) {
 sd->ocr = FIELD_DP32(sd->ocr, OCR, CARD_CAPACITY, 1);
 }
 }
@@ -385,7 +387,7 @@ static void sd_set_csd(SDState *sd, uint64_t size)
 uint32_t sectsize = (1 << (SECTOR_SHIFT + 1)) - 1;
 uint32_t wpsize = (1 << (WPGROUP_SHIFT + 1)) - 1;
 
-if (size <= 1 * GiB) { /* Standard Capacity SD */
+if (size <= SDSC_MAX_CAPACITY) { /* Standard Capacity SD */
 sd->csd[0] = 0x00; /* CSD structure */
 sd->csd[1] = 0x26; /* Data read access-time-1 */
 sd->csd[2] = 0x00; /* Data read access-time-2 */
-- 
2.7.4

[PATCH v3 1/2] hw/sd: sd: Fix incorrect populated function switch status data structure

2020-08-21 Thread Bin Meng

From: Bin Meng 

At present the function switch status data structure bit [399:376]
are wrongly populated. These 3 bytes encode function switch status
for the 6 function groups, with 4 bits per group, starting from
function group 6 at bit 399, then followed by function group 5 at
bit 395, and so on.

However, the code mistakenly fills in the function group 1 status
at bit 399. This fixes the code logic.

Fixes: a1bb27b1e9 ("SD card emulation (initial implementation)")
Signed-off-by: Bin Meng 
Reviewed-by: Philippe Mathieu-Daudé 
Tested-by: Sai Pavan Boddu 

---

Changes in v3:
- add Tested-by tag from Sai Pavan Boddu

Changes in v2:
- remove the pointless zero initialization

 hw/sd/sd.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/hw/sd/sd.c b/hw/sd/sd.c
index fad9cf1..3226404 100644
--- a/hw/sd/sd.c
+++ b/hw/sd/sd.c
@@ -806,11 +806,12 @@ static void sd_function_switch(SDState *sd, uint32_t arg)
 sd->data[11] = 0x43;
 sd->data[12] = 0x80;   /* Supported group 1 functions */
 sd->data[13] = 0x03;
+
 for (i = 0; i < 6; i ++) {
 new_func = (arg >> (i * 4)) & 0x0f;
 if (mode && new_func != 0x0f)
 sd->function_group[i] = new_func;
-sd->data[14 + (i >> 1)] = new_func << ((i * 4) & 4);
+sd->data[16 - (i >> 1)] |= new_func << ((i % 2) * 4);
 }
 memset(&sd->data[17], 0, 47);
 stw_be_p(sd->data + 64, sd_crc16(sd->data, 64));
-- 
2.7.4

Re: [PATCH 0/7] hw/sd: Use sdbus_read_data/sdbus_write_data for multiple bytes access

2020-08-21 Thread Philippe Mathieu-Daudé

On 8/14/20 11:23 AM, Philippe Mathieu-Daudé wrote:
> Introduce sdbus_read_data() and sdbus_write_data() methods to
> access multiple bytes on the data line of a SD bus.
> 
> I haven't named them sdbus_access_block() because I expect a
> block to be a power of 2, while there is no such restriction
> on the SD bus (think of SPI).
> 
> We can also simplify the bcm2835_sdhost and pl181 models, but
> it is simpler to first let them use the Fifo32 API.
> 
> Based-on: <20200705204630.4133-1-f4...@amsat.org>
> "hw/sd: convert legacy SDHCI devices to the SDBus API"
> https://www.mail-archive.com/qemu-devel@nongnu.org/msg720136.html
> 
> Philippe Mathieu-Daudé (7):
>   hw/sd: Rename read/write_data() as read/write_byte()
>   hw/sd: Rename sdbus_write_data() as sdbus_write_byte()
>   hw/sd: Rename sdbus_read_data() as sdbus_read_byte()
>   hw/sd: Add sdbus_write_data() to write multiples bytes on the data
> line
>   hw/sd: Use sdbus_write_data() instead of sdbus_write_byte when
> possible
>   hw/sd: Add sdbus_read_data() to read multiples bytes on the data line
>   hw/sd: Use sdbus_read_data() instead of sdbus_read_byte() when
> possible

Thanks, series applied to the sd-next tree.

Re: [PATCH v2 0/3] hw/sd: Add Cadence SDHCI emulation

2020-08-21 Thread Bin Meng

Hi Philippe,

On Fri, Aug 21, 2020 at 10:21 PM Philippe Mathieu-Daudé  wrote:
>
> Hi Bin,
>
> On 8/21/20 2:54 AM, Bin Meng wrote:
> > Hi Philippe,
> >
> > On Fri, Aug 21, 2020 at 2:04 AM Philippe Mathieu-Daudé  
> > wrote:
> >>
> >> Hi Sai Pavan, you said you were interested to test the first 2
> >> patches. FYI I plan to queue them and send the pull request tomorrow
> >> or Saturday the latest.
> >
> > Have you got a chance to review the v2 of 3rd patch?
> >
> > "hw/sd: Add Cadence SDHCI emulation"
>
> I'll have a look at it, but it makes sense to merge it via the
> tree using it (so the RISCV tree).

Thank you. Sure I will include the Cadence SDHCI patch in the
PolarFire SoC support series in the next version.

>
> Meanwhile I'm queueing patches 1 and 2 to my sd-next tree,
> adding the Tested-by from Sai Pavan from:
> https://www.mail-archive.com/qemu-devel@nongnu.org/msg732027.html
>

I just noticed that the v2 patch has the wrong author email address,
so I plan to send v3 of patch 1 and 2 to correct it, with Sai Pavan's
Tested-by tag. Sorry!

Regards,
Bin

Re: [PATCH v2 0/3] hw/sd: Add Cadence SDHCI emulation

2020-08-21 Thread Philippe Mathieu-Daudé

Hi Bin,

On 8/21/20 2:54 AM, Bin Meng wrote:
> Hi Philippe,
> 
> On Fri, Aug 21, 2020 at 2:04 AM Philippe Mathieu-Daudé  
> wrote:
>>
>> Hi Sai Pavan, you said you were interested to test the first 2
>> patches. FYI I plan to queue them and send the pull request tomorrow
>> or Saturday the latest.
> 
> Have you got a chance to review the v2 of 3rd patch?
> 
> "hw/sd: Add Cadence SDHCI emulation"

I'll have a look at it, but it makes sense to merge it via the
tree using it (so the RISCV tree).

Meanwhile I'm queueing patches 1 and 2 to my sd-next tree,
adding the Tested-by from Sai Pavan from:
https://www.mail-archive.com/qemu-devel@nongnu.org/msg732027.html

Thanks,

Phil.

> 
> Regards,
> Bin
>

[PATCH v5 10/10] iotests: add 298 to test new preallocate filter driver

2020-08-21 Thread Vladimir Sementsov-Ogievskiy

Signed-off-by: Vladimir Sementsov-Ogievskiy 
Reviewed-by: Stefan Hajnoczi 
---
 tests/qemu-iotests/298 | 50 ++
 tests/qemu-iotests/298.out |  6 +
 tests/qemu-iotests/group   |  1 +
 3 files changed, 57 insertions(+)
 create mode 100644 tests/qemu-iotests/298
 create mode 100644 tests/qemu-iotests/298.out

diff --git a/tests/qemu-iotests/298 b/tests/qemu-iotests/298
new file mode 100644
index 00..4f2087352a
--- /dev/null
+++ b/tests/qemu-iotests/298
@@ -0,0 +1,50 @@
+#!/usr/bin/env python3
+#
+# Test for preallocate filter
+#
+# Copyright (c) 2020 Virtuozzo International GmbH.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+
+import os
+import iotests
+from iotests import log
+
+iotests.script_initialize(supported_fmts=['qcow2'])
+iotests.verify_o_direct()
+
+size = 10 * 1024 * 1024
+disk = iotests.file_path('disk')
+
+iotests.qemu_img_create('-f', iotests.imgfmt, disk, str(size))
+
+opts = f'driver={iotests.imgfmt},' \
+f'file.driver=preallocate,file.file.filename={disk}'
+p = iotests.QemuIoInteractive('--image-opts', '-t', 'none', opts)
+
+log(p.cmd('write 0 1M'), filters=[iotests.filter_qemu_io])
+p.cmd('flush')
+
+if os.path.getsize(disk) > 100 * 1024 * 1024:
+log('file in progress is big, preallocation works')
+
+p.close()
+
+if os.path.getsize(disk) < 10 * 1024 * 1024:
+log('file is small after close')
+
+# Check that there are no leaks.
+log(iotests.qemu_img_pipe('check', '-f', 'qcow2', disk),
+filters=[iotests.filter_img_check])
diff --git a/tests/qemu-iotests/298.out b/tests/qemu-iotests/298.out
new file mode 100644
index 00..baf8f8425c
--- /dev/null
+++ b/tests/qemu-iotests/298.out
@@ -0,0 +1,6 @@
+wrote 1048576/1048576 bytes at offset 0
+1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+
+file in progress is big, preallocation works
+file is small after close
+No errors were found on the image.
diff --git a/tests/qemu-iotests/group b/tests/qemu-iotests/group
index 7f76066640..cdcde2fe48 100644
--- a/tests/qemu-iotests/group
+++ b/tests/qemu-iotests/group
@@ -306,6 +306,7 @@
 295 rw
 296 rw
 297 meta
+298 auto quick
 299 auto quick
 301 backing quick
 302 quick
-- 
2.21.3

[PATCH v5 09/10] iotests.py: add filter_img_check

2020-08-21 Thread Vladimir Sementsov-Ogievskiy

Add an analog of the bash _filter_qemu_img_check to the Python framework.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
Reviewed-by: Stefan Hajnoczi 
---
 tests/qemu-iotests/iotests.py | 4 
 1 file changed, 4 insertions(+)

diff --git a/tests/qemu-iotests/iotests.py b/tests/qemu-iotests/iotests.py
index 7f1aa187a9..14f1d47d52 100644
--- a/tests/qemu-iotests/iotests.py
+++ b/tests/qemu-iotests/iotests.py
@@ -403,6 +403,10 @@ def filter_img_info(output, filename):
 lines.append(line)
 return '\n'.join(lines)
 
+def filter_img_check(msg):
+msg = re.sub(r'.*allocated.*fragmented.*compressed clusters', '', msg)
+return re.sub(r'Image end offset: [0-9]+', '', msg).strip()
+
 def filter_imgfmt(msg):
 return msg.replace(imgfmt, 'IMGFMT')
 
-- 
2.21.3

[PATCH v5 08/10] iotests.py: add verify_o_direct helper

2020-08-21 Thread Vladimir Sementsov-Ogievskiy

Add python notrun-helper similar to _check_o_direct for bash tests.
To be used in the following commit.

Suggested-by: Nir Soffer 
Signed-off-by: Vladimir Sementsov-Ogievskiy 
---
 tests/qemu-iotests/iotests.py | 12 
 1 file changed, 12 insertions(+)

diff --git a/tests/qemu-iotests/iotests.py b/tests/qemu-iotests/iotests.py
index 717b5b652c..7f1aa187a9 100644
--- a/tests/qemu-iotests/iotests.py
+++ b/tests/qemu-iotests/iotests.py
@@ -29,6 +29,7 @@ import struct
 import subprocess
 import sys
 import time
+import errno
 from typing import (Any, Callable, Dict, Iterable,
 List, Optional, Sequence, Tuple, TypeVar)
 import unittest
@@ -1083,6 +1084,17 @@ def _verify_aio_mode(supported_aio_modes: Sequence[str] 
= ()) -> None:
 if supported_aio_modes and (aiomode not in supported_aio_modes):
 notrun('not suitable for this aio mode: %s' % aiomode)
 
+def verify_o_direct() -> None:
+with FilePath('test_o_direct') as f:
+try:
+fd = os.open(f, os.O_DIRECT | os.O_CREAT | os.O_RDWR)
+except OSError as e:
+if e.errno != errno.EINVAL:
+raise
+notrun(f'file system at {test_dir} does not support O_DIRECT')
+else:
+os.close(fd)
+
 def supports_quorum():
 return 'quorum' in qemu_img_pipe('--help')
 
-- 
2.21.3

[PATCH v5 04/10] block/io: bdrv_wait_serialising_requests_locked: drop extra bs arg

2020-08-21 Thread Vladimir Sementsov-Ogievskiy

bs is linked in req, so there is no need to pass it separately. Most of
the tracked-requests API doesn't have a bs argument. Actually, after this
patch only tracked_request_begin has it, but that is on purpose.

While being here, also add a comment about what "_locked" is.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
Reviewed-by: Stefan Hajnoczi 
---
 block/io.c | 10 +-
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/block/io.c b/block/io.c
index 5b96715058..36bbe4b9b1 100644
--- a/block/io.c
+++ b/block/io.c
@@ -761,16 +761,16 @@ bdrv_find_conflicting_request(BdrvTrackedRequest *self)
 return NULL;
 }
 
+/* Called with self->bs->reqs_lock held */
 static bool coroutine_fn
-bdrv_wait_serialising_requests_locked(BlockDriverState *bs,
-  BdrvTrackedRequest *self)
+bdrv_wait_serialising_requests_locked(BdrvTrackedRequest *self)
 {
 BdrvTrackedRequest *req;
 bool waited = false;
 
 while ((req = bdrv_find_conflicting_request(self))) {
 self->waiting_for = req;
-qemu_co_queue_wait(&req->wait_queue, &bs->reqs_lock);
+qemu_co_queue_wait(&req->wait_queue, &self->bs->reqs_lock);
 self->waiting_for = NULL;
 waited = true;
 }
@@ -794,7 +794,7 @@ bool bdrv_mark_request_serialising(BdrvTrackedRequest *req, 
uint64_t align)
 
 req->overlap_offset = MIN(req->overlap_offset, overlap_offset);
 req->overlap_bytes = MAX(req->overlap_bytes, overlap_bytes);
-waited = bdrv_wait_serialising_requests_locked(bs, req);
+waited = bdrv_wait_serialising_requests_locked(req);
 qemu_co_mutex_unlock(&bs->reqs_lock);
 return waited;
 }
@@ -876,7 +876,7 @@ static bool coroutine_fn 
bdrv_wait_serialising_requests(BdrvTrackedRequest *self
 }
 
 qemu_co_mutex_lock(&bs->reqs_lock);
-waited = bdrv_wait_serialising_requests_locked(bs, self);
+waited = bdrv_wait_serialising_requests_locked(self);
 qemu_co_mutex_unlock(&bs->reqs_lock);
 
 return waited;
-- 
2.21.3

[PATCH v5 03/10] block/io: split out bdrv_find_conflicting_request

2020-08-21 Thread Vladimir Sementsov-Ogievskiy

To be reused separately.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
Reviewed-by: Stefan Hajnoczi 
---
 block/io.c | 71 +++---
 1 file changed, 41 insertions(+), 30 deletions(-)

diff --git a/block/io.c b/block/io.c
index b18680a842..5b96715058 100644
--- a/block/io.c
+++ b/block/io.c
@@ -727,43 +727,54 @@ static bool tracked_request_overlaps(BdrvTrackedRequest 
*req,
 return true;
 }
 
+/* Called with self->bs->reqs_lock held */
+static BdrvTrackedRequest *
+bdrv_find_conflicting_request(BdrvTrackedRequest *self)
+{
+BdrvTrackedRequest *req;
+
+QLIST_FOREACH(req, &self->bs->tracked_requests, list) {
+if (req == self || (!req->serialising && !self->serialising)) {
+continue;
+}
+if (tracked_request_overlaps(req, self->overlap_offset,
+ self->overlap_bytes))
+{
+/*
+ * Hitting this means there was a reentrant request, for
+ * example, a block driver issuing nested requests.  This must
+ * never happen since it means deadlock.
+ */
+assert(qemu_coroutine_self() != req->co);
+
+/*
+ * If the request is already (indirectly) waiting for us, or
+ * will wait for us as soon as it wakes up, then just go on
+ * (instead of producing a deadlock in the former case).
+ */
+if (!req->waiting_for) {
+return req;
+}
+}
+}
+
+return NULL;
+}
+
 static bool coroutine_fn
 bdrv_wait_serialising_requests_locked(BlockDriverState *bs,
   BdrvTrackedRequest *self)
 {
 BdrvTrackedRequest *req;
-bool retry;
 bool waited = false;
 
-do {
-retry = false;
-QLIST_FOREACH(req, &bs->tracked_requests, list) {
-if (req == self || (!req->serialising && !self->serialising)) {
-continue;
-}
-if (tracked_request_overlaps(req, self->overlap_offset,
- self->overlap_bytes))
-{
-/* Hitting this means there was a reentrant request, for
- * example, a block driver issuing nested requests.  This must
- * never happen since it means deadlock.
- */
-assert(qemu_coroutine_self() != req->co);
-
-/* If the request is already (indirectly) waiting for us, or
- * will wait for us as soon as it wakes up, then just go on
- * (instead of producing a deadlock in the former case). */
-if (!req->waiting_for) {
-self->waiting_for = req;
-qemu_co_queue_wait(&req->wait_queue, &bs->reqs_lock);
-self->waiting_for = NULL;
-retry = true;
-waited = true;
-break;
-}
-}
-}
-} while (retry);
+while ((req = bdrv_find_conflicting_request(self))) {
+self->waiting_for = req;
+qemu_co_queue_wait(&req->wait_queue, &bs->reqs_lock);
+self->waiting_for = NULL;
+waited = true;
+}
+
 return waited;
 }
 
-- 
2.21.3

[PATCH v5 05/10] block: bdrv_mark_request_serialising: split non-waiting function

2020-08-21 Thread Vladimir Sementsov-Ogievskiy

We'll need a separate function, which will only "mark" request
serialising with specified align but not wait for conflicting
requests. So, it will be like old bdrv_mark_request_serialising(),
before merging bdrv_wait_serialising_requests_locked() into it.

To reduce the possible mess, let's do the following:

Public function that does both marking and waiting will be called
bdrv_make_request_serialising, and private function which will only
"mark" will be called tracked_request_set_serialising().

Signed-off-by: Vladimir Sementsov-Ogievskiy 
---
 include/block/block_int.h |  3 ++-
 block/file-posix.c|  2 +-
 block/io.c| 35 +++
 3 files changed, 26 insertions(+), 14 deletions(-)

diff --git a/include/block/block_int.h b/include/block/block_int.h
index 38dec0275b..4d56a1b141 100644
--- a/include/block/block_int.h
+++ b/include/block/block_int.h
@@ -1039,7 +1039,8 @@ extern unsigned int bdrv_drain_all_count;
 void bdrv_apply_subtree_drain(BdrvChild *child, BlockDriverState *new_parent);
 void bdrv_unapply_subtree_drain(BdrvChild *child, BlockDriverState 
*old_parent);
 
-bool coroutine_fn bdrv_mark_request_serialising(BdrvTrackedRequest *req, 
uint64_t align);
+bool coroutine_fn bdrv_make_request_serialising(BdrvTrackedRequest *req,
+uint64_t align);
 BdrvTrackedRequest *coroutine_fn bdrv_co_get_self_request(BlockDriverState 
*bs);
 
 int get_tmp_filename(char *filename, int size);
diff --git a/block/file-posix.c b/block/file-posix.c
index 9a00d4190a..560d1c0a94 100644
--- a/block/file-posix.c
+++ b/block/file-posix.c
@@ -2957,7 +2957,7 @@ raw_do_pwrite_zeroes(BlockDriverState *bs, int64_t 
offset, int bytes,
 req->bytes = end - req->offset;
 req->overlap_bytes = req->bytes;
 
-bdrv_mark_request_serialising(req, bs->bl.request_alignment);
+bdrv_make_request_serialising(req, bs->bl.request_alignment);
 }
 #endif
 
diff --git a/block/io.c b/block/io.c
index 36bbe4b9b1..dd28befb08 100644
--- a/block/io.c
+++ b/block/io.c
@@ -778,15 +778,14 @@ bdrv_wait_serialising_requests_locked(BdrvTrackedRequest 
*self)
 return waited;
 }
 
-bool bdrv_mark_request_serialising(BdrvTrackedRequest *req, uint64_t align)
+/* Called with req->bs->reqs_lock held */
+static void tracked_request_set_serialising(BdrvTrackedRequest *req,
+uint64_t align)
 {
-BlockDriverState *bs = req->bs;
 int64_t overlap_offset = req->offset & ~(align - 1);
 uint64_t overlap_bytes = ROUND_UP(req->offset + req->bytes, align)
- overlap_offset;
-bool waited;
 
-qemu_co_mutex_lock(&bs->reqs_lock);
 if (!req->serialising) {
 atomic_inc(&req->bs->serialising_in_flight);
 req->serialising = true;
@@ -794,9 +793,6 @@ bool bdrv_mark_request_serialising(BdrvTrackedRequest *req, 
uint64_t align)
 
 req->overlap_offset = MIN(req->overlap_offset, overlap_offset);
 req->overlap_bytes = MAX(req->overlap_bytes, overlap_bytes);
-waited = bdrv_wait_serialising_requests_locked(req);
-qemu_co_mutex_unlock(&bs->reqs_lock);
-return waited;
 }
 
 /**
@@ -882,6 +878,21 @@ static bool coroutine_fn 
bdrv_wait_serialising_requests(BdrvTrackedRequest *self
 return waited;
 }
 
+bool coroutine_fn bdrv_make_request_serialising(BdrvTrackedRequest *req,
+uint64_t align)
+{
+bool waited;
+
+qemu_co_mutex_lock(&req->bs->reqs_lock);
+
+tracked_request_set_serialising(req, align);
+waited = bdrv_wait_serialising_requests_locked(req);
+
+qemu_co_mutex_unlock(&req->bs->reqs_lock);
+
+return waited;
+}
+
 static int bdrv_check_byte_request(BlockDriverState *bs, int64_t offset,
size_t size)
 {
@@ -1492,7 +1503,7 @@ static int coroutine_fn bdrv_aligned_preadv(BdrvChild 
*child,
  * with each other for the same cluster.  For example, in copy-on-read
  * it ensures that the CoR read and write operations are atomic and
  * guest writes cannot interleave between them. */
-bdrv_mark_request_serialising(req, bdrv_get_cluster_size(bs));
+bdrv_make_request_serialising(req, bdrv_get_cluster_size(bs));
 } else {
 bdrv_wait_serialising_requests(req);
 }
@@ -1903,7 +1914,7 @@ bdrv_co_write_req_prepare(BdrvChild *child, int64_t 
offset, uint64_t bytes,
 assert(!(flags & ~BDRV_REQ_MASK));
 
 if (flags & BDRV_REQ_SERIALISING) {
-bdrv_mark_request_serialising(req, bdrv_get_cluster_size(bs));
+bdrv_make_request_serialising(req, bdrv_get_cluster_size(bs));
 } else {
 bdrv_wait_serialising_requests(req);
 }
@@ -2069,7 +2080,7 @@ static int coroutine_fn bdrv_co_do_zero_pwritev(BdrvChild 
*child,
 
 padding = bdrv_init_padding(bs, offset, bytes, &pad);
 if (padding) {
-bdrv_mark_request_serialising(req, align);
+

[PULL 09/14] qcow2_format.py: collect fields to dump in JSON format

2020-08-21 Thread Eric Blake

From: Andrey Shinkevich 

As __dict__ is being extended with class members we do not want to
print, add the to_json() method to classes that returns a json-dumpable
object with desired fields and their values. Extend it in subclass when
necessary to print the final dictionary in the JSON output which
follows.

Suggested-by: Vladimir Sementsov-Ogievskiy 
Signed-off-by: Andrey Shinkevich 
Message-Id: 
<1596742557-320265-10-git-send-email-andrey.shinkev...@virtuozzo.com>
Reviewed-by: Vladimir Sementsov-Ogievskiy 
Signed-off-by: Eric Blake 
---
 tests/qemu-iotests/qcow2_format.py | 36 ++
 1 file changed, 36 insertions(+)

diff --git a/tests/qemu-iotests/qcow2_format.py 
b/tests/qemu-iotests/qcow2_format.py
index de0adcbf9db0..5a298b2f1357 100644
--- a/tests/qemu-iotests/qcow2_format.py
+++ b/tests/qemu-iotests/qcow2_format.py
@@ -119,6 +119,9 @@ class Qcow2Struct(metaclass=Qcow2StructMeta):

 print('{:<25} {}'.format(f[2], value_str))

+def to_json(self):
+return dict((f[2], self.__dict__[f[2]]) for f in self.fields)
+

 class Qcow2BitmapExt(Qcow2Struct):

@@ -151,6 +154,11 @@ class Qcow2BitmapExt(Qcow2Struct):
 print()
 entry.dump()

+def to_json(self):
+fields_dict = super().to_json()
+fields_dict['bitmap_directory'] = self.bitmap_directory
+return fields_dict
+

 class Qcow2BitmapDirEntry(Qcow2Struct):

@@ -189,6 +197,14 @@ class Qcow2BitmapDirEntry(Qcow2Struct):
 super(Qcow2BitmapDirEntry, self).dump()
 self.bitmap_table.dump()

+def to_json(self):
+# Put the name ahead of the dict
+return {
+'name': self.name,
+**super().to_json(),
+'bitmap_table': self.bitmap_table
+}
+

 class Qcow2BitmapTableEntry(Qcow2Struct):

@@ -214,6 +230,10 @@ class Qcow2BitmapTableEntry(Qcow2Struct):
 else:
 self.type = 'all-zeroes'

+def to_json(self):
+return {'type': self.type, 'offset': self.offset,
+'reserved': self.reserved}
+

 class Qcow2BitmapTable:

@@ -234,6 +254,9 @@ class Qcow2BitmapTable:
 size = 0
 print(f'{i:<14} {entry.type:<15} {size:<12} {entry.offset}')

+def to_json(self):
+return self.entries
+

 QCOW2_EXT_MAGIC_BITMAPS = 0x23852875

@@ -249,6 +272,9 @@ class QcowHeaderExtension(Qcow2Struct):
 0x44415441: 'Data file'
 }

+def to_json(self):
+return self.mapping.get(self.value, "")
+
 fields = (
 ('u32', Magic, 'magic'),
 ('u32', '{}', 'length')
@@ -311,6 +337,16 @@ class QcowHeaderExtension(Qcow2Struct):
 else:
 self.obj.dump()

+def to_json(self):
+# Put the name ahead of the dict
+res = {'name': self.Magic(self.magic), **super().to_json()}
+if self.obj is not None:
+res['data'] = self.obj
+else:
+res['data_str'] = self.data_str
+
+return res
+
 @classmethod
 def create(cls, magic, data):
 return QcowHeaderExtension(magic, len(data), data)
-- 
2.28.0

[PATCH v5 07/10] block: introduce preallocate filter

2020-08-21 Thread Vladimir Sementsov-Ogievskiy

It's intended to be inserted between format and protocol nodes to
preallocate additional space (expanding protocol file) on writes
crossing EOF. It improves performance for file-systems with slow
allocation.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
---
 docs/system/qemu-block-drivers.rst.inc |  26 +++
 qapi/block-core.json   |  20 +-
 block/preallocate.c| 291 +
 block/Makefile.objs|   1 +
 4 files changed, 337 insertions(+), 1 deletion(-)
 create mode 100644 block/preallocate.c

diff --git a/docs/system/qemu-block-drivers.rst.inc 
b/docs/system/qemu-block-drivers.rst.inc
index b052a6d14e..5e8a35c571 100644
--- a/docs/system/qemu-block-drivers.rst.inc
+++ b/docs/system/qemu-block-drivers.rst.inc
@@ -952,3 +952,29 @@ on host and see if there are locks held by the QEMU 
process on the image file.
 More than one byte could be locked by the QEMU instance, each byte of which
 reflects a particular permission that is acquired or protected by the running
 block driver.
+
+Filter drivers
+~~~~~~~~~~~~~~
+
+QEMU supports several filter drivers, which don't store any data, but do some
+additional tasks, hooking io requests.
+
+.. program:: filter-drivers
+.. option:: preallocate
+
+  Preallocate filter driver is intended to be inserted between format
+  and protocol nodes and does preallocation of some additional space
+  (expanding the protocol file) on write. It may be used for
+  file-systems with slow allocation.
+
+  Supported options:
+
+  .. program:: preallocate
+  .. option:: prealloc-align
+
+On preallocation, align file length to this number, default 1M.
+
+  .. program:: preallocate
+  .. option:: prealloc-size
+
+How much to preallocate, default 128M.
diff --git a/qapi/block-core.json b/qapi/block-core.json
index 197bdc1c36..b40448063b 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -2805,7 +2805,7 @@
 'cloop', 'compress', 'copy-on-read', 'dmg', 'file', 'ftp', 'ftps',
 'gluster', 'host_cdrom', 'host_device', 'http', 'https', 'iscsi',
 'luks', 'nbd', 'nfs', 'null-aio', 'null-co', 'nvme', 'parallels',
-'qcow', 'qcow2', 'qed', 'quorum', 'raw', 'rbd',
+'preallocate', 'qcow', 'qcow2', 'qed', 'quorum', 'raw', 'rbd',
 { 'name': 'replication', 'if': 'defined(CONFIG_REPLICATION)' },
 'sheepdog',
 'ssh', 'throttle', 'vdi', 'vhdx', 'vmdk', 'vpc', 'vvfat' ] }
@@ -3074,6 +3074,23 @@
   'data': { 'aes': 'QCryptoBlockOptionsQCow',
 'luks': 'QCryptoBlockOptionsLUKS'} }
 
+##
+# @BlockdevOptionsPreallocate:
+#
+# Filter driver intended to be inserted between format and protocol node
+# and do preallocation in protocol node on write.
+#
+# @prealloc-align: on preallocation, align file length to this number,
+# default 1048576 (1M)
+#
+# @prealloc-size: how much to preallocate, default 134217728 (128M)
+#
+# Since: 5.2
+##
+{ 'struct': 'BlockdevOptionsPreallocate',
+  'base': 'BlockdevOptionsGenericFormat',
+  'data': { '*prealloc-align': 'int', '*prealloc-size': 'int' } }
+
 ##
 # @BlockdevOptionsQcow2:
 #
@@ -3979,6 +3996,7 @@
   'null-co':'BlockdevOptionsNull',
   'nvme':   'BlockdevOptionsNVMe',
   'parallels':  'BlockdevOptionsGenericFormat',
+  'preallocate':'BlockdevOptionsPreallocate',
   'qcow2':  'BlockdevOptionsQcow2',
   'qcow':   'BlockdevOptionsQcow',
   'qed':'BlockdevOptionsGenericCOWFormat',
diff --git a/block/preallocate.c b/block/preallocate.c
new file mode 100644
index 00..bdf54dbd2f
--- /dev/null
+++ b/block/preallocate.c
@@ -0,0 +1,291 @@
+/*
+ * preallocate filter driver
+ *
+ * The driver performs preallocate operation: it is injected above
+ * some node, and before each write over EOF it does additional preallocating
+ * write-zeroes request.
+ *
+ * Copyright (c) 2020 Virtuozzo International GmbH.
+ *
+ * Author:
+ *  Sementsov-Ogievskiy Vladimir 
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "qemu/osdep.h"
+
+#include "qapi/error.h"
+#include "qemu/module.h"
+#include "qemu/option.h"
+#include "qemu/units.h"
+#include "block/block_int.h"
+
+
+typedef struct BDRVPreallocateState {
+int64_t prealloc_size;
+int64_t prealloc_align;
+
+/*
+ * Filter is started as not-active, so

[PATCH v5 02/10] block/io.c: drop assertion on double waiting for request serialisation

2020-08-21 Thread Vladimir Sementsov-Ogievskiy

The comment states that for a misaligned request we should already
have waited. But for bdrv_padding_rmw_read, we called
bdrv_mark_request_serialising with align = request_alignment, and now
we serialise with align = cluster_size. So we may have to wait again
with the larger alignment.

Note that the only user of BDRV_REQ_SERIALISING is backup, which issues
cluster-aligned requests, so it seems the assertion should not fire for
now. But it's wrong anyway.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
Reviewed-by: Paolo Bonzini 
---
 block/io.c | 11 +--
 1 file changed, 1 insertion(+), 10 deletions(-)

diff --git a/block/io.c b/block/io.c
index ad3a51ed53..b18680a842 100644
--- a/block/io.c
+++ b/block/io.c
@@ -1881,7 +1881,6 @@ bdrv_co_write_req_prepare(BdrvChild *child, int64_t 
offset, uint64_t bytes,
   BdrvTrackedRequest *req, int flags)
 {
 BlockDriverState *bs = child->bs;
-bool waited;
 int64_t end_sector = DIV_ROUND_UP(offset + bytes, BDRV_SECTOR_SIZE);
 
 if (bs->read_only) {
@@ -1893,15 +1892,7 @@ bdrv_co_write_req_prepare(BdrvChild *child, int64_t 
offset, uint64_t bytes,
 assert(!(flags & ~BDRV_REQ_MASK));
 
 if (flags & BDRV_REQ_SERIALISING) {
-waited = bdrv_mark_request_serialising(req, bdrv_get_cluster_size(bs));
-/*
- * For a misaligned request we should have already waited earlier,
- * because we come after bdrv_padding_rmw_read which must be called
- * with the request already marked as serialising.
- */
-assert(!waited ||
-   (req->offset == req->overlap_offset &&
-req->bytes == req->overlap_bytes));
+bdrv_mark_request_serialising(req, bdrv_get_cluster_size(bs));
 } else {
 bdrv_wait_serialising_requests(req);
 }
-- 
2.21.3

[PATCH v5 06/10] block: introduce BDRV_REQ_NO_WAIT flag

2020-08-21 Thread Vladimir Sementsov-Ogievskiy

Add a flag that makes a serialising request not wait: if there are
conflicting requests, just return an error immediately. It will be used
in the upcoming preallocate filter.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
---
 include/block/block.h |  9 -
 block/io.c| 11 ++-
 2 files changed, 18 insertions(+), 2 deletions(-)

diff --git a/include/block/block.h b/include/block/block.h
index b8f4e86e8d..877fda06a4 100644
--- a/include/block/block.h
+++ b/include/block/block.h
@@ -67,8 +67,15 @@ typedef enum {
  * written to qiov parameter which may be NULL.
  */
 BDRV_REQ_PREFETCH  = 0x200,
+
+/*
+ * If we need to wait for other requests, just fail immediately. Used
+ * only together with BDRV_REQ_SERIALISING.
+ */
+BDRV_REQ_NO_WAIT = 0x400,
+
 /* Mask of valid flags */
-BDRV_REQ_MASK   = 0x3ff,
+BDRV_REQ_MASK   = 0x7ff,
 } BdrvRequestFlags;
 
 typedef struct BlockSizes {
diff --git a/block/io.c b/block/io.c
index dd28befb08..c93b1e98a3 100644
--- a/block/io.c
+++ b/block/io.c
@@ -1912,9 +1912,18 @@ bdrv_co_write_req_prepare(BdrvChild *child, int64_t 
offset, uint64_t bytes,
 assert(!(bs->open_flags & BDRV_O_INACTIVE));
 assert((bs->open_flags & BDRV_O_NO_IO) == 0);
 assert(!(flags & ~BDRV_REQ_MASK));
+assert(!((flags & BDRV_REQ_NO_WAIT) && !(flags & BDRV_REQ_SERIALISING)));
 
 if (flags & BDRV_REQ_SERIALISING) {
-bdrv_make_request_serialising(req, bdrv_get_cluster_size(bs));
+QEMU_LOCK_GUARD(&bs->reqs_lock);
+
+tracked_request_set_serialising(req, bdrv_get_cluster_size(bs));
+
+if ((flags & BDRV_REQ_NO_WAIT) && bdrv_find_conflicting_request(req)) {
+return -EBUSY;
+}
+
+bdrv_wait_serialising_requests_locked(req);
 } else {
 bdrv_wait_serialising_requests(req);
 }
-- 
2.21.3

[PULL 14/14] iotests: Test node/bitmap aliases during migration

2020-08-21 Thread Eric Blake

From: Max Reitz 

Signed-off-by: Max Reitz 
Message-Id: <20200820150725.68687-4-mre...@redhat.com>
Reviewed-by: Eric Blake 
Tested-by: Eric Blake 
[eblake: fold in python cleanups recommended by Vladimir]
Signed-off-by: Eric Blake 
---
 tests/qemu-iotests/300 | 593 +
 tests/qemu-iotests/300.out |   5 +
 tests/qemu-iotests/group   |   1 +
 3 files changed, 599 insertions(+)
 create mode 100755 tests/qemu-iotests/300
 create mode 100644 tests/qemu-iotests/300.out

diff --git a/tests/qemu-iotests/300 b/tests/qemu-iotests/300
new file mode 100755
index ..5b75121b8496
--- /dev/null
+++ b/tests/qemu-iotests/300
@@ -0,0 +1,593 @@
+#!/usr/bin/env python3
+#
+# Copyright (C) 2020 Red Hat, Inc.
+#
+# Tests for dirty bitmaps migration with node aliases
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+
+import os
+import random
+import re
+from typing import Dict, List, Optional, Union
+import iotests
+import qemu
+
+BlockBitmapMapping = List[Dict[str, Union[str, List[Dict[str, str]
+
+assert iotests.sock_dir is not None
+mig_sock = os.path.join(iotests.sock_dir, 'mig_sock')
+
+
+class TestDirtyBitmapMigration(iotests.QMPTestCase):
+src_node_name: str = ''
+dst_node_name: str = ''
+src_bmap_name: str = ''
+dst_bmap_name: str = ''
+
+def setUp(self) -> None:
+self.vm_a = iotests.VM(path_suffix='-a')
+self.vm_a.add_blockdev(f'node-name={self.src_node_name},'
+   'driver=null-co')
+self.vm_a.launch()
+
+self.vm_b = iotests.VM(path_suffix='-b')
+self.vm_b.add_blockdev(f'node-name={self.dst_node_name},'
+   'driver=null-co')
+self.vm_b.add_incoming(f'unix:{mig_sock}')
+self.vm_b.launch()
+
+result = self.vm_a.qmp('block-dirty-bitmap-add',
+   node=self.src_node_name,
+   name=self.src_bmap_name)
+self.assert_qmp(result, 'return', {})
+
+# Dirty some random megabytes
+for _ in range(9):
+mb_ofs = random.randrange(1024)
+self.vm_a.hmp_qemu_io(self.src_node_name, f'discard {mb_ofs}M 1M')
+
+result = self.vm_a.qmp('x-debug-block-dirty-bitmap-sha256',
+   node=self.src_node_name,
+   name=self.src_bmap_name)
+self.bitmap_hash_reference = result['return']['sha256']
+
+caps = [{'capability': name, 'state': True}
+for name in ('dirty-bitmaps', 'events')]
+
+for vm in (self.vm_a, self.vm_b):
+result = vm.qmp('migrate-set-capabilities', capabilities=caps)
+self.assert_qmp(result, 'return', {})
+
+def tearDown(self) -> None:
+self.vm_a.shutdown()
+self.vm_b.shutdown()
+try:
+os.remove(mig_sock)
+except OSError:
+pass
+
+def check_bitmap(self, bitmap_name_valid: bool) -> None:
+result = self.vm_b.qmp('x-debug-block-dirty-bitmap-sha256',
+   node=self.dst_node_name,
+   name=self.dst_bmap_name)
+if bitmap_name_valid:
+self.assert_qmp(result, 'return/sha256',
+self.bitmap_hash_reference)
+else:
+self.assert_qmp(result, 'error/desc',
+f"Dirty bitmap '{self.dst_bmap_name}' not found")
+
+def migrate(self, bitmap_name_valid: bool = True,
+migration_success: bool = True) -> None:
+result = self.vm_a.qmp('migrate', uri=f'unix:{mig_sock}')
+self.assert_qmp(result, 'return', {})
+
+with iotests.Timeout(5, 'Timeout waiting for migration to complete'):
+self.assertEqual(self.vm_a.wait_migration('postmigrate'),
+ migration_success)
+self.assertEqual(self.vm_b.wait_migration('running'),
+ migration_success)
+
+if migration_success:
+self.check_bitmap(bitmap_name_valid)
+
+def verify_dest_error(self, msg: Optional[str]) -> None:
+"""
+Check whether the given error message is present in vm_b's log.
+(vm_b is shut down to do so.)
+If @msg is None, check that there has not been any error.
+"""
+self.vm_b.shutdown()
+if

[PULL 13/14] iotests.py: Let wait_migration() return on failure

2020-08-21 Thread Eric Blake

From: Max Reitz 

Let wait_migration() return on failure (with the return value indicating
whether the migration was completed or has failed), so we can use it for
migrations that are expected to fail, too.

Signed-off-by: Max Reitz 
Reviewed-by: Eric Blake 
Reviewed-by: Vladimir Sementsov-Ogievskiy 
Message-Id: <20200820150725.68687-3-mre...@redhat.com>
Signed-off-by: Eric Blake 
---
 tests/qemu-iotests/iotests.py | 18 --
 1 file changed, 12 insertions(+), 6 deletions(-)

diff --git a/tests/qemu-iotests/iotests.py b/tests/qemu-iotests/iotests.py
index 717b5b652c45..e197c73ca501 100644
--- a/tests/qemu-iotests/iotests.py
+++ b/tests/qemu-iotests/iotests.py
@@ -729,16 +729,22 @@ class VM(qtest.QEMUQtestMachine):
 }
 ]))

-def wait_migration(self, expect_runstate):
+def wait_migration(self, expect_runstate: Optional[str]) -> bool:
 while True:
 event = self.event_wait('MIGRATION')
 log(event, filters=[filter_qmp_event])
-if event['data']['status'] == 'completed':
+if event['data']['status'] in ('completed', 'failed'):
 break
-# The event may occur in finish-migrate, so wait for the expected
-# post-migration runstate
-while self.qmp('query-status')['return']['status'] != expect_runstate:
-pass
+
+if event['data']['status'] == 'completed':
+# The event may occur in finish-migrate, so wait for the expected
+# post-migration runstate
+runstate = None
+while runstate != expect_runstate:
+runstate = self.qmp('query-status')['return']['status']
+return True
+else:
+return False

 def node_info(self, node_name):
 nodes = self.qmp('query-named-block-nodes')
-- 
2.28.0

[PULL 07/14] qcow2_format.py: Dump bitmap table serialized entries

2020-08-21 Thread Eric Blake

From: Andrey Shinkevich 

Add bitmap table information to the QCOW2 metadata dump.

Bitmap name   bitmap-1
...
Bitmap table   typesize offset
0  serialized  6553610092544
1  all-zeroes  00
2  all-zeroes  00

Signed-off-by: Andrey Shinkevich 
Reviewed-by: Vladimir Sementsov-Ogievskiy 
Message-Id: <1596742557-320265-8-git-send-email-andrey.shinkev...@virtuozzo.com>
Signed-off-by: Eric Blake 
---
 tests/qemu-iotests/303.out |  4 +++
 tests/qemu-iotests/qcow2_format.py | 50 ++
 2 files changed, 54 insertions(+)

diff --git a/tests/qemu-iotests/303.out b/tests/qemu-iotests/303.out
index 038ba93a87d7..70828e05f11f 100644
--- a/tests/qemu-iotests/303.out
+++ b/tests/qemu-iotests/303.out
@@ -66,6 +66,8 @@ type  1
 granularity_bits  15
 name_size 8
 extra_data_size   0
+Bitmap table   typesize offset
+0  serialized  6553610092544

 Bitmap name   bitmap-2
 bitmap_table_offset   0x9c
@@ -75,4 +77,6 @@ type  1
 granularity_bits  16
 name_size 8
 extra_data_size   0
+Bitmap table   typesize offset
+0  all-zeroes  00

diff --git a/tests/qemu-iotests/qcow2_format.py 
b/tests/qemu-iotests/qcow2_format.py
index ca0d3501e0a2..574249bc463c 100644
--- a/tests/qemu-iotests/qcow2_format.py
+++ b/tests/qemu-iotests/qcow2_format.py
@@ -175,6 +175,10 @@ class Qcow2BitmapDirEntry(Qcow2Struct):
 entry_raw_size = self.bitmap_dir_entry_raw_size()
 padding = ((entry_raw_size + 7) & ~7) - entry_raw_size
 fd.seek(padding, 1)
+self.bitmap_table = Qcow2BitmapTable(fd=fd,
+ offset=self.bitmap_table_offset,
+ nb_entries=self.bitmap_table_size,
+ cluster_size=self.cluster_size)

 def bitmap_dir_entry_raw_size(self):
 return struct.calcsize(self.fmt) + self.name_size + \
@@ -183,6 +187,52 @@ class Qcow2BitmapDirEntry(Qcow2Struct):
 def dump(self):
 print(f'{"Bitmap name":<25} {self.name}')
 super(Qcow2BitmapDirEntry, self).dump()
+self.bitmap_table.dump()
+
+
+class Qcow2BitmapTableEntry(Qcow2Struct):
+
+fields = (
+('u64',  '{}', 'entry'),
+)
+
+BME_TABLE_ENTRY_RESERVED_MASK = 0xff0001fe
+BME_TABLE_ENTRY_OFFSET_MASK = 0x00fffe00
+BME_TABLE_ENTRY_FLAG_ALL_ONES = 1
+
+def __init__(self, fd):
+super().__init__(fd=fd)
+self.reserved = self.entry & self.BME_TABLE_ENTRY_RESERVED_MASK
+self.offset = self.entry & self.BME_TABLE_ENTRY_OFFSET_MASK
+if self.offset:
+if self.entry & self.BME_TABLE_ENTRY_FLAG_ALL_ONES:
+self.type = 'invalid'
+else:
+self.type = 'serialized'
+elif self.entry & self.BME_TABLE_ENTRY_FLAG_ALL_ONES:
+self.type = 'all-ones'
+else:
+self.type = 'all-zeroes'
+
+
+class Qcow2BitmapTable:
+
+def __init__(self, fd, offset, nb_entries, cluster_size):
+self.cluster_size = cluster_size
+position = fd.tell()
+fd.seek(offset)
+self.entries = [Qcow2BitmapTableEntry(fd) for _ in range(nb_entries)]
+fd.seek(position)
+
+def dump(self):
+bitmap_table = enumerate(self.entries)
+print(f'{"Bitmap table":<14} {"type":<15} {"size":<12} {"offset"}')
+for i, entry in bitmap_table:
+if entry.type == 'serialized':
+size = self.cluster_size
+else:
+size = 0
+print(f'{i:<14} {entry.type:<15} {size:<12} {entry.offset}')


 QCOW2_EXT_MAGIC_BITMAPS = 0x23852875
-- 
2.28.0

[PULL 11/14] iotests: dump QCOW2 header in JSON in #303

2020-08-21 Thread Eric Blake

From: Andrey Shinkevich 

Extend the test case #303 by dumping QCOW2 image metadata in JSON
format.

Signed-off-by: Andrey Shinkevich 
Reviewed-by: Vladimir Sementsov-Ogievskiy 
Message-Id: 
<1596742557-320265-12-git-send-email-andrey.shinkev...@virtuozzo.com>
Signed-off-by: Eric Blake 
---
 tests/qemu-iotests/303 |  3 ++
 tests/qemu-iotests/303.out | 76 ++
 2 files changed, 79 insertions(+)

diff --git a/tests/qemu-iotests/303 b/tests/qemu-iotests/303
index e9accdc7bc92..6c2177448348 100755
--- a/tests/qemu-iotests/303
+++ b/tests/qemu-iotests/303
@@ -58,3 +58,6 @@ add_bitmap(1, 0, 6, False)
 add_bitmap(2, 6, 8, True)
 dump = ['qcow2.py', disk, 'dump-header']
 subprocess.run(dump)
+# Dump the metadata in JSON format
+dump.append('-j')
+subprocess.run(dump)
diff --git a/tests/qemu-iotests/303.out b/tests/qemu-iotests/303.out
index 70828e05f11f..7fa1edef0d89 100644
--- a/tests/qemu-iotests/303.out
+++ b/tests/qemu-iotests/303.out
@@ -80,3 +80,79 @@ extra_data_size   0
 Bitmap table   typesize offset
 0  all-zeroes  00

+{
+"magic": 1363560955,
+"version": 3,
+"backing_file_offset": 0,
+"backing_file_size": 0,
+"cluster_bits": 16,
+"size": 10485760,
+"crypt_method": 0,
+"l1_size": 1,
+"l1_table_offset": 196608,
+"refcount_table_offset": 65536,
+"refcount_table_clusters": 1,
+"nb_snapshots": 0,
+"snapshot_offset": 0,
+"incompatible_features": 0,
+"compatible_features": 0,
+"autoclear_features": 1,
+"refcount_order": 4,
+"header_length": 112
+}
+
+[
+{
+"name": "Feature table",
+"magic": 1745090647,
+"length": 336,
+"data_str": ""
+},
+{
+"name": "Bitmaps",
+"magic": 595929205,
+"length": 24,
+"data": {
+"nb_bitmaps": 2,
+"reserved32": 0,
+"bitmap_directory_size": 64,
+"bitmap_directory_offset": 10289152,
+"bitmap_directory": [
+{
+"name": "bitmap-1",
+"bitmap_table_offset": 10158080,
+"bitmap_table_size": 1,
+"flags": 2,
+"type": 1,
+"granularity_bits": 15,
+"name_size": 8,
+"extra_data_size": 0,
+"bitmap_table": [
+{
+"type": "serialized",
+"offset": 10092544,
+"reserved": 0
+}
+]
+},
+{
+"name": "bitmap-2",
+"bitmap_table_offset": 10223616,
+"bitmap_table_size": 1,
+"flags": 0,
+"type": 1,
+"granularity_bits": 16,
+"name_size": 8,
+"extra_data_size": 0,
+"bitmap_table": [
+{
+"type": "all-zeroes",
+"offset": 0,
+"reserved": 0
+}
+]
+}
+]
+}
+}
+]
-- 
2.28.0

[PATCH v5 00/10] preallocate filter

2020-08-21 Thread Vladimir Sementsov-Ogievskiy

Hi all!

Here is a filter, which does preallocation on write.

In Virtuozzo we have to deal with a custom distributed storage
solution, where allocation is a relatively expensive operation. We have
to work around it in QEMU, so here is a new filter.

v5: rewrite patch 08 as Nir suggested

v4:
01-04: add r-bs
05: add coroutine_fn tag
06: use QEMU_LOCK_GUARD and fix reqs_lock leak
07: grammar
08-10: add r-b

Vladimir Sementsov-Ogievskiy (10):
  block: simplify comment to BDRV_REQ_SERIALISING
  block/io.c: drop assertion on double waiting for request serialisation
  block/io: split out bdrv_find_conflicting_request
  block/io: bdrv_wait_serialising_requests_locked: drop extra bs arg
  block: bdrv_mark_request_serialising: split non-waiting function
  block: introduce BDRV_REQ_NO_WAIT flag
  block: introduce preallocate filter
  iotests.py: add verify_o_direct helper
  iotests.py: add filter_img_check
  iotests: add 298 to test new preallocate filter driver

 docs/system/qemu-block-drivers.rst.inc |  26 +++
 qapi/block-core.json   |  20 +-
 include/block/block.h  |  20 +-
 include/block/block_int.h  |   3 +-
 block/file-posix.c |   2 +-
 block/io.c | 130 ++-
 block/preallocate.c| 291 +
 block/Makefile.objs|   1 +
 tests/qemu-iotests/298 |  50 +
 tests/qemu-iotests/298.out |   6 +
 tests/qemu-iotests/group   |   1 +
 tests/qemu-iotests/iotests.py  |  16 ++
 12 files changed, 498 insertions(+), 68 deletions(-)
 create mode 100644 block/preallocate.c
 create mode 100644 tests/qemu-iotests/298
 create mode 100644 tests/qemu-iotests/298.out

-- 
2.21.3

[PATCH v5 01/10] block: simplify comment to BDRV_REQ_SERIALISING

2020-08-21 Thread Vladimir Sementsov-Ogievskiy

1. BDRV_REQ_NO_SERIALISING no longer exists, so don't mention it.

2. We are going to add one more user of BDRV_REQ_SERIALISING, so
   comment about backup becomes a bit confusing here. The use case in
   backup is documented in block/backup.c, so let's just drop
   duplication here.

3. The fact that BDRV_REQ_SERIALISING is only for write requests is
   omitted. Add a note.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
Reviewed-by: Stefan Hajnoczi 
---
 include/block/block.h | 11 +--
 1 file changed, 1 insertion(+), 10 deletions(-)

diff --git a/include/block/block.h b/include/block/block.h
index 6e36154061..b8f4e86e8d 100644
--- a/include/block/block.h
+++ b/include/block/block.h
@@ -53,16 +53,7 @@ typedef enum {
  * content. */
 BDRV_REQ_WRITE_UNCHANGED= 0x40,
 
-/*
- * BDRV_REQ_SERIALISING forces request serialisation for writes.
- * It is used to ensure that writes to the backing file of a backup process
- * target cannot race with a read of the backup target that defers to the
- * backing file.
- *
- * Note, that BDRV_REQ_SERIALISING is _not_ opposite in meaning to
- * BDRV_REQ_NO_SERIALISING. A more descriptive name for the latter might be
- * _DO_NOT_WAIT_FOR_SERIALISING, except that is too long.
- */
+/* Forces request serialisation. Use only with write requests. */
 BDRV_REQ_SERIALISING= 0x80,
 
 /* Execute the request only if the operation can be offloaded or otherwise
-- 
2.21.3

[PULL 08/14] qcow2.py: Introduce '-j' key to dump in JSON format

2020-08-21 Thread Eric Blake

From: Andrey Shinkevich 

Add the command key to the qcow2.py arguments list to dump QCOW2
metadata in JSON format. Here is the suggested way to do that. The
implementation of the dump in JSON format is in the patch that follows.

Signed-off-by: Andrey Shinkevich 
Reviewed-by: Vladimir Sementsov-Ogievskiy 
Message-Id: <1596742557-320265-9-git-send-email-andrey.shinkev...@virtuozzo.com>
Signed-off-by: Eric Blake 
---
 tests/qemu-iotests/qcow2.py| 18 ++
 tests/qemu-iotests/qcow2_format.py |  4 ++--
 2 files changed, 16 insertions(+), 6 deletions(-)

diff --git a/tests/qemu-iotests/qcow2.py b/tests/qemu-iotests/qcow2.py
index 0910e6ac0705..77ca59cc663d 100755
--- a/tests/qemu-iotests/qcow2.py
+++ b/tests/qemu-iotests/qcow2.py
@@ -26,16 +26,19 @@ from qcow2_format import (
 )


+is_json = False
+
+
 def cmd_dump_header(fd):
 h = QcowHeader(fd)
-h.dump()
+h.dump(is_json)
 print()
-h.dump_extensions()
+h.dump_extensions(is_json)


 def cmd_dump_header_exts(fd):
 h = QcowHeader(fd)
-h.dump_extensions()
+h.dump_extensions(is_json)


 def cmd_set_header(fd, name, value):
@@ -151,11 +154,14 @@ def main(filename, cmd, args):


 def usage():
-print("Usage: %s   [, ...]" % sys.argv[0])
+print("Usage: %s   [, ...] [, ...]" % sys.argv[0])
 print("")
 print("Supported commands:")
 for name, handler, num_args, desc in cmds:
 print("%-20s - %s" % (name, desc))
+print("")
+print("Supported keys:")
+print("%-20s - %s" % ('-j', 'Dump in JSON format'))


 if __name__ == '__main__':
@@ -163,4 +169,8 @@ if __name__ == '__main__':
 usage()
 sys.exit(1)

+is_json = '-j' in sys.argv
+if is_json:
+sys.argv.remove('-j')
+
 main(sys.argv[1], sys.argv[2], sys.argv[3:])
diff --git a/tests/qemu-iotests/qcow2_format.py 
b/tests/qemu-iotests/qcow2_format.py
index 574249bc463c..de0adcbf9db0 100644
--- a/tests/qemu-iotests/qcow2_format.py
+++ b/tests/qemu-iotests/qcow2_format.py
@@ -109,7 +109,7 @@ class Qcow2Struct(metaclass=Qcow2StructMeta):
 self.__dict__ = dict((field[2], values[i])
  for i, field in enumerate(self.fields))

-def dump(self):
+def dump(self, is_json=False):
 for f in self.fields:
 value = self.__dict__[f[2]]
 if isinstance(f[1], str):
@@ -408,7 +408,7 @@ class QcowHeader(Qcow2Struct):
 buf = buf[0:header_bytes-1]
 fd.write(buf)

-def dump_extensions(self):
+def dump_extensions(self, is_json=False):
 for ex in self.extensions:
 print('Header extension:')
 ex.dump()
-- 
2.28.0

[PULL 06/14] qcow2_format.py: pass cluster size to substructures

2020-08-21 Thread Eric Blake

From: Andrey Shinkevich 

The cluster size of an image is a member of the QcowHeader class and may be
obtained by dependent extension structures such as Qcow2BitmapExt in order
to print further bitmap table details.

Signed-off-by: Andrey Shinkevich 
Reviewed-by: Vladimir Sementsov-Ogievskiy 
Message-Id: <1596742557-320265-7-git-send-email-andrey.shinkev...@virtuozzo.com>
Signed-off-by: Eric Blake 
---
 tests/qemu-iotests/qcow2_format.py | 17 +++--
 1 file changed, 11 insertions(+), 6 deletions(-)

diff --git a/tests/qemu-iotests/qcow2_format.py 
b/tests/qemu-iotests/qcow2_format.py
index 05a8aa98f72c..ca0d3501e0a2 100644
--- a/tests/qemu-iotests/qcow2_format.py
+++ b/tests/qemu-iotests/qcow2_format.py
@@ -129,19 +129,21 @@ class Qcow2BitmapExt(Qcow2Struct):
 ('u64', '{:#x}', 'bitmap_directory_offset')
 )

-def __init__(self, fd):
+def __init__(self, fd, cluster_size):
 super().__init__(fd=fd)
 tail = struct.calcsize(self.fmt) % 8
 if tail:
 fd.seek(8 - tail, 1)
 position = fd.tell()
+self.cluster_size = cluster_size
 self.read_bitmap_directory(fd)
 fd.seek(position)

 def read_bitmap_directory(self, fd):
 fd.seek(self.bitmap_directory_offset)
 self.bitmap_directory = \
-[Qcow2BitmapDirEntry(fd) for _ in range(self.nb_bitmaps)]
+[Qcow2BitmapDirEntry(fd, cluster_size=self.cluster_size)
+ for _ in range(self.nb_bitmaps)]

 def dump(self):
 super().dump()
@@ -162,8 +164,9 @@ class Qcow2BitmapDirEntry(Qcow2Struct):
 ('u32', '{}', 'extra_data_size')
 )

-def __init__(self, fd):
+def __init__(self, fd, cluster_size):
 super().__init__(fd=fd)
+self.cluster_size = cluster_size
 # Seek relative to the current position in the file
 fd.seek(self.extra_data_size, 1)
 bitmap_name = fd.read(self.name_size)
@@ -203,11 +206,13 @@ class QcowHeaderExtension(Qcow2Struct):
 # then padding to next multiply of 8
 )

-def __init__(self, magic=None, length=None, data=None, fd=None):
+def __init__(self, magic=None, length=None, data=None, fd=None,
+ cluster_size=None):
 """
 Support both loading from fd and creation from user data.
 For fd-based creation current position in a file will be used to read
 the data.
+The cluster_size value may be obtained by dependent structures.

 This should be somehow refactored and functionality should be moved to
 superclass (to allow creation of any qcow2 struct), but then, fields
@@ -230,7 +235,7 @@ class QcowHeaderExtension(Qcow2Struct):
 assert all(v is None for v in (magic, length, data))
 super().__init__(fd=fd)
 if self.magic == QCOW2_EXT_MAGIC_BITMAPS:
-self.obj = Qcow2BitmapExt(fd=fd)
+self.obj = Qcow2BitmapExt(fd=fd, cluster_size=cluster_size)
 self.data = None
 else:
 padded = (self.length + 7) & ~7
@@ -319,7 +324,7 @@ class QcowHeader(Qcow2Struct):
 end = self.cluster_size

 while fd.tell() < end:
-ext = QcowHeaderExtension(fd=fd)
+ext = QcowHeaderExtension(fd=fd, cluster_size=self.cluster_size)
 if ext.magic == 0:
 break
 else:
-- 
2.28.0

[PULL 01/14] iotests: add test for QCOW2 header dump

2020-08-21 Thread Eric Blake

From: Andrey Shinkevich 

The simple script creates a QCOW2 image and fills it with some data.
Two bitmaps are created as well. Then the script reads the image header
with extensions from the disk by running the script qcow2.py and dumps
the information to the output. Other entities, such as snapshots, may
be added to the test later.

Suggested-by: Eric Blake 
Signed-off-by: Andrey Shinkevich 
Reviewed-by: Eric Blake 
Reviewed-by: Vladimir Sementsov-Ogievskiy 
Message-Id: <1596742557-320265-2-git-send-email-andrey.shinkev...@virtuozzo.com>
Signed-off-by: Eric Blake 
---
 tests/qemu-iotests/303 | 60 ++
 tests/qemu-iotests/303.out | 60 ++
 tests/qemu-iotests/group   |  1 +
 3 files changed, 121 insertions(+)
 create mode 100755 tests/qemu-iotests/303
 create mode 100644 tests/qemu-iotests/303.out

diff --git a/tests/qemu-iotests/303 b/tests/qemu-iotests/303
new file mode 100755
index ..e9accdc7bc92
--- /dev/null
+++ b/tests/qemu-iotests/303
@@ -0,0 +1,60 @@
+#!/usr/bin/env python3
+#
+# Test for dumping of qcow2 image metadata
+#
+# Copyright (c) 2020 Virtuozzo International GmbH
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+
+import iotests
+import subprocess
+from iotests import qemu_img_create, qemu_io, file_path, log, filter_qemu_io
+
+iotests.script_initialize(supported_fmts=['qcow2'])
+
+disk = file_path('disk')
+chunk = 1024 * 1024
+
+
+def create_bitmap(bitmap_number, disabled):
+granularity = 1 << (14 + bitmap_number)
+bitmap_name = 'bitmap-' + str(bitmap_number)
+args = ['bitmap', '--add', '-g', f'{granularity}', '-f', iotests.imgfmt,
+disk, bitmap_name]
+if disabled:
+args.append('--disable')
+
+iotests.qemu_img_pipe(*args)
+
+
+def write_to_disk(offset, size):
+write = f'write {offset} {size}'
+log(qemu_io('-c', write, disk), filters=[filter_qemu_io])
+
+
+def add_bitmap(num, begin, end, disabled):
+log(f'Add bitmap {num}')
+create_bitmap(num, disabled)
+for i in range(begin, end):
+write_to_disk((i) * chunk, chunk)
+log('')
+
+
+qemu_img_create('-f', iotests.imgfmt, disk, '10M')
+
+add_bitmap(1, 0, 6, False)
+add_bitmap(2, 6, 8, True)
+dump = ['qcow2.py', disk, 'dump-header']
+subprocess.run(dump)
diff --git a/tests/qemu-iotests/303.out b/tests/qemu-iotests/303.out
new file mode 100644
index ..8d7973ccc201
--- /dev/null
+++ b/tests/qemu-iotests/303.out
@@ -0,0 +1,60 @@
+Add bitmap 1
+wrote 1048576/1048576 bytes at offset 0
+1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+
+wrote 1048576/1048576 bytes at offset 1048576
+1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+
+wrote 1048576/1048576 bytes at offset 2097152
+1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+
+wrote 1048576/1048576 bytes at offset 3145728
+1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+
+wrote 1048576/1048576 bytes at offset 4194304
+1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+
+wrote 1048576/1048576 bytes at offset 5242880
+1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+
+
+Add bitmap 2
+wrote 1048576/1048576 bytes at offset 6291456
+1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+
+wrote 1048576/1048576 bytes at offset 7340032
+1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+
+
+magic 0x514649fb
+version   3
+backing_file_offset   0x0
+backing_file_size 0x0
+cluster_bits  16
+size  10485760
+crypt_method  0
+l1_size   1
+l1_table_offset   0x3
+refcount_table_offset 0x1
+refcount_table_clusters   1
+nb_snapshots  0
+snapshot_offset   0x0
+incompatible_features []
+compatible_features   []
+autoclear_features[0]
+refcount_order4
+header_length 112
+
+Header extension:
+magic 0x6803f857 (Feature table)
+length336
+data  
+
+Header extension:
+magic 0x23852875 (Bitmaps)
+length24
+nb_bitmaps2
+reserved320
+bitmap_directory_size 0x40
+bitmap_directory_offset   0x9d
+
diff --git a/tests/qemu-iotests/group b/tests/qemu-iotests/group
index 7f76066640a6..ecff2621cddc 100644
---

[PULL 04/14] qcow2_format.py: dump bitmap flags in human readable way.

2020-08-21 Thread Eric Blake

From: Andrey Shinkevich 

Introduce the class BitmapFlags that parses a bitmap flags mask.

Suggested-by: Vladimir Sementsov-Ogievskiy 
Signed-off-by: Andrey Shinkevich 
Reviewed-by: Vladimir Sementsov-Ogievskiy 
Message-Id: <1596742557-320265-5-git-send-email-andrey.shinkev...@virtuozzo.com>
Signed-off-by: Eric Blake 
---
 tests/qemu-iotests/qcow2_format.py | 16 
 1 file changed, 16 insertions(+)

diff --git a/tests/qemu-iotests/qcow2_format.py 
b/tests/qemu-iotests/qcow2_format.py
index d4a997453758..b4473442c9d4 100644
--- a/tests/qemu-iotests/qcow2_format.py
+++ b/tests/qemu-iotests/qcow2_format.py
@@ -40,6 +40,22 @@ class Flags64(Qcow2Field):
 return str(bits)


+class BitmapFlags(Qcow2Field):
+
+flags = {
+0x1: 'in-use',
+0x2: 'auto'
+}
+
+def __str__(self):
+bits = []
+for bit in range(64):
+flag = self.value & (1 << bit)
+if flag:
+bits.append(self.flags.get(flag, f'bit-{bit}'))
+return f'{self.value:#x} ({bits})'
+
+
 class Enum(Qcow2Field):

 def __str__(self):
-- 
2.28.0

[PULL 03/14] qcow2_format.py: change Qcow2BitmapExt initialization method

2020-08-21 Thread Eric Blake

From: Andrey Shinkevich 

There are two ways to initialize a class derived from Qcow2Struct:
1. Pass a block of binary data to the constructor.
2. Pass the file descriptor to allow reading the file from constructor.
Let's change the Qcow2BitmapExt initialization method from 1 to 2 to
support a scattered reading in the initialization chain.
The implementation comes with the patch that follows.

Suggested-by: Vladimir Sementsov-Ogievskiy 
Signed-off-by: Andrey Shinkevich 
Reviewed-by: Vladimir Sementsov-Ogievskiy 
Message-Id: <1596742557-320265-4-git-send-email-andrey.shinkev...@virtuozzo.com>
Signed-off-by: Eric Blake 
---
 tests/qemu-iotests/qcow2_format.py | 34 ++
 1 file changed, 21 insertions(+), 13 deletions(-)

diff --git a/tests/qemu-iotests/qcow2_format.py 
b/tests/qemu-iotests/qcow2_format.py
index 2f3681bb5f7c..d4a997453758 100644
--- a/tests/qemu-iotests/qcow2_format.py
+++ b/tests/qemu-iotests/qcow2_format.py
@@ -113,6 +113,11 @@ class Qcow2BitmapExt(Qcow2Struct):
 ('u64', '{:#x}', 'bitmap_directory_offset')
 )

+def __init__(self, fd):
+super().__init__(fd=fd)
+tail = struct.calcsize(self.fmt) % 8
+if tail:
+fd.seek(8 - tail, 1)

 QCOW2_EXT_MAGIC_BITMAPS = 0x23852875

@@ -161,21 +166,24 @@ class QcowHeaderExtension(Qcow2Struct):
 else:
 assert all(v is None for v in (magic, length, data))
 super().__init__(fd=fd)
-padded = (self.length + 7) & ~7
-self.data = fd.read(padded)
-assert self.data is not None
+if self.magic == QCOW2_EXT_MAGIC_BITMAPS:
+self.obj = Qcow2BitmapExt(fd=fd)
+self.data = None
+else:
+padded = (self.length + 7) & ~7
+self.data = fd.read(padded)
+assert self.data is not None
+self.obj = None

-data_str = self.data[:self.length]
-if all(c in string.printable.encode('ascii') for c in data_str):
-data_str = f"'{ data_str.decode('ascii') }'"
-else:
-data_str = ''
-self.data_str = data_str
+if self.data is not None:
+data_str = self.data[:self.length]
+if all(c in string.printable.encode(
+'ascii') for c in data_str):
+data_str = f"'{ data_str.decode('ascii') }'"
+else:
+data_str = ''
+self.data_str = data_str

-if self.magic == QCOW2_EXT_MAGIC_BITMAPS:
-self.obj = Qcow2BitmapExt(data=self.data)
-else:
-self.obj = None

 def dump(self):
 super().dump()
-- 
2.28.0

[PULL 12/14] migration: Add block-bitmap-mapping parameter

2020-08-21 Thread Eric Blake

From: Max Reitz 

This migration parameter allows mapping block node names and bitmap
names to aliases for the purpose of block dirty bitmap migration.

This way, management tools can use different node and bitmap names on
the source and destination and pass the mapping of how bitmaps are to be
transferred to qemu (on the source, the destination, or even both with
arbitrary aliases in the migration stream).

While touching this code, fix a bug where bitmap names longer than 255
bytes would fail an assertion in qemu_put_counted_string().

Suggested-by: Vladimir Sementsov-Ogievskiy 
Signed-off-by: Max Reitz 
Message-Id: <20200820150725.68687-2-mre...@redhat.com>
Reviewed-by: Vladimir Sementsov-Ogievskiy 
Signed-off-by: Eric Blake 
---
 qapi/migration.json| 104 -
 migration/migration.h  |   3 +
 migration/block-dirty-bitmap.c | 410 -
 migration/migration.c  |  30 +++
 monitor/hmp-cmds.c |  30 +++
 5 files changed, 521 insertions(+), 56 deletions(-)

diff --git a/qapi/migration.json b/qapi/migration.json
index ea53b23dca90..5f6b06172cab 100644
--- a/qapi/migration.json
+++ b/qapi/migration.json
@@ -508,6 +508,44 @@
   'data': [ 'none', 'zlib',
 { 'name': 'zstd', 'if': 'defined(CONFIG_ZSTD)' } ] }

+##
+# @BitmapMigrationBitmapAlias:
+#
+# @name: The name of the bitmap.
+#
+# @alias: An alias name for migration (for example the bitmap name on
+# the opposite site).
+#
+# Since: 5.2
+##
+{ 'struct': 'BitmapMigrationBitmapAlias',
+  'data': {
+  'name': 'str',
+  'alias': 'str'
+  } }
+
+##
+# @BitmapMigrationNodeAlias:
+#
+# Maps a block node name and the bitmaps it has to aliases for dirty
+# bitmap migration.
+#
+# @node-name: A block node name.
+#
+# @alias: An alias block node name for migration (for example the
+# node name on the opposite site).
+#
+# @bitmaps: Mappings for the bitmaps on this node.
+#
+# Since: 5.2
+##
+{ 'struct': 'BitmapMigrationNodeAlias',
+  'data': {
+  'node-name': 'str',
+  'alias': 'str',
+  'bitmaps': [ 'BitmapMigrationBitmapAlias' ]
+  } }
+
 ##
 # @MigrationParameter:
 #
@@ -642,6 +680,25 @@
 #  will consume more CPU.
 #  Defaults to 1. (Since 5.0)
 #
+# @block-bitmap-mapping: Maps block nodes and bitmaps on them to
+#  aliases for the purpose of dirty bitmap migration.  Such
+#  aliases may for example be the corresponding names on the
+#  opposite site.
+#  The mapping must be one-to-one, but not necessarily
+#  complete: On the source, unmapped bitmaps and all bitmaps
+#  on unmapped nodes will be ignored.  On the destination,
+#  encountering an unmapped alias in the incoming migration
+#  stream will result in a report, and all further bitmap
+#  migration data will then be discarded.
+#  Note that the destination does not know about bitmaps it
+#  does not receive, so there is no limitation or requirement
+#  regarding the number of bitmaps received, or how they are
+#  named, or on which nodes they are placed.
+#  By default (when this parameter has never been set), bitmap
+#  names are mapped to themselves.  Nodes are mapped to their
+#  block device name if there is one, and to their node name
+#  otherwise. (Since 5.2)
+#
 # Since: 2.4
 ##
 { 'enum': 'MigrationParameter',
@@ -656,7 +713,8 @@
'multifd-channels',
'xbzrle-cache-size', 'max-postcopy-bandwidth',
'max-cpu-throttle', 'multifd-compression',
-   'multifd-zlib-level' ,'multifd-zstd-level' ] }
+   'multifd-zlib-level' ,'multifd-zstd-level',
+   'block-bitmap-mapping' ] }

 ##
 # @MigrateSetParameters:
@@ -782,6 +840,25 @@
 #  will consume more CPU.
 #  Defaults to 1. (Since 5.0)
 #
+# @block-bitmap-mapping: Maps block nodes and bitmaps on them to
+#  aliases for the purpose of dirty bitmap migration.  Such
+#  aliases may for example be the corresponding names on the
+#  opposite site.
+#  The mapping must be one-to-one, but not necessarily
+#  complete: On the source, unmapped bitmaps and all bitmaps
+#  on unmapped nodes will be ignored.  On the destination,
+#  encountering an unmapped alias in the incoming migration
+#  stream will result in a report, and all further bitmap
+#  migration data will then be discarded.
+#  Note that the destination does not know about bitmaps it
+#  does not receive, so there is no limitation or requirement
+#  regarding the number of bitmaps received, or how they are
+#  named, or on which nodes they are placed.
+#  By default (when this parameter has never been set), bitmap
+#  names are mapped to themselves.  Nodes are mapped to their
+#  block device name if there is one, and to their node name
+#

[PULL 10/14] qcow2_format.py: support dumping metadata in JSON format

2020-08-21 Thread Eric Blake

From: Andrey Shinkevich 

Implementation of dumping QCOW2 image metadata.
The sample output:
{
"Header_extensions": [
{
"name": "Feature table",
"magic": 1745090647,
"length": 192,
"data_str": ""
},
{
"name": "Bitmaps",
"magic": 595929205,
"length": 24,
"data": {
"nb_bitmaps": 2,
"reserved32": 0,
"bitmap_directory_size": 64,
"bitmap_directory_offset": 1048576,
"bitmap_directory": [
{
"name": "bitmap-1",
"bitmap_table_offset": 589824,
"bitmap_table_size": 1,
"flags": 2,
"type": 1,
"granularity_bits": 15,
"name_size": 8,
"extra_data_size": 0,
"bitmap_table": [
{
"type": "serialized",
"offset": 655360
},
...

Suggested-by: Vladimir Sementsov-Ogievskiy 
Signed-off-by: Andrey Shinkevich 
Reviewed-by: Vladimir Sementsov-Ogievskiy 
Message-Id: 
<1596742557-320265-11-git-send-email-andrey.shinkev...@virtuozzo.com>
Signed-off-by: Eric Blake 
---
 tests/qemu-iotests/qcow2_format.py | 17 +
 1 file changed, 17 insertions(+)

diff --git a/tests/qemu-iotests/qcow2_format.py 
b/tests/qemu-iotests/qcow2_format.py
index 5a298b2f1357..8adc9959e10b 100644
--- a/tests/qemu-iotests/qcow2_format.py
+++ b/tests/qemu-iotests/qcow2_format.py
@@ -19,6 +19,15 @@

 import struct
 import string
+import json
+
+
+class ComplexEncoder(json.JSONEncoder):
+def default(self, obj):
+if hasattr(obj, 'to_json'):
+return obj.to_json()
+else:
+return json.JSONEncoder.default(self, obj)


 class Qcow2Field:
@@ -110,6 +119,10 @@ class Qcow2Struct(metaclass=Qcow2StructMeta):
  for i, field in enumerate(self.fields))

 def dump(self, is_json=False):
+if is_json:
+print(json.dumps(self.to_json(), indent=4, cls=ComplexEncoder))
+return
+
 for f in self.fields:
 value = self.__dict__[f[2]]
 if isinstance(f[1], str):
@@ -445,6 +458,10 @@ class QcowHeader(Qcow2Struct):
 fd.write(buf)

 def dump_extensions(self, is_json=False):
+if is_json:
+print(json.dumps(self.extensions, indent=4, cls=ComplexEncoder))
+return
+
 for ex in self.extensions:
 print('Header extension:')
 ex.dump()
-- 
2.28.0

[PULL 05/14] qcow2_format.py: Dump bitmap directory information

2020-08-21 Thread Eric Blake

From: Andrey Shinkevich 

Read and dump entries from the bitmap directory of QCOW2 image.

Header extension:
magic 0x23852875 (Bitmaps)
...
Bitmap name   bitmap-1
bitmap_table_offset   0xf
bitmap_table_size 1
flags 0x2 (['auto'])
type  1
granularity_bits  16
name_size 8
extra_data_size   0

Suggested-by: Kevin Wolf 
Signed-off-by: Andrey Shinkevich 
Reviewed-by: Vladimir Sementsov-Ogievskiy 
Message-Id: <1596742557-320265-6-git-send-email-andrey.shinkev...@virtuozzo.com>
Signed-off-by: Eric Blake 
---
 tests/qemu-iotests/303.out | 18 
 tests/qemu-iotests/qcow2_format.py | 47 ++
 2 files changed, 65 insertions(+)

diff --git a/tests/qemu-iotests/303.out b/tests/qemu-iotests/303.out
index 8d7973ccc201..038ba93a87d7 100644
--- a/tests/qemu-iotests/303.out
+++ b/tests/qemu-iotests/303.out
@@ -58,3 +58,21 @@ reserved320
 bitmap_directory_size 0x40
 bitmap_directory_offset   0x9d

+Bitmap name   bitmap-1
+bitmap_table_offset   0x9b
+bitmap_table_size 1
+flags 0x2 (['auto'])
+type  1
+granularity_bits  15
+name_size 8
+extra_data_size   0
+
+Bitmap name   bitmap-2
+bitmap_table_offset   0x9c
+bitmap_table_size 1
+flags 0x0 ([])
+type  1
+granularity_bits  16
+name_size 8
+extra_data_size   0
+
diff --git a/tests/qemu-iotests/qcow2_format.py 
b/tests/qemu-iotests/qcow2_format.py
index b4473442c9d4..05a8aa98f72c 100644
--- a/tests/qemu-iotests/qcow2_format.py
+++ b/tests/qemu-iotests/qcow2_format.py
@@ -134,6 +134,53 @@ class Qcow2BitmapExt(Qcow2Struct):
 tail = struct.calcsize(self.fmt) % 8
 if tail:
 fd.seek(8 - tail, 1)
+position = fd.tell()
+self.read_bitmap_directory(fd)
+fd.seek(position)
+
+def read_bitmap_directory(self, fd):
+fd.seek(self.bitmap_directory_offset)
+self.bitmap_directory = \
+[Qcow2BitmapDirEntry(fd) for _ in range(self.nb_bitmaps)]
+
+def dump(self):
+super().dump()
+for entry in self.bitmap_directory:
+print()
+entry.dump()
+
+
+class Qcow2BitmapDirEntry(Qcow2Struct):
+
+fields = (
+('u64', '{:#x}', 'bitmap_table_offset'),
+('u32', '{}', 'bitmap_table_size'),
+('u32', BitmapFlags, 'flags'),
+('u8',  '{}', 'type'),
+('u8',  '{}', 'granularity_bits'),
+('u16', '{}', 'name_size'),
+('u32', '{}', 'extra_data_size')
+)
+
+def __init__(self, fd):
+super().__init__(fd=fd)
+# Seek relative to the current position in the file
+fd.seek(self.extra_data_size, 1)
+bitmap_name = fd.read(self.name_size)
+self.name = bitmap_name.decode('ascii')
+# Move position to the end of the entry in the directory
+entry_raw_size = self.bitmap_dir_entry_raw_size()
+padding = ((entry_raw_size + 7) & ~7) - entry_raw_size
+fd.seek(padding, 1)
+
+def bitmap_dir_entry_raw_size(self):
+return struct.calcsize(self.fmt) + self.name_size + \
+self.extra_data_size
+
+def dump(self):
+print(f'{"Bitmap name":<25} {self.name}')
+super(Qcow2BitmapDirEntry, self).dump()
+

 QCOW2_EXT_MAGIC_BITMAPS = 0x23852875

-- 
2.28.0

[PULL 02/14] qcow2_format.py: make printable data an extension class member

2020-08-21 Thread Eric Blake

From: Andrey Shinkevich 

Let us distinguish the binary data type from the string one for the extension
data variable and keep the string as a QcowHeaderExtension class member.

Signed-off-by: Andrey Shinkevich 
Reviewed-by: Eric Blake 
Reviewed-by: Vladimir Sementsov-Ogievskiy 
Message-Id: <1596742557-320265-3-git-send-email-andrey.shinkev...@virtuozzo.com>
Signed-off-by: Eric Blake 
---
 tests/qemu-iotests/qcow2_format.py | 14 --
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/tests/qemu-iotests/qcow2_format.py 
b/tests/qemu-iotests/qcow2_format.py
index cc432e7ae06c..2f3681bb5f7c 100644
--- a/tests/qemu-iotests/qcow2_format.py
+++ b/tests/qemu-iotests/qcow2_format.py
@@ -165,6 +165,13 @@ class QcowHeaderExtension(Qcow2Struct):
 self.data = fd.read(padded)
 assert self.data is not None

+data_str = self.data[:self.length]
+if all(c in string.printable.encode('ascii') for c in data_str):
+data_str = f"'{ data_str.decode('ascii') }'"
+else:
+data_str = ''
+self.data_str = data_str
+
 if self.magic == QCOW2_EXT_MAGIC_BITMAPS:
 self.obj = Qcow2BitmapExt(data=self.data)
 else:
@@ -174,12 +181,7 @@ class QcowHeaderExtension(Qcow2Struct):
 super().dump()

 if self.obj is None:
-data = self.data[:self.length]
-if all(c in string.printable.encode('ascii') for c in data):
-data = f"'{ data.decode('ascii') }'"
-else:
-data = ''
-print(f'{"data":<25} {data}')
+print(f'{"data":<25} {self.data_str}')
 else:
 self.obj.dump()

-- 
2.28.0

Re: [PATCH 1/2] fdmon-poll: reset npfd when upgrading to fdmon-epoll

2020-08-21 Thread Philippe Mathieu-Daudé

On 8/21/20 12:12 PM, Stefan Hajnoczi wrote:
> npfd keeps track of how many pollfds are currently being monitored. It
> must be reset to 0 when fdmon_poll_wait() returns.
> 
> When npfd reaches a threshold we switch to fdmon-epoll because it scales
> better.
> 
> This patch resets npfd in the case where we switch to fdmon-epoll.
> Forgetting to do so results in the following assertion failure:
> 
>   util/fdmon-poll.c:65: fdmon_poll_wait: Assertion `npfd == 0' failed.
> 
> Buglink: https://bugzilla.redhat.com/show_bug.cgi?id=1869952
> Fixes: 1f050a4690f62a1e7dabc4f44141e9f762c3769f ("aio-posix: extract ppoll(2) 
> and epoll(7) fd monitoring")
> Signed-off-by: Stefan Hajnoczi 
> ---
>  util/fdmon-poll.c | 1 +
>  1 file changed, 1 insertion(+)
> 
> diff --git a/util/fdmon-poll.c b/util/fdmon-poll.c
> index 488067b679..5fe3b47865 100644
> --- a/util/fdmon-poll.c
> +++ b/util/fdmon-poll.c
> @@ -73,6 +73,7 @@ static int fdmon_poll_wait(AioContext *ctx, AioHandlerList 
> *ready_list,
>  
>  /* epoll(7) is faster above a certain number of fds */
>  if (fdmon_epoll_try_upgrade(ctx, npfd)) {
> +npfd = 0; /* we won't need pollfds[], reset npfd */
>  return ctx->fdmon_ops->wait(ctx, ready_list, timeout);
>  }
>  
> 

Reviewed-by: Philippe Mathieu-Daudé

Re: [PATCH v5 03/15] block/nvme: Let nvme_create_queue_pair() fail gracefully

2020-08-21 Thread Stefano Garzarella

On Fri, Aug 21, 2020 at 03:36:47PM +0200, Philippe Mathieu-Daudé wrote:
> On 8/21/20 11:44 AM, Stefano Garzarella wrote:
> > On Thu, Aug 20, 2020 at 06:58:49PM +0200, Philippe Mathieu-Daudé wrote:
> >> As nvme_create_queue_pair() is allowed to fail, replace the
> >> alloc() calls by try_alloc() to avoid aborting QEMU.
> >>
> >> Reviewed-by: Stefan Hajnoczi 
> >> Signed-off-by: Philippe Mathieu-Daudé 
> >> ---
> >>  block/nvme.c | 12 ++--
> >>  1 file changed, 10 insertions(+), 2 deletions(-)
> >>
> >> diff --git a/block/nvme.c b/block/nvme.c
> >> index 8c30a5fee28..e1893b4e792 100644
> >> --- a/block/nvme.c
> >> +++ b/block/nvme.c
> >> @@ -213,14 +213,22 @@ static NVMeQueuePair 
> >> *nvme_create_queue_pair(BlockDriverState *bs,
> >>  int i, r;
> >>  BDRVNVMeState *s = bs->opaque;
> >>  Error *local_err = NULL;
> >> -NVMeQueuePair *q = g_new0(NVMeQueuePair, 1);
> >> +NVMeQueuePair *q;
> >>  uint64_t prp_list_iova;
> >>  
> >> +q = g_try_new0(NVMeQueuePair, 1);
> >> +if (!q) {
> >> +return NULL;
> >> +}
> >> +q->prp_list_pages = qemu_try_blockalign0(bs,
> >> +  s->page_size * NVME_QUEUE_SIZE);
> > 
> > Here you use NVME_QUEUE_SIZE instead of NVME_NUM_REQS, is that an
> > intentional change?
> 
> No... Thanks for spotting this, I missed it because git didn't
> emit any warning while rebasing on top of "block/nvme: support nested
> aio_poll".
> This value has been changed in 1086e95da17 ("block/nvme: switch to a
> NVMeRequest freelist").
> 
> Good catch!
> I'll respin (after reviewing the 'nested aio_poll' changes).

Cool, with that fixed the patch LGTM:

Reviewed-by: Stefano Garzarella 

> 
> > 
> > Maybe it is not an issue, since NVME_QUEUE_SIZE is bigger than
> > NVME_NUM_REQS, but we should mention it in the commit message.
> > 
> > Thanks,
> > Stefano
> > 
> >> +if (!q->prp_list_pages) {
> >> +goto fail;
> >> +}
> >>  qemu_mutex_init(&q->lock);
> >>  q->s = s;
> >>  q->index = idx;
> >>  qemu_co_queue_init(&q->free_req_queue);
> >> -q->prp_list_pages = qemu_blockalign0(bs, s->page_size * 
> >> NVME_NUM_REQS);
> >>  q->completion_bh = aio_bh_new(bdrv_get_aio_context(bs),
> >>nvme_process_completion_bh, q);
> >>  r = qemu_vfio_dma_map(s->vfio, q->prp_list_pages,
> >> -- 
> >> 2.26.2
> >>
> >>
> > 
>

1 2 >

1 - 100 of 141 matches

Mail list logo