On 2015/4/3 0:26, Fam Zheng wrote: > On Thu, 04/02 17:21, Paolo Bonzini wrote: >> >> >> On 02/04/2015 17:16, Fam Zheng wrote: >>>>>>>>> After qemu_iovec_destroy, the QEMUIOVector's size is zeroed and >>>>>>>>> the zero size ultimately is used to compute virtqueue_push's len >>>>>>>>> argument. Therefore, reads from virtio-blk devices did not >>>>>>>>> migrate their results correctly. (Writes were okay). >>>>>>> >>>>>>> Can't we move qemu_iovec_destroy to virtio_blk_free_request? >>>>> >>>>> You would still have to add more code to differentiate reads and >>>>> writes---I think. >>> Yeah, but the extra field will not be needed. >> >> Can you post an alternative patch? One small complication is that >> is_write is in mrb but not in mrb->reqs[x]. virtio_blk_rw_complete is >> already doing >> >> int p = virtio_ldl_p(VIRTIO_DEVICE(req->dev), &req->out.type); >> bool is_read = !(p & VIRTIO_BLK_T_OUT); >> >> but only in a slow path. > > OK, so it looks like a new field is the simplest way to achieve. > > There is another problem with your patch - read_size is not initialized in > non-RW paths like scsi and flush. > > I think the optimization for write is a separate thing, though. Shouldn't > below > patch already fix the migration issue? > > diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c > index 000c38d..ee6e198 100644 > --- a/hw/block/virtio-blk.c > +++ b/hw/block/virtio-blk.c > @@ -92,13 +92,6 @@ static void virtio_blk_rw_complete(void *opaque, int ret) > next = req->mr_next; > trace_virtio_blk_rw_complete(req, ret); > > - if (req->qiov.nalloc != -1) { > - /* If nalloc is != 1 req->qiov is a local copy of the original > - * external iovec. It was allocated in submit_merged_requests > - * to be able to merge requests. */ > - qemu_iovec_destroy(&req->qiov); > - } > - > if (ret) { > int p = virtio_ldl_p(VIRTIO_DEVICE(req->dev), &req->out.type); > bool is_read = !(p & VIRTIO_BLK_T_OUT); > @@ -109,6 +102,13 @@ static void virtio_blk_rw_complete(void *opaque, int ret) > > virtio_blk_req_complete(req, VIRTIO_BLK_S_OK); > block_acct_done(blk_get_stats(req->dev->blk), &req->acct); > + > + if (req->qiov.nalloc != -1) { > + /* This means req->qiov is a local copy of the original external > + * iovec. It was allocated in virtio_blk_submit_multireq in order > + * to merge requests. */ > + qemu_iovec_destroy(&req->qiov); > + } > virtio_blk_free_request(req); > } > } > > > > . >
Can we allocate a new request for the merged requests? diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c index 000c38d..d39381f 100644 --- a/hw/block/virtio-blk.c +++ b/hw/block/virtio-blk.c @@ -92,11 +92,10 @@ static void virtio_blk_rw_complete(void *opaque, int ret) next = req->mr_next; trace_virtio_blk_rw_complete(req, ret); - if (req->qiov.nalloc != -1) { - /* If nalloc is != 1 req->qiov is a local copy of the original - * external iovec. It was allocated in submit_merged_requests - * to be able to merge requests. */ + if (req->in == NULL) { qemu_iovec_destroy(&req->qiov); + virtio_blk_free_request(req); + continue; } if (ret) { @@ -313,29 +312,33 @@ static void virtio_blk_handle_scsi(VirtIOBlockReq *req) static inline void submit_requests(BlockBackend *blk, MultiReqBuffer *mrb, int start, int num_reqs, int niov) { - QEMUIOVector *qiov = &mrb->reqs[start]->qiov; + VirtIOBlockReq *merged_request; + QEMUIOVector *qiov; int64_t sector_num = mrb->reqs[start]->sector_num; - int nb_sectors = mrb->reqs[start]->qiov.size / BDRV_SECTOR_SIZE; + int nb_sectors = 0; bool is_write = mrb->is_write; if (num_reqs > 1) { int i; - struct iovec *tmp_iov = qiov->iov; - int tmp_niov = qiov->niov; - /* mrb->reqs[start]->qiov was initialized from external so we can't - * modifiy it here. We need to initialize it locally and then add the - * external iovecs. */ - qemu_iovec_init(qiov, niov) + merged_request = virtio_blk_alloc_request(mrb->reqs[start]->dev); - for (i = 0; i < tmp_niov; i++) { - qemu_iovec_add(qiov, tmp_iov[i].iov_base, tmp_iov[i].iov_len); - } + /* use the 'in' field to judge whether the request is + a merged request */ + merged_request->in = NULL; + + qiov = &merged_request->qiov; + qemu_iovec_init(qiov, niov); - for (i = start + 1; i < start + num_reqs; i++) { + for (i = start; i < start + num_reqs; i++) { qemu_iovec_concat(qiov, &mrb->reqs[i]->qiov, 0, mrb->reqs[i]->qiov.size); - mrb->reqs[i - 1]->mr_next = mrb->reqs[i]; + if (i > start) { + mrb->reqs[i - 1]->mr_next = mrb->reqs[i]; + } else { + merged_request->mr_next = mrb->reqs[i]; + } + nb_sectors += mrb->reqs[i]->qiov.size / BDRV_SECTOR_SIZE; } assert(nb_sectors == qiov->size / BDRV_SECTOR_SIZE); @@ -345,14 +348,18 @@ static inline void submit_requests(BlockBackend *blk, MultiReqBuffer *mrb, block_acct_merge_done(blk_get_stats(blk), is_write ? BLOCK_ACCT_WRITE : BLOCK_ACCT_READ, num_reqs - 1); + } else { + merged_request = mrb->reqs[start]; + qiov = &mrb->reqs[start]->qiov; + nb_sectors = mrb->reqs[start]->qiov.size / BDRV_SECTOR_SIZE; } if (is_write) { blk_aio_writev(blk, sector_num, qiov, nb_sectors, - virtio_blk_rw_complete, mrb->reqs[start]); + virtio_blk_rw_complete, merged_request); } else { blk_aio_readv(blk, sector_num, qiov, nb_sectors, - virtio_blk_rw_complete, mrb->reqs[start]); + virtio_blk_rw_complete, merged_request); } } -- Bin Wu