Re: [Qemu-devel] [RFC v5] RBD: Add support readv,writev for rbd

2017-02-16 Thread Jason Dillaman
On Thu, Feb 16, 2017 at 10:13 AM, Alexandre DERUMIER
 wrote:
> Hi, I would like to bench it with small 4k read/write.
>
> On the ceph side,do we need this PR ? :
> https://github.com/ceph/ceph/pull/13447

Yes, that is the correct PR for the client-side librbd changes. You
should be able to test it against Hammer/Jewel-release clusters.

-- 
Jason



Re: [Qemu-devel] [RFC v5] RBD: Add support readv,writev for rbd

2017-02-16 Thread Jeff Cody
On Thu, Feb 16, 2017 at 05:00:02PM +0800, jaze...@gmail.com wrote:
> From: tianqing 
> 
> Rbd can do readv and writev directly, so wo do not need to transform
> iov to buf or vice versa any more.
> 
> Signed-off-by: tianqing 
> ---
>  block/rbd.c | 49 ++---
>  1 file changed, 42 insertions(+), 7 deletions(-)
> 
> diff --git a/block/rbd.c b/block/rbd.c
> index a57b3e3..75ae1d6 100644
> --- a/block/rbd.c
> +++ b/block/rbd.c
> @@ -47,7 +47,7 @@
>   */
>  
>  /* rbd_aio_discard added in 0.1.2 */
> -#if LIBRBD_VERSION_CODE >= LIBRBD_VERSION(0, 1, 2)
> +#if LIBRBD_VERSION_CODE >= LIBRBD_VERSION(12, 0, 0)
>  #define LIBRBD_SUPPORTS_DISCARD
>  #else
>  #undef LIBRBD_SUPPORTS_DISCARD
> @@ -73,7 +73,12 @@ typedef struct RBDAIOCB {
>  BlockAIOCB common;
>  int64_t ret;
>  QEMUIOVector *qiov;
> +/* Note:
> + * The LIBRBD_SUPPORTS_IOVEC is defined in librbd.h.
> + */

I understand that since this feature relies on new symbols in the library,
the ifdef can't be entirely avoided.  However, there are a few changes that
can make the code much more readable, and greatly reduce the use of ifdefs
through the code.


For instance, you could have helper constants:

#ifdef LIBRBD_SUPPORTS_IOVEC
#define LIBRBD_USE_IOVEC1
#else
#define LIBRBD_USE_IOVEC0
#endif

Now that can be used for a more natural code, with minimal ifdefs.  The
compiler optimizations should still remove the code, as well.


A helper function for memset will also keep things simpler:

static void qemu_rbd_memset(RADOSCB *rcb, int64_t offs)
{
if (LIBRBD_USE_IOVEC) {
RBDAIOCB *acb = rcb->acb;
iov_memset(acb->qiov->iov, acb->qiov->niov, offs, 0,
   acb->qiov->size - offs);
} else {
memset(rcb->buf + offs, 0, rcb->size - offs);
}
}



> +#ifndef LIBRBD_SUPPORTS_IOVEC
>  char *bounce;
> +#endif

This conditional can go away (see [1]).

>  RBDAIOCmd cmd;
>  int error;
>  struct BDRVRBDState *s;
> @@ -83,7 +88,9 @@ typedef struct RADOSCB {
>  RBDAIOCB *acb;
>  struct BDRVRBDState *s;
>  int64_t size;
> +#ifndef LIBRBD_SUPPORTS_IOVEC
>  char *buf;
> +#endif

As can this.

>  int64_t ret;
>  } RADOSCB;
>  
> @@ -426,11 +433,21 @@ static void qemu_rbd_complete_aio(RADOSCB *rcb)
>  }
>  } else {
>  if (r < 0) {
> +#ifndef LIBRBD_SUPPORTS_IOVEC
>  memset(rcb->buf, 0, rcb->size);
> +#else
> +iov_memset(acb->qiov->iov, acb->qiov->niov, 0, 0, 
> acb->qiov->size);
> +#endif

This just becomes:

qemu_rbd_memset(rcb, 0);

>  acb->ret = r;
>  acb->error = 1;
>  } else if (r < rcb->size) {
> +#ifndef LIBRBD_SUPPORTS_IOVEC
>  memset(rcb->buf + r, 0, rcb->size - r);
> +#else
> +iov_memset(acb->qiov->iov, acb->qiov->niov,
> +   r, 0, acb->qiov->size - r);
> +#endif
> +

Same here:

qemu_rbd_memset(rcb, r);


>  if (!acb->error) {
>  acb->ret = rcb->size;
>  }
> @@ -441,10 +458,12 @@ static void qemu_rbd_complete_aio(RADOSCB *rcb)
>  
>  g_free(rcb);
>  
> +#ifndef LIBRBD_SUPPORTS_IOVEC
>  if (acb->cmd == RBD_AIO_READ) {
>  qemu_iovec_from_buf(acb->qiov, 0, acb->bounce, acb->qiov->size);
>  }
>  qemu_vfree(acb->bounce);
> +#endif

[1]

Instead of a conditional, you can now use the constant with an if statement:

if (!LIBRBD_USE_IOVEC) {
  if (acb->cmd == RBD_AIO_READ) {
  qemu_iovec_from_buf(acb->qiov, 0, acb->bounce, acb->qiov->size);
  }
  qemu_vfree(acb->bounce);
}


>  acb->common.cb(acb->common.opaque, (acb->ret > 0 ? 0 : acb->ret));
>  
>  qemu_aio_unref(acb);
> @@ -655,8 +674,10 @@ static BlockAIOCB *rbd_start_aio(BlockDriverState *bs,
>  RBDAIOCB *acb;
>  RADOSCB *rcb = NULL;
>  rbd_completion_t c;
> -char *buf;
>  int r;
> +#ifndef LIBRBD_SUPPORTS_IOVEC
> +char *buf = NULL;
> +#endif

This variable can completely go away with some code re-arrangement (below)

>  
>  BDRVRBDState *s = bs->opaque;
>
> @@ -664,6 +685,8 @@ static BlockAIOCB *rbd_start_aio(BlockDriverState *bs,
>  acb->cmd = cmd;
>  acb->qiov = qiov;
>  assert(!qiov || qiov->size == size);

[2]

Allocate rcb here

> +#ifndef LIBRBD_SUPPORTS_IOVEC
> +

You can use if(!LIBRBD_USE_IOVEC) here then.

>  if (cmd == RBD_AIO_DISCARD || cmd == RBD_AIO_FLUSH) {
>  acb->bounce = NULL;
>  } else {
> @@ -672,19 +695,21 @@ static BlockAIOCB *rbd_start_aio(BlockDriverState *bs,
>  goto failed;
>  }
>  }
> -acb->ret = 0;
> -acb->error = 0;
> -acb->s = s;
> -
>  if (cmd == RBD_AIO_WRITE) {
>  qemu_iovec_to_buf(acb->qiov, 0, acb->bounce, qiov->size);
>  }
> -
>  buf = acb->bounce;

This can just become rcb->buf = acb->bounce

> +#endif

> +acb->ret = 0;
> +acb->error = 0;
> +acb->s = s;
>  
>  rcb = g_new(RADOSCB, 1);

Move this to [2]

Re: [Qemu-devel] [RFC v5] RBD: Add support readv,writev for rbd

2017-02-16 Thread Eric Blake
On 02/16/2017 03:00 AM, jaze...@gmail.com wrote:
> From: tianqing 
> 
> Rbd can do readv and writev directly, so wo do not need to transform
> iov to buf or vice versa any more.

In general, we prefer new revisions of a patch series to be sent as a
new top-level thread, rather than buried in-reply-to the earlier
revision, as nesting can mess up some of the automated tooling that
tests whether a patch will introduce problems.

> 
> Signed-off-by: tianqing 
> ---
>  block/rbd.c | 49 ++---
>  1 file changed, 42 insertions(+), 7 deletions(-)
> 
> diff --git a/block/rbd.c b/block/rbd.c
> index a57b3e3..75ae1d6 100644
> --- a/block/rbd.c
> +++ b/block/rbd.c
> @@ -47,7 +47,7 @@
>   */
>  
>  /* rbd_aio_discard added in 0.1.2 */
> -#if LIBRBD_VERSION_CODE >= LIBRBD_VERSION(0, 1, 2)
> +#if LIBRBD_VERSION_CODE >= LIBRBD_VERSION(12, 0, 0)

Does the comment need updating, too?

-- 
Eric Blake   eblake redhat com+1-919-301-3266
Libvirt virtualization library http://libvirt.org



signature.asc
Description: OpenPGP digital signature


Re: [Qemu-devel] [RFC v5] RBD: Add support readv,writev for rbd

2017-02-16 Thread Alexandre DERUMIER
>>No yet. I just test on one qemu-kvm vm. It works fine. 
>>The performance may need more time. 
>>Any one can test on this patch if you do fast 

Hi, I would like to bench it with small 4k read/write.

On the ceph side,do we need this PR ? :
https://github.com/ceph/ceph/pull/13447


- Mail original -
De: "jazeltq" 
À: "Tiger Hu" 
Cc: "Josh Durgin" , "Jeff Cody" , 
"dillaman" , "Kevin Wolf" , 
mre...@redhat.com, qemu-bl...@nongnu.org, "qemu-devel" , 
"ceph-devel" , "tianqing" 
Envoyé: Jeudi 16 Février 2017 15:03:52
Objet: Re: [RFC v5] RBD: Add support readv,writev for rbd

No yet. I just test on one qemu-kvm vm. It works fine. 
The performance may need more time. 
Any one can test on this patch if you do fast 

2017-02-16 20:07 GMT+08:00 Tiger Hu : 
> Tianqing, 
> 
> Do we have any performance data for this patch? Thanks. 
> 
> Tiger 
>> 在 2017年2月16日,下午5:00,jaze...@gmail.com 写道: 
>> 
>> From: tianqing  
>> 
>> Rbd can do readv and writev directly, so wo do not need to transform 
>> iov to buf or vice versa any more. 
>> 
>> Signed-off-by: tianqing  
>> --- 
>> block/rbd.c | 49 ++--- 
>> 1 file changed, 42 insertions(+), 7 deletions(-) 
>> 
>> diff --git a/block/rbd.c b/block/rbd.c 
>> index a57b3e3..75ae1d6 100644 
>> --- a/block/rbd.c 
>> +++ b/block/rbd.c 
>> @@ -47,7 +47,7 @@ 
>> */ 
>> 
>> /* rbd_aio_discard added in 0.1.2 */ 
>> -#if LIBRBD_VERSION_CODE >= LIBRBD_VERSION(0, 1, 2) 
>> +#if LIBRBD_VERSION_CODE >= LIBRBD_VERSION(12, 0, 0) 
>> #define LIBRBD_SUPPORTS_DISCARD 
>> #else 
>> #undef LIBRBD_SUPPORTS_DISCARD 
>> @@ -73,7 +73,12 @@ typedef struct RBDAIOCB { 
>> BlockAIOCB common; 
>> int64_t ret; 
>> QEMUIOVector *qiov; 
>> +/* Note: 
>> + * The LIBRBD_SUPPORTS_IOVEC is defined in librbd.h. 
>> + */ 
>> +#ifndef LIBRBD_SUPPORTS_IOVEC 
>> char *bounce; 
>> +#endif 
>> RBDAIOCmd cmd; 
>> int error; 
>> struct BDRVRBDState *s; 
>> @@ -83,7 +88,9 @@ typedef struct RADOSCB { 
>> RBDAIOCB *acb; 
>> struct BDRVRBDState *s; 
>> int64_t size; 
>> +#ifndef LIBRBD_SUPPORTS_IOVEC 
>> char *buf; 
>> +#endif 
>> int64_t ret; 
>> } RADOSCB; 
>> 
>> @@ -426,11 +433,21 @@ static void qemu_rbd_complete_aio(RADOSCB *rcb) 
>> } 
>> } else { 
>> if (r < 0) { 
>> +#ifndef LIBRBD_SUPPORTS_IOVEC 
>> memset(rcb->buf, 0, rcb->size); 
>> +#else 
>> + iov_memset(acb->qiov->iov, acb->qiov->niov, 0, 0, acb->qiov->size); 
>> +#endif 
>> acb->ret = r; 
>> acb->error = 1; 
>> } else if (r < rcb->size) { 
>> +#ifndef LIBRBD_SUPPORTS_IOVEC 
>> memset(rcb->buf + r, 0, rcb->size - r); 
>> +#else 
>> + iov_memset(acb->qiov->iov, acb->qiov->niov, 
>> + r, 0, acb->qiov->size - r); 
>> +#endif 
>> + 
>> if (!acb->error) { 
>> acb->ret = rcb->size; 
>> } 
>> @@ -441,10 +458,12 @@ static void qemu_rbd_complete_aio(RADOSCB *rcb) 
>> 
>> g_free(rcb); 
>> 
>> +#ifndef LIBRBD_SUPPORTS_IOVEC 
>> if (acb->cmd == RBD_AIO_READ) { 
>> qemu_iovec_from_buf(acb->qiov, 0, acb->bounce, acb->qiov->size); 
>> } 
>> qemu_vfree(acb->bounce); 
>> +#endif 
>> acb->common.cb(acb->common.opaque, (acb->ret > 0 ? 0 : acb->ret)); 
>> 
>> qemu_aio_unref(acb); 
>> @@ -655,8 +674,10 @@ static BlockAIOCB *rbd_start_aio(BlockDriverState *bs, 
>> RBDAIOCB *acb; 
>> RADOSCB *rcb = NULL; 
>> rbd_completion_t c; 
>> - char *buf; 
>> int r; 
>> +#ifndef LIBRBD_SUPPORTS_IOVEC 
>> + char *buf = NULL; 
>> +#endif 
>> 
>> BDRVRBDState *s = bs->opaque; 
>> 
>> @@ -664,6 +685,8 @@ static BlockAIOCB *rbd_start_aio(BlockDriverState *bs, 
>> acb->cmd = cmd; 
>> acb->qiov = qiov; 
>> assert(!qiov || qiov->size == size); 
>> +#ifndef LIBRBD_SUPPORTS_IOVEC 
>> + 
>> if (cmd == RBD_AIO_DISCARD || cmd == RBD_AIO_FLUSH) { 
>> acb->bounce = NULL; 
>> } else { 
>> @@ -672,19 +695,21 @@ static BlockAIOCB *rbd_start_aio(BlockDriverState *bs, 
>> goto failed; 
>> } 
>> } 
>> - acb->ret = 0; 
>> - acb->error = 0; 
>> - acb->s = s; 
>> - 
>> if (cmd == RBD_AIO_WRITE) { 
>> qemu_iovec_to_buf(acb->qiov, 0, acb->bounce, qiov->size); 
>> } 
>> - 
>> buf = acb->bounce; 
>> +#endif 
>> + acb->ret = 0; 
>> + acb->error = 0; 
>> + acb->s = s; 
>> 
>> rcb = g_new(RADOSCB, 1); 
>> + 
>> rcb->acb = acb; 
>> +#ifndef LIBRBD_SUPPORTS_IOVEC 
>> rcb->buf = buf; 
>> +#endif 
>> rcb->s = acb->s; 
>> rcb->size = size; 
>> r = rbd_aio_create_completion(rcb, (rbd_callback_t) rbd_finish_aiocb, &c); 
>> @@ -694,10 +719,18 @@ static BlockAIOCB *rbd_start_aio(BlockDriverState *bs, 
>> 
>> switch (cmd) { 
>> case RBD_AIO_WRITE: 
>> +#ifndef LIBRBD_SUPPORTS_IOVEC 
>> r = rbd_aio_write(s->image, off, size, buf, c); 
>> +#else 
>> + r = rbd_aio_writev(s->image, qiov->iov, qiov->niov, off, c); 
>> +#endif 
>> break; 
>> case RBD_AIO_READ: 
>> +#ifndef LIBRBD_SUPPORTS_IOVEC 
>> r = rbd_aio_read(s->image, off, size, buf, c); 
>> +#else 
>> + r = rbd_aio_readv(s->image, qiov->iov, qiov->niov, off, c); 
>> +#endif 
>> break; 
>> case RBD_AIO_DISCARD: 
>> r = rbd_aio_discard_wrapper(s->image, off, size, c); 
>> @@ -719,7 +752,9 @@ failed_completion: 
>> rbd_aio_releas

Re: [Qemu-devel] [RFC v5] RBD: Add support readv,writev for rbd

2017-02-16 Thread Tiger Hu
Tianqing,

Do we have any performance data for this patch? Thanks.

Tiger
> 在 2017年2月16日,下午5:00,jaze...@gmail.com 写道:
> 
> From: tianqing 
> 
> Rbd can do readv and writev directly, so wo do not need to transform
> iov to buf or vice versa any more.
> 
> Signed-off-by: tianqing 
> ---
> block/rbd.c | 49 ++---
> 1 file changed, 42 insertions(+), 7 deletions(-)
> 
> diff --git a/block/rbd.c b/block/rbd.c
> index a57b3e3..75ae1d6 100644
> --- a/block/rbd.c
> +++ b/block/rbd.c
> @@ -47,7 +47,7 @@
>  */
> 
> /* rbd_aio_discard added in 0.1.2 */
> -#if LIBRBD_VERSION_CODE >= LIBRBD_VERSION(0, 1, 2)
> +#if LIBRBD_VERSION_CODE >= LIBRBD_VERSION(12, 0, 0)
> #define LIBRBD_SUPPORTS_DISCARD
> #else
> #undef LIBRBD_SUPPORTS_DISCARD
> @@ -73,7 +73,12 @@ typedef struct RBDAIOCB {
> BlockAIOCB common;
> int64_t ret;
> QEMUIOVector *qiov;
> +/* Note:
> + * The LIBRBD_SUPPORTS_IOVEC is defined in librbd.h.
> + */
> +#ifndef LIBRBD_SUPPORTS_IOVEC
> char *bounce;
> +#endif
> RBDAIOCmd cmd;
> int error;
> struct BDRVRBDState *s;
> @@ -83,7 +88,9 @@ typedef struct RADOSCB {
> RBDAIOCB *acb;
> struct BDRVRBDState *s;
> int64_t size;
> +#ifndef LIBRBD_SUPPORTS_IOVEC
> char *buf;
> +#endif
> int64_t ret;
> } RADOSCB;
> 
> @@ -426,11 +433,21 @@ static void qemu_rbd_complete_aio(RADOSCB *rcb)
> }
> } else {
> if (r < 0) {
> +#ifndef LIBRBD_SUPPORTS_IOVEC
> memset(rcb->buf, 0, rcb->size);
> +#else
> +iov_memset(acb->qiov->iov, acb->qiov->niov, 0, 0, 
> acb->qiov->size);
> +#endif
> acb->ret = r;
> acb->error = 1;
> } else if (r < rcb->size) {
> +#ifndef LIBRBD_SUPPORTS_IOVEC
> memset(rcb->buf + r, 0, rcb->size - r);
> +#else
> +iov_memset(acb->qiov->iov, acb->qiov->niov,
> +   r, 0, acb->qiov->size - r);
> +#endif
> +
> if (!acb->error) {
> acb->ret = rcb->size;
> }
> @@ -441,10 +458,12 @@ static void qemu_rbd_complete_aio(RADOSCB *rcb)
> 
> g_free(rcb);
> 
> +#ifndef LIBRBD_SUPPORTS_IOVEC
> if (acb->cmd == RBD_AIO_READ) {
> qemu_iovec_from_buf(acb->qiov, 0, acb->bounce, acb->qiov->size);
> }
> qemu_vfree(acb->bounce);
> +#endif
> acb->common.cb(acb->common.opaque, (acb->ret > 0 ? 0 : acb->ret));
> 
> qemu_aio_unref(acb);
> @@ -655,8 +674,10 @@ static BlockAIOCB *rbd_start_aio(BlockDriverState *bs,
> RBDAIOCB *acb;
> RADOSCB *rcb = NULL;
> rbd_completion_t c;
> -char *buf;
> int r;
> +#ifndef LIBRBD_SUPPORTS_IOVEC
> +char *buf = NULL;
> +#endif
> 
> BDRVRBDState *s = bs->opaque;
> 
> @@ -664,6 +685,8 @@ static BlockAIOCB *rbd_start_aio(BlockDriverState *bs,
> acb->cmd = cmd;
> acb->qiov = qiov;
> assert(!qiov || qiov->size == size);
> +#ifndef LIBRBD_SUPPORTS_IOVEC
> +
> if (cmd == RBD_AIO_DISCARD || cmd == RBD_AIO_FLUSH) {
> acb->bounce = NULL;
> } else {
> @@ -672,19 +695,21 @@ static BlockAIOCB *rbd_start_aio(BlockDriverState *bs,
> goto failed;
> }
> }
> -acb->ret = 0;
> -acb->error = 0;
> -acb->s = s;
> -
> if (cmd == RBD_AIO_WRITE) {
> qemu_iovec_to_buf(acb->qiov, 0, acb->bounce, qiov->size);
> }
> -
> buf = acb->bounce;
> +#endif
> +acb->ret = 0;
> +acb->error = 0;
> +acb->s = s;
> 
> rcb = g_new(RADOSCB, 1);
> +
> rcb->acb = acb;
> +#ifndef LIBRBD_SUPPORTS_IOVEC
> rcb->buf = buf;
> +#endif
> rcb->s = acb->s;
> rcb->size = size;
> r = rbd_aio_create_completion(rcb, (rbd_callback_t) rbd_finish_aiocb, &c);
> @@ -694,10 +719,18 @@ static BlockAIOCB *rbd_start_aio(BlockDriverState *bs,
> 
> switch (cmd) {
> case RBD_AIO_WRITE:
> +#ifndef LIBRBD_SUPPORTS_IOVEC
> r = rbd_aio_write(s->image, off, size, buf, c);
> +#else
> +r = rbd_aio_writev(s->image, qiov->iov, qiov->niov, off, c);
> +#endif
> break;
> case RBD_AIO_READ:
> +#ifndef LIBRBD_SUPPORTS_IOVEC
> r = rbd_aio_read(s->image, off, size, buf, c);
> +#else
> +r = rbd_aio_readv(s->image, qiov->iov, qiov->niov, off, c);
> +#endif
> break;
> case RBD_AIO_DISCARD:
> r = rbd_aio_discard_wrapper(s->image, off, size, c);
> @@ -719,7 +752,9 @@ failed_completion:
> rbd_aio_release(c);
> failed:
> g_free(rcb);
> +#ifndef LIBRBD_SUPPORTS_IOVEC
> qemu_vfree(acb->bounce);
> +#endif
> qemu_aio_unref(acb);
> return NULL;
> }
> -- 
> 2.10.2
> 
> --
> To unsubscribe from this list: send the line "unsubscribe ceph-devel" in
> the body of a message to majord...@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html




Re: [Qemu-devel] [RFC v5] RBD: Add support readv,writev for rbd

2017-02-16 Thread Jaze Lee
2017-02-16 22:14 GMT+08:00 Jason Dillaman :
> On Thu, Feb 16, 2017 at 4:00 AM,   wrote:
>> From: tianqing 
>>
>> Rbd can do readv and writev directly, so wo do not need to transform
>> iov to buf or vice versa any more.
>>
>> Signed-off-by: tianqing 
>> ---
>>  block/rbd.c | 49 ++---
>>  1 file changed, 42 insertions(+), 7 deletions(-)
>>
>> diff --git a/block/rbd.c b/block/rbd.c
>> index a57b3e3..75ae1d6 100644
>> --- a/block/rbd.c
>> +++ b/block/rbd.c
>> @@ -47,7 +47,7 @@
>>   */
>>
>>  /* rbd_aio_discard added in 0.1.2 */
>> -#if LIBRBD_VERSION_CODE >= LIBRBD_VERSION(0, 1, 2)
>> +#if LIBRBD_VERSION_CODE >= LIBRBD_VERSION(12, 0, 0)
>>  #define LIBRBD_SUPPORTS_DISCARD
>>  #else
>>  #undef LIBRBD_SUPPORTS_DISCARD
>
> Do not change this -- discard support is available in very old
> versions of librbd not just the future Luminous release.

Ok. Thanks a lot.



>
>> @@ -73,7 +73,12 @@ typedef struct RBDAIOCB {
>>  BlockAIOCB common;
>>  int64_t ret;
>>  QEMUIOVector *qiov;
>> +/* Note:
>> + * The LIBRBD_SUPPORTS_IOVEC is defined in librbd.h.
>> + */
>> +#ifndef LIBRBD_SUPPORTS_IOVEC
>>  char *bounce;
>> +#endif
>>  RBDAIOCmd cmd;
>>  int error;
>>  struct BDRVRBDState *s;
>> @@ -83,7 +88,9 @@ typedef struct RADOSCB {
>>  RBDAIOCB *acb;
>>  struct BDRVRBDState *s;
>>  int64_t size;
>> +#ifndef LIBRBD_SUPPORTS_IOVEC
>>  char *buf;
>> +#endif
>>  int64_t ret;
>>  } RADOSCB;
>>
>> @@ -426,11 +433,21 @@ static void qemu_rbd_complete_aio(RADOSCB *rcb)
>>  }
>>  } else {
>>  if (r < 0) {
>> +#ifndef LIBRBD_SUPPORTS_IOVEC
>>  memset(rcb->buf, 0, rcb->size);
>> +#else
>> +iov_memset(acb->qiov->iov, acb->qiov->niov, 0, 0, 
>> acb->qiov->size);
>> +#endif
>>  acb->ret = r;
>>  acb->error = 1;
>>  } else if (r < rcb->size) {
>> +#ifndef LIBRBD_SUPPORTS_IOVEC
>>  memset(rcb->buf + r, 0, rcb->size - r);
>> +#else
>> +iov_memset(acb->qiov->iov, acb->qiov->niov,
>> +   r, 0, acb->qiov->size - r);
>> +#endif
>> +
>>  if (!acb->error) {
>>  acb->ret = rcb->size;
>>  }
>> @@ -441,10 +458,12 @@ static void qemu_rbd_complete_aio(RADOSCB *rcb)
>>
>>  g_free(rcb);
>>
>> +#ifndef LIBRBD_SUPPORTS_IOVEC
>>  if (acb->cmd == RBD_AIO_READ) {
>>  qemu_iovec_from_buf(acb->qiov, 0, acb->bounce, acb->qiov->size);
>>  }
>>  qemu_vfree(acb->bounce);
>> +#endif
>>  acb->common.cb(acb->common.opaque, (acb->ret > 0 ? 0 : acb->ret));
>>
>>  qemu_aio_unref(acb);
>> @@ -655,8 +674,10 @@ static BlockAIOCB *rbd_start_aio(BlockDriverState *bs,
>>  RBDAIOCB *acb;
>>  RADOSCB *rcb = NULL;
>>  rbd_completion_t c;
>> -char *buf;
>>  int r;
>> +#ifndef LIBRBD_SUPPORTS_IOVEC
>> +char *buf = NULL;
>> +#endif
>>
>>  BDRVRBDState *s = bs->opaque;
>>
>> @@ -664,6 +685,8 @@ static BlockAIOCB *rbd_start_aio(BlockDriverState *bs,
>>  acb->cmd = cmd;
>>  acb->qiov = qiov;
>>  assert(!qiov || qiov->size == size);
>> +#ifndef LIBRBD_SUPPORTS_IOVEC
>> +
>>  if (cmd == RBD_AIO_DISCARD || cmd == RBD_AIO_FLUSH) {
>>  acb->bounce = NULL;
>>  } else {
>> @@ -672,19 +695,21 @@ static BlockAIOCB *rbd_start_aio(BlockDriverState *bs,
>>  goto failed;
>>  }
>>  }
>> -acb->ret = 0;
>> -acb->error = 0;
>> -acb->s = s;
>> -
>>  if (cmd == RBD_AIO_WRITE) {
>>  qemu_iovec_to_buf(acb->qiov, 0, acb->bounce, qiov->size);
>>  }
>> -
>>  buf = acb->bounce;
>> +#endif
>> +acb->ret = 0;
>> +acb->error = 0;
>> +acb->s = s;
>>
>>  rcb = g_new(RADOSCB, 1);
>> +
>>  rcb->acb = acb;
>> +#ifndef LIBRBD_SUPPORTS_IOVEC
>>  rcb->buf = buf;
>> +#endif
>>  rcb->s = acb->s;
>>  rcb->size = size;
>>  r = rbd_aio_create_completion(rcb, (rbd_callback_t) rbd_finish_aiocb, 
>> &c);
>> @@ -694,10 +719,18 @@ static BlockAIOCB *rbd_start_aio(BlockDriverState *bs,
>>
>>  switch (cmd) {
>>  case RBD_AIO_WRITE:
>> +#ifndef LIBRBD_SUPPORTS_IOVEC
>>  r = rbd_aio_write(s->image, off, size, buf, c);
>> +#else
>> +r = rbd_aio_writev(s->image, qiov->iov, qiov->niov, off, c);
>> +#endif
>>  break;
>>  case RBD_AIO_READ:
>> +#ifndef LIBRBD_SUPPORTS_IOVEC
>>  r = rbd_aio_read(s->image, off, size, buf, c);
>> +#else
>> +r = rbd_aio_readv(s->image, qiov->iov, qiov->niov, off, c);
>> +#endif
>>  break;
>>  case RBD_AIO_DISCARD:
>>  r = rbd_aio_discard_wrapper(s->image, off, size, c);
>> @@ -719,7 +752,9 @@ failed_completion:
>>  rbd_aio_release(c);
>>  failed:
>>  g_free(rcb);
>> +#ifndef LIBRBD_SUPPORTS_IOVEC
>>  qemu_vfree(acb->bounce);
>> +#endif
>>  qemu_aio_unref(acb);
>>  return NULL;
>>  }
>> --
>> 2.10.2
>>
>
>
>
> --
> Jason



-- 
谦谦君子



Re: [Qemu-devel] [RFC v5] RBD: Add support readv,writev for rbd

2017-02-16 Thread Jaze Lee
No yet. I just test on one qemu-kvm vm. It works fine.
The performance may need more time.
Any one can test on this patch if you do fast

2017-02-16 20:07 GMT+08:00 Tiger Hu :
> Tianqing,
>
> Do we have any performance data for this patch? Thanks.
>
> Tiger
>> 在 2017年2月16日,下午5:00,jaze...@gmail.com 写道:
>>
>> From: tianqing 
>>
>> Rbd can do readv and writev directly, so wo do not need to transform
>> iov to buf or vice versa any more.
>>
>> Signed-off-by: tianqing 
>> ---
>> block/rbd.c | 49 ++---
>> 1 file changed, 42 insertions(+), 7 deletions(-)
>>
>> diff --git a/block/rbd.c b/block/rbd.c
>> index a57b3e3..75ae1d6 100644
>> --- a/block/rbd.c
>> +++ b/block/rbd.c
>> @@ -47,7 +47,7 @@
>>  */
>>
>> /* rbd_aio_discard added in 0.1.2 */
>> -#if LIBRBD_VERSION_CODE >= LIBRBD_VERSION(0, 1, 2)
>> +#if LIBRBD_VERSION_CODE >= LIBRBD_VERSION(12, 0, 0)
>> #define LIBRBD_SUPPORTS_DISCARD
>> #else
>> #undef LIBRBD_SUPPORTS_DISCARD
>> @@ -73,7 +73,12 @@ typedef struct RBDAIOCB {
>> BlockAIOCB common;
>> int64_t ret;
>> QEMUIOVector *qiov;
>> +/* Note:
>> + * The LIBRBD_SUPPORTS_IOVEC is defined in librbd.h.
>> + */
>> +#ifndef LIBRBD_SUPPORTS_IOVEC
>> char *bounce;
>> +#endif
>> RBDAIOCmd cmd;
>> int error;
>> struct BDRVRBDState *s;
>> @@ -83,7 +88,9 @@ typedef struct RADOSCB {
>> RBDAIOCB *acb;
>> struct BDRVRBDState *s;
>> int64_t size;
>> +#ifndef LIBRBD_SUPPORTS_IOVEC
>> char *buf;
>> +#endif
>> int64_t ret;
>> } RADOSCB;
>>
>> @@ -426,11 +433,21 @@ static void qemu_rbd_complete_aio(RADOSCB *rcb)
>> }
>> } else {
>> if (r < 0) {
>> +#ifndef LIBRBD_SUPPORTS_IOVEC
>> memset(rcb->buf, 0, rcb->size);
>> +#else
>> +iov_memset(acb->qiov->iov, acb->qiov->niov, 0, 0, 
>> acb->qiov->size);
>> +#endif
>> acb->ret = r;
>> acb->error = 1;
>> } else if (r < rcb->size) {
>> +#ifndef LIBRBD_SUPPORTS_IOVEC
>> memset(rcb->buf + r, 0, rcb->size - r);
>> +#else
>> +iov_memset(acb->qiov->iov, acb->qiov->niov,
>> +   r, 0, acb->qiov->size - r);
>> +#endif
>> +
>> if (!acb->error) {
>> acb->ret = rcb->size;
>> }
>> @@ -441,10 +458,12 @@ static void qemu_rbd_complete_aio(RADOSCB *rcb)
>>
>> g_free(rcb);
>>
>> +#ifndef LIBRBD_SUPPORTS_IOVEC
>> if (acb->cmd == RBD_AIO_READ) {
>> qemu_iovec_from_buf(acb->qiov, 0, acb->bounce, acb->qiov->size);
>> }
>> qemu_vfree(acb->bounce);
>> +#endif
>> acb->common.cb(acb->common.opaque, (acb->ret > 0 ? 0 : acb->ret));
>>
>> qemu_aio_unref(acb);
>> @@ -655,8 +674,10 @@ static BlockAIOCB *rbd_start_aio(BlockDriverState *bs,
>> RBDAIOCB *acb;
>> RADOSCB *rcb = NULL;
>> rbd_completion_t c;
>> -char *buf;
>> int r;
>> +#ifndef LIBRBD_SUPPORTS_IOVEC
>> +char *buf = NULL;
>> +#endif
>>
>> BDRVRBDState *s = bs->opaque;
>>
>> @@ -664,6 +685,8 @@ static BlockAIOCB *rbd_start_aio(BlockDriverState *bs,
>> acb->cmd = cmd;
>> acb->qiov = qiov;
>> assert(!qiov || qiov->size == size);
>> +#ifndef LIBRBD_SUPPORTS_IOVEC
>> +
>> if (cmd == RBD_AIO_DISCARD || cmd == RBD_AIO_FLUSH) {
>> acb->bounce = NULL;
>> } else {
>> @@ -672,19 +695,21 @@ static BlockAIOCB *rbd_start_aio(BlockDriverState *bs,
>> goto failed;
>> }
>> }
>> -acb->ret = 0;
>> -acb->error = 0;
>> -acb->s = s;
>> -
>> if (cmd == RBD_AIO_WRITE) {
>> qemu_iovec_to_buf(acb->qiov, 0, acb->bounce, qiov->size);
>> }
>> -
>> buf = acb->bounce;
>> +#endif
>> +acb->ret = 0;
>> +acb->error = 0;
>> +acb->s = s;
>>
>> rcb = g_new(RADOSCB, 1);
>> +
>> rcb->acb = acb;
>> +#ifndef LIBRBD_SUPPORTS_IOVEC
>> rcb->buf = buf;
>> +#endif
>> rcb->s = acb->s;
>> rcb->size = size;
>> r = rbd_aio_create_completion(rcb, (rbd_callback_t) rbd_finish_aiocb, 
>> &c);
>> @@ -694,10 +719,18 @@ static BlockAIOCB *rbd_start_aio(BlockDriverState *bs,
>>
>> switch (cmd) {
>> case RBD_AIO_WRITE:
>> +#ifndef LIBRBD_SUPPORTS_IOVEC
>> r = rbd_aio_write(s->image, off, size, buf, c);
>> +#else
>> +r = rbd_aio_writev(s->image, qiov->iov, qiov->niov, off, c);
>> +#endif
>> break;
>> case RBD_AIO_READ:
>> +#ifndef LIBRBD_SUPPORTS_IOVEC
>> r = rbd_aio_read(s->image, off, size, buf, c);
>> +#else
>> +r = rbd_aio_readv(s->image, qiov->iov, qiov->niov, off, c);
>> +#endif
>> break;
>> case RBD_AIO_DISCARD:
>> r = rbd_aio_discard_wrapper(s->image, off, size, c);
>> @@ -719,7 +752,9 @@ failed_completion:
>> rbd_aio_release(c);
>> failed:
>> g_free(rcb);
>> +#ifndef LIBRBD_SUPPORTS_IOVEC
>> qemu_vfree(acb->bounce);
>> +#endif
>> qemu_aio_unref(acb);
>> return NULL;
>> }
>> --
>> 2.10.2
>>
>> --
>> To unsubscribe from this list: send the line "unsubscribe ceph-devel" in
>> the body of a mess

Re: [Qemu-devel] [RFC v5] RBD: Add support readv,writev for rbd

2017-02-16 Thread Jason Dillaman
On Thu, Feb 16, 2017 at 4:00 AM,   wrote:
> From: tianqing 
>
> Rbd can do readv and writev directly, so wo do not need to transform
> iov to buf or vice versa any more.
>
> Signed-off-by: tianqing 
> ---
>  block/rbd.c | 49 ++---
>  1 file changed, 42 insertions(+), 7 deletions(-)
>
> diff --git a/block/rbd.c b/block/rbd.c
> index a57b3e3..75ae1d6 100644
> --- a/block/rbd.c
> +++ b/block/rbd.c
> @@ -47,7 +47,7 @@
>   */
>
>  /* rbd_aio_discard added in 0.1.2 */
> -#if LIBRBD_VERSION_CODE >= LIBRBD_VERSION(0, 1, 2)
> +#if LIBRBD_VERSION_CODE >= LIBRBD_VERSION(12, 0, 0)
>  #define LIBRBD_SUPPORTS_DISCARD
>  #else
>  #undef LIBRBD_SUPPORTS_DISCARD

Do not change this -- discard support is available in very old
versions of librbd not just the future Luminous release.

> @@ -73,7 +73,12 @@ typedef struct RBDAIOCB {
>  BlockAIOCB common;
>  int64_t ret;
>  QEMUIOVector *qiov;
> +/* Note:
> + * The LIBRBD_SUPPORTS_IOVEC is defined in librbd.h.
> + */
> +#ifndef LIBRBD_SUPPORTS_IOVEC
>  char *bounce;
> +#endif
>  RBDAIOCmd cmd;
>  int error;
>  struct BDRVRBDState *s;
> @@ -83,7 +88,9 @@ typedef struct RADOSCB {
>  RBDAIOCB *acb;
>  struct BDRVRBDState *s;
>  int64_t size;
> +#ifndef LIBRBD_SUPPORTS_IOVEC
>  char *buf;
> +#endif
>  int64_t ret;
>  } RADOSCB;
>
> @@ -426,11 +433,21 @@ static void qemu_rbd_complete_aio(RADOSCB *rcb)
>  }
>  } else {
>  if (r < 0) {
> +#ifndef LIBRBD_SUPPORTS_IOVEC
>  memset(rcb->buf, 0, rcb->size);
> +#else
> +iov_memset(acb->qiov->iov, acb->qiov->niov, 0, 0, 
> acb->qiov->size);
> +#endif
>  acb->ret = r;
>  acb->error = 1;
>  } else if (r < rcb->size) {
> +#ifndef LIBRBD_SUPPORTS_IOVEC
>  memset(rcb->buf + r, 0, rcb->size - r);
> +#else
> +iov_memset(acb->qiov->iov, acb->qiov->niov,
> +   r, 0, acb->qiov->size - r);
> +#endif
> +
>  if (!acb->error) {
>  acb->ret = rcb->size;
>  }
> @@ -441,10 +458,12 @@ static void qemu_rbd_complete_aio(RADOSCB *rcb)
>
>  g_free(rcb);
>
> +#ifndef LIBRBD_SUPPORTS_IOVEC
>  if (acb->cmd == RBD_AIO_READ) {
>  qemu_iovec_from_buf(acb->qiov, 0, acb->bounce, acb->qiov->size);
>  }
>  qemu_vfree(acb->bounce);
> +#endif
>  acb->common.cb(acb->common.opaque, (acb->ret > 0 ? 0 : acb->ret));
>
>  qemu_aio_unref(acb);
> @@ -655,8 +674,10 @@ static BlockAIOCB *rbd_start_aio(BlockDriverState *bs,
>  RBDAIOCB *acb;
>  RADOSCB *rcb = NULL;
>  rbd_completion_t c;
> -char *buf;
>  int r;
> +#ifndef LIBRBD_SUPPORTS_IOVEC
> +char *buf = NULL;
> +#endif
>
>  BDRVRBDState *s = bs->opaque;
>
> @@ -664,6 +685,8 @@ static BlockAIOCB *rbd_start_aio(BlockDriverState *bs,
>  acb->cmd = cmd;
>  acb->qiov = qiov;
>  assert(!qiov || qiov->size == size);
> +#ifndef LIBRBD_SUPPORTS_IOVEC
> +
>  if (cmd == RBD_AIO_DISCARD || cmd == RBD_AIO_FLUSH) {
>  acb->bounce = NULL;
>  } else {
> @@ -672,19 +695,21 @@ static BlockAIOCB *rbd_start_aio(BlockDriverState *bs,
>  goto failed;
>  }
>  }
> -acb->ret = 0;
> -acb->error = 0;
> -acb->s = s;
> -
>  if (cmd == RBD_AIO_WRITE) {
>  qemu_iovec_to_buf(acb->qiov, 0, acb->bounce, qiov->size);
>  }
> -
>  buf = acb->bounce;
> +#endif
> +acb->ret = 0;
> +acb->error = 0;
> +acb->s = s;
>
>  rcb = g_new(RADOSCB, 1);
> +
>  rcb->acb = acb;
> +#ifndef LIBRBD_SUPPORTS_IOVEC
>  rcb->buf = buf;
> +#endif
>  rcb->s = acb->s;
>  rcb->size = size;
>  r = rbd_aio_create_completion(rcb, (rbd_callback_t) rbd_finish_aiocb, 
> &c);
> @@ -694,10 +719,18 @@ static BlockAIOCB *rbd_start_aio(BlockDriverState *bs,
>
>  switch (cmd) {
>  case RBD_AIO_WRITE:
> +#ifndef LIBRBD_SUPPORTS_IOVEC
>  r = rbd_aio_write(s->image, off, size, buf, c);
> +#else
> +r = rbd_aio_writev(s->image, qiov->iov, qiov->niov, off, c);
> +#endif
>  break;
>  case RBD_AIO_READ:
> +#ifndef LIBRBD_SUPPORTS_IOVEC
>  r = rbd_aio_read(s->image, off, size, buf, c);
> +#else
> +r = rbd_aio_readv(s->image, qiov->iov, qiov->niov, off, c);
> +#endif
>  break;
>  case RBD_AIO_DISCARD:
>  r = rbd_aio_discard_wrapper(s->image, off, size, c);
> @@ -719,7 +752,9 @@ failed_completion:
>  rbd_aio_release(c);
>  failed:
>  g_free(rcb);
> +#ifndef LIBRBD_SUPPORTS_IOVEC
>  qemu_vfree(acb->bounce);
> +#endif
>  qemu_aio_unref(acb);
>  return NULL;
>  }
> --
> 2.10.2
>



-- 
Jason



[Qemu-devel] [RFC v5] RBD: Add support readv,writev for rbd

2017-02-16 Thread jazeltq
From: tianqing 

Rbd can do readv and writev directly, so wo do not need to transform
iov to buf or vice versa any more.

Signed-off-by: tianqing 
---
 block/rbd.c | 49 ++---
 1 file changed, 42 insertions(+), 7 deletions(-)

diff --git a/block/rbd.c b/block/rbd.c
index a57b3e3..75ae1d6 100644
--- a/block/rbd.c
+++ b/block/rbd.c
@@ -47,7 +47,7 @@
  */
 
 /* rbd_aio_discard added in 0.1.2 */
-#if LIBRBD_VERSION_CODE >= LIBRBD_VERSION(0, 1, 2)
+#if LIBRBD_VERSION_CODE >= LIBRBD_VERSION(12, 0, 0)
 #define LIBRBD_SUPPORTS_DISCARD
 #else
 #undef LIBRBD_SUPPORTS_DISCARD
@@ -73,7 +73,12 @@ typedef struct RBDAIOCB {
 BlockAIOCB common;
 int64_t ret;
 QEMUIOVector *qiov;
+/* Note:
+ * The LIBRBD_SUPPORTS_IOVEC is defined in librbd.h.
+ */
+#ifndef LIBRBD_SUPPORTS_IOVEC
 char *bounce;
+#endif
 RBDAIOCmd cmd;
 int error;
 struct BDRVRBDState *s;
@@ -83,7 +88,9 @@ typedef struct RADOSCB {
 RBDAIOCB *acb;
 struct BDRVRBDState *s;
 int64_t size;
+#ifndef LIBRBD_SUPPORTS_IOVEC
 char *buf;
+#endif
 int64_t ret;
 } RADOSCB;
 
@@ -426,11 +433,21 @@ static void qemu_rbd_complete_aio(RADOSCB *rcb)
 }
 } else {
 if (r < 0) {
+#ifndef LIBRBD_SUPPORTS_IOVEC
 memset(rcb->buf, 0, rcb->size);
+#else
+iov_memset(acb->qiov->iov, acb->qiov->niov, 0, 0, acb->qiov->size);
+#endif
 acb->ret = r;
 acb->error = 1;
 } else if (r < rcb->size) {
+#ifndef LIBRBD_SUPPORTS_IOVEC
 memset(rcb->buf + r, 0, rcb->size - r);
+#else
+iov_memset(acb->qiov->iov, acb->qiov->niov,
+   r, 0, acb->qiov->size - r);
+#endif
+
 if (!acb->error) {
 acb->ret = rcb->size;
 }
@@ -441,10 +458,12 @@ static void qemu_rbd_complete_aio(RADOSCB *rcb)
 
 g_free(rcb);
 
+#ifndef LIBRBD_SUPPORTS_IOVEC
 if (acb->cmd == RBD_AIO_READ) {
 qemu_iovec_from_buf(acb->qiov, 0, acb->bounce, acb->qiov->size);
 }
 qemu_vfree(acb->bounce);
+#endif
 acb->common.cb(acb->common.opaque, (acb->ret > 0 ? 0 : acb->ret));
 
 qemu_aio_unref(acb);
@@ -655,8 +674,10 @@ static BlockAIOCB *rbd_start_aio(BlockDriverState *bs,
 RBDAIOCB *acb;
 RADOSCB *rcb = NULL;
 rbd_completion_t c;
-char *buf;
 int r;
+#ifndef LIBRBD_SUPPORTS_IOVEC
+char *buf = NULL;
+#endif
 
 BDRVRBDState *s = bs->opaque;
 
@@ -664,6 +685,8 @@ static BlockAIOCB *rbd_start_aio(BlockDriverState *bs,
 acb->cmd = cmd;
 acb->qiov = qiov;
 assert(!qiov || qiov->size == size);
+#ifndef LIBRBD_SUPPORTS_IOVEC
+
 if (cmd == RBD_AIO_DISCARD || cmd == RBD_AIO_FLUSH) {
 acb->bounce = NULL;
 } else {
@@ -672,19 +695,21 @@ static BlockAIOCB *rbd_start_aio(BlockDriverState *bs,
 goto failed;
 }
 }
-acb->ret = 0;
-acb->error = 0;
-acb->s = s;
-
 if (cmd == RBD_AIO_WRITE) {
 qemu_iovec_to_buf(acb->qiov, 0, acb->bounce, qiov->size);
 }
-
 buf = acb->bounce;
+#endif
+acb->ret = 0;
+acb->error = 0;
+acb->s = s;
 
 rcb = g_new(RADOSCB, 1);
+
 rcb->acb = acb;
+#ifndef LIBRBD_SUPPORTS_IOVEC
 rcb->buf = buf;
+#endif
 rcb->s = acb->s;
 rcb->size = size;
 r = rbd_aio_create_completion(rcb, (rbd_callback_t) rbd_finish_aiocb, &c);
@@ -694,10 +719,18 @@ static BlockAIOCB *rbd_start_aio(BlockDriverState *bs,
 
 switch (cmd) {
 case RBD_AIO_WRITE:
+#ifndef LIBRBD_SUPPORTS_IOVEC
 r = rbd_aio_write(s->image, off, size, buf, c);
+#else
+r = rbd_aio_writev(s->image, qiov->iov, qiov->niov, off, c);
+#endif
 break;
 case RBD_AIO_READ:
+#ifndef LIBRBD_SUPPORTS_IOVEC
 r = rbd_aio_read(s->image, off, size, buf, c);
+#else
+r = rbd_aio_readv(s->image, qiov->iov, qiov->niov, off, c);
+#endif
 break;
 case RBD_AIO_DISCARD:
 r = rbd_aio_discard_wrapper(s->image, off, size, c);
@@ -719,7 +752,9 @@ failed_completion:
 rbd_aio_release(c);
 failed:
 g_free(rcb);
+#ifndef LIBRBD_SUPPORTS_IOVEC
 qemu_vfree(acb->bounce);
+#endif
 qemu_aio_unref(acb);
 return NULL;
 }
-- 
2.10.2