Add a new VFIO_USER_REGION_WRITE_MULTI message that coalesces multiple region writes to the server into a single message. This prevents the outgoing queue from overflowing when a long-latency operation is followed by a series of posted writes.
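For illustration only, and not part of this change: a rough sketch of how a server that advertises the "write_multiple" capability might unpack the coalesced payload. The handler and callback names below are hypothetical; the struct mirrors the VFIOUserWROne layout added to protocol.h in this patch.

  /* Illustrative sketch only; helper and callback names are hypothetical. */
  #include <stdint.h>

  #define VFIO_USER_MULTI_DATA 8
  #define VFIO_USER_MULTI_MAX  200

  typedef struct {
      uint64_t offset;
      uint32_t region;
      uint32_t count;
      char data[VFIO_USER_MULTI_DATA];
  } VFIOUserWROne;

  /* stand-in for the server's normal per-region write path */
  typedef int (*region_write_fn)(uint32_t region, uint64_t offset,
                                 const void *data, uint32_t count);

  static int handle_region_write_multi(const VFIOUserWROne *wrs,
                                       uint64_t wr_cnt, region_write_fn wr)
  {
      if (wr_cnt > VFIO_USER_MULTI_MAX) {
          return -1;              /* malformed request */
      }
      for (uint64_t i = 0; i < wr_cnt; i++) {
          const VFIOUserWROne *w = &wrs[i];

          if (w->count > VFIO_USER_MULTI_DATA) {
              return -1;          /* each element carries at most 8 bytes */
          }
          if (wr(w->region, w->offset, w->data, w->count) < 0) {
              return -1;          /* fail the whole message on first error */
          }
      }
      return 0;
  }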
Originally-by: John Johnson <john.g.john...@oracle.com>
Signed-off-by: Elena Ufimtseva <elena.ufimts...@oracle.com>
Signed-off-by: Jagannathan Raman <jag.ra...@oracle.com>
Signed-off-by: John Levon <john.le...@nutanix.com>
---
 hw/vfio-user/protocol.h   | 21 ++++++++++
 hw/vfio-user/proxy.h      | 12 ++++++
 hw/vfio-user/device.c     | 40 +++++++++++++++++++
 hw/vfio-user/proxy.c      | 84 +++++++++++++++++++++++++++++++++++++++
 hw/vfio-user/trace-events |  1 +
 5 files changed, 158 insertions(+)

diff --git a/hw/vfio-user/protocol.h b/hw/vfio-user/protocol.h
index 8f589faef4..f747e70e50 100644
--- a/hw/vfio-user/protocol.h
+++ b/hw/vfio-user/protocol.h
@@ -42,6 +42,7 @@ enum vfio_user_command {
     VFIO_USER_DMA_WRITE = 12,
     VFIO_USER_DEVICE_RESET = 13,
     VFIO_USER_DIRTY_PAGES = 14,
+    VFIO_USER_REGION_WRITE_MULTI = 15,
 
     VFIO_USER_MAX,
 };
@@ -75,6 +76,7 @@ typedef struct {
 #define VFIO_USER_CAP_PGSIZES "pgsizes"
 #define VFIO_USER_CAP_MAP_MAX "max_dma_maps"
 #define VFIO_USER_CAP_MIGR "migration"
+#define VFIO_USER_CAP_MULTI "write_multiple"
 
 /* "migration" members */
 #define VFIO_USER_CAP_PGSIZE "pgsize"
@@ -221,4 +223,23 @@ typedef struct {
     char data[];
 } VFIOUserBitmap;
 
+/*
+ * VFIO_USER_REGION_WRITE_MULTI
+ */
+#define VFIO_USER_MULTI_DATA 8
+#define VFIO_USER_MULTI_MAX 200
+
+typedef struct {
+    uint64_t offset;
+    uint32_t region;
+    uint32_t count;
+    char data[VFIO_USER_MULTI_DATA];
+} VFIOUserWROne;
+
+typedef struct {
+    VFIOUserHdr hdr;
+    uint64_t wr_cnt;
+    VFIOUserWROne wrs[VFIO_USER_MULTI_MAX];
+} VFIOUserWRMulti;
+
 #endif /* VFIO_USER_PROTOCOL_H */
diff --git a/hw/vfio-user/proxy.h b/hw/vfio-user/proxy.h
index 22ed66c54f..ae09b9cc60 100644
--- a/hw/vfio-user/proxy.h
+++ b/hw/vfio-user/proxy.h
@@ -88,6 +88,8 @@ typedef struct VFIOUserProxy {
     VFIOUserMsg *last_nowait;
     VFIOUserMsg *part_recv;
     size_t recv_left;
+    VFIOUserWRMulti *wr_multi;
+    int num_outgoing;
     enum proxy_state state;
 } VFIOUserProxy;
 
@@ -95,6 +97,11 @@ typedef struct VFIOUserProxy {
 #define VFIO_PROXY_CLIENT 0x1
 #define VFIO_PROXY_FORCE_QUEUED 0x4
 #define VFIO_PROXY_NO_POST 0x8
+#define VFIO_PROXY_USE_MULTI 0x10
+
+/* coalescing high and low water marks for VFIOUserProxy num_outgoing */
+#define VFIO_USER_OUT_HIGH 1024
+#define VFIO_USER_OUT_LOW 128
 
 typedef struct VFIODevice VFIODevice;
@@ -122,4 +129,9 @@ void vfio_user_send_async(VFIOUserProxy *proxy, VFIOUserHdr *hdr,
 void vfio_user_send_reply(VFIOUserProxy *proxy, VFIOUserHdr *hdr, int size);
 void vfio_user_send_error(VFIOUserProxy *proxy, VFIOUserHdr *hdr, int error);
 
+void vfio_user_flush_multi(VFIOUserProxy *proxy);
+void vfio_user_create_multi(VFIOUserProxy *proxy);
+void vfio_user_add_multi(VFIOUserProxy *proxy, uint8_t index,
+                         off_t offset, uint32_t count, void *data);
+
 #endif /* VFIO_USER_PROXY_H */
diff --git a/hw/vfio-user/device.c b/hw/vfio-user/device.c
index eb2194c0eb..79375ddc96 100644
--- a/hw/vfio-user/device.c
+++ b/hw/vfio-user/device.c
@@ -10,6 +10,8 @@
  */
 
 #include "qemu/osdep.h"
+#include "qemu/lockable.h"
+#include "qemu/thread.h"
 
 #include "hw/vfio-user/device.h"
 #include "hw/vfio-user/trace.h"
@@ -296,6 +298,7 @@ static int vfio_user_device_io_region_write(VFIODevice *vbasedev, uint8_t index,
     VFIOUserRegionRW *msgp = NULL;
     VFIOUserProxy *proxy = vbasedev->proxy;
     int size = sizeof(*msgp) + count;
+    bool can_multi;
     int flags = 0;
     int ret;
 
@@ -311,6 +314,43 @@ static int vfio_user_device_io_region_write(VFIODevice *vbasedev, uint8_t index,
         flags |= VFIO_USER_NO_REPLY;
     }
 
+    /* write eligible to be in a WRITE_MULTI msg? */
+    can_multi = (proxy->flags & VFIO_PROXY_USE_MULTI) && post &&
+        count <= VFIO_USER_MULTI_DATA;
+
+    /*
+     * This should be a rare case, so first check without the lock,
+     * if we're wrong, vfio_user_send_queued() will flush any posted writes
+     * we missed here
+     */
+    if (proxy->wr_multi != NULL ||
+        (proxy->num_outgoing > VFIO_USER_OUT_HIGH && can_multi)) {
+
+        /*
+         * re-check with lock
+         *
+         * if already building a WRITE_MULTI msg,
+         *  add this one if possible else flush pending before
+         *  sending the current one
+         *
+         * else if outgoing queue is over the highwater,
+         *  start a new WRITE_MULTI message
+         */
+        WITH_QEMU_LOCK_GUARD(&proxy->lock) {
+            if (proxy->wr_multi != NULL) {
+                if (can_multi) {
+                    vfio_user_add_multi(proxy, index, off, count, data);
+                    return count;
+                }
+                vfio_user_flush_multi(proxy);
+            } else if (proxy->num_outgoing > VFIO_USER_OUT_HIGH && can_multi) {
+                vfio_user_create_multi(proxy);
+                vfio_user_add_multi(proxy, index, off, count, data);
+                return count;
+            }
+        }
+    }
+
     msgp = g_malloc0(size);
     vfio_user_request_msg(&msgp->hdr, VFIO_USER_REGION_WRITE, size, flags);
     msgp->offset = off;
diff --git a/hw/vfio-user/proxy.c b/hw/vfio-user/proxy.c
index 13f2407845..dbaa322952 100644
--- a/hw/vfio-user/proxy.c
+++ b/hw/vfio-user/proxy.c
@@ -16,12 +16,14 @@
 #include "hw/vfio-user/proxy.h"
 #include "hw/vfio-user/trace.h"
 #include "qapi/error.h"
+#include "qobject/qbool.h"
 #include "qobject/qdict.h"
 #include "qobject/qjson.h"
 #include "qobject/qnum.h"
 #include "qemu/error-report.h"
 #include "qemu/lockable.h"
 #include "qemu/main-loop.h"
+#include "qemu/thread.h"
 #include "system/iothread.h"
 
 static IOThread *vfio_user_iothread;
@@ -444,6 +446,11 @@ static void vfio_user_send(void *opaque)
         }
         qio_channel_set_aio_fd_handler(proxy->ioc, proxy->ctx,
                                        vfio_user_recv, NULL, NULL, proxy);
+
+        /* queue empty - send any pending multi write msgs */
+        if (proxy->wr_multi != NULL) {
+            vfio_user_flush_multi(proxy);
+        }
     }
 }
 
@@ -464,6 +471,7 @@ static int vfio_user_send_one(VFIOUserProxy *proxy)
     }
 
     QTAILQ_REMOVE(&proxy->outgoing, msg, next);
+    proxy->num_outgoing--;
     if (msg->type == VFIO_MSG_ASYNC) {
         vfio_user_recycle(proxy, msg);
     } else {
@@ -571,11 +579,18 @@ static int vfio_user_send_queued(VFIOUserProxy *proxy, VFIOUserMsg *msg)
 {
     int ret;
 
+    /* older coalesced writes go first */
+    if (proxy->wr_multi != NULL &&
+        ((msg->hdr->flags & VFIO_USER_TYPE) == VFIO_USER_REQUEST)) {
+        vfio_user_flush_multi(proxy);
+    }
+
     /*
      * Unsent outgoing msgs - add to tail
      */
     if (!QTAILQ_EMPTY(&proxy->outgoing)) {
         QTAILQ_INSERT_TAIL(&proxy->outgoing, msg, next);
+        proxy->num_outgoing++;
         return 0;
     }
 
@@ -589,6 +604,7 @@ static int vfio_user_send_queued(VFIOUserProxy *proxy, VFIOUserMsg *msg)
     }
     if (ret == QIO_CHANNEL_ERR_BLOCK) {
         QTAILQ_INSERT_HEAD(&proxy->outgoing, msg, next);
+        proxy->num_outgoing = 1;
         qio_channel_set_aio_fd_handler(proxy->ioc, proxy->ctx,
                                        vfio_user_recv, proxy->ctx,
                                        vfio_user_send, proxy);
@@ -1112,12 +1128,27 @@ static bool check_migr(VFIOUserProxy *proxy, QObject *qobj, Error **errp)
     return caps_parse(proxy, qdict, caps_migr, errp);
 }
 
+static bool check_multi(VFIOUserProxy *proxy, QObject *qobj, Error **errp)
+{
+    QBool *qb = qobject_to(QBool, qobj);
+
+    if (qb == NULL) {
+        error_setg(errp, "malformed %s", VFIO_USER_CAP_MULTI);
+        return false;
+    }
+    if (qbool_get_bool(qb)) {
+        proxy->flags |= VFIO_PROXY_USE_MULTI;
+    }
+    return true;
+}
+
 static struct cap_entry caps_cap[] = {
     { VFIO_USER_CAP_MAX_FDS, check_max_fds },
     { VFIO_USER_CAP_MAX_XFER, check_max_xfer },
     { VFIO_USER_CAP_PGSIZES, check_pgsizes },
     { VFIO_USER_CAP_MAP_MAX, check_max_dma },
     { VFIO_USER_CAP_MIGR, check_migr },
+    { VFIO_USER_CAP_MULTI, check_multi },
     { NULL }
 };
 
@@ -1176,6 +1207,7 @@ static GString *caps_json(void)
     qdict_put_int(capdict, VFIO_USER_CAP_MAX_XFER, VFIO_USER_DEF_MAX_XFER);
     qdict_put_int(capdict, VFIO_USER_CAP_PGSIZES, VFIO_USER_DEF_PGSIZE);
     qdict_put_int(capdict, VFIO_USER_CAP_MAP_MAX, VFIO_USER_DEF_MAP_MAX);
+    qdict_put_bool(capdict, VFIO_USER_CAP_MULTI, true);
 
     qdict_put_obj(dict, VFIO_USER_CAP, QOBJECT(capdict));
 
@@ -1228,3 +1260,55 @@ bool vfio_user_validate_version(VFIOUserProxy *proxy, Error **errp)
     trace_vfio_user_version(msgp->major, msgp->minor, msgp->capabilities);
     return true;
 }
+
+void vfio_user_flush_multi(VFIOUserProxy *proxy)
+{
+    VFIOUserMsg *msg;
+    VFIOUserWRMulti *wm = proxy->wr_multi;
+    int ret;
+
+    proxy->wr_multi = NULL;
+
+    /* adjust size for actual # of writes */
+    wm->hdr.size -= (VFIO_USER_MULTI_MAX - wm->wr_cnt) * sizeof(VFIOUserWROne);
+
+    msg = vfio_user_getmsg(proxy, &wm->hdr, NULL);
+    msg->id = wm->hdr.id;
+    msg->rsize = 0;
+    msg->type = VFIO_MSG_ASYNC;
+    trace_vfio_user_wrmulti("flush", wm->wr_cnt);
+
+    ret = vfio_user_send_queued(proxy, msg);
+    if (ret < 0) {
+        vfio_user_recycle(proxy, msg);
+    }
+}
+
+void vfio_user_create_multi(VFIOUserProxy *proxy)
+{
+    VFIOUserWRMulti *wm;
+
+    wm = g_malloc0(sizeof(*wm));
+    vfio_user_request_msg(&wm->hdr, VFIO_USER_REGION_WRITE_MULTI,
+                          sizeof(*wm), VFIO_USER_NO_REPLY);
+    proxy->wr_multi = wm;
+}
+
+void vfio_user_add_multi(VFIOUserProxy *proxy, uint8_t index,
+                         off_t offset, uint32_t count, void *data)
+{
+    VFIOUserWRMulti *wm = proxy->wr_multi;
+    VFIOUserWROne *w1 = &wm->wrs[wm->wr_cnt];
+
+    w1->offset = offset;
+    w1->region = index;
+    w1->count = count;
+    memcpy(&w1->data, data, count);
+
+    wm->wr_cnt++;
+    trace_vfio_user_wrmulti("add", wm->wr_cnt);
+    if (wm->wr_cnt == VFIO_USER_MULTI_MAX ||
+        proxy->num_outgoing < VFIO_USER_OUT_LOW) {
+        vfio_user_flush_multi(proxy);
+    }
+}
diff --git a/hw/vfio-user/trace-events b/hw/vfio-user/trace-events
index 7ef98813b3..64fac9137f 100644
--- a/hw/vfio-user/trace-events
+++ b/hw/vfio-user/trace-events
@@ -11,6 +11,7 @@ vfio_user_get_region_info(uint32_t index, uint32_t flags, uint64_t size) " index
 vfio_user_region_rw(uint32_t region, uint64_t off, uint32_t count) " region %d offset 0x%"PRIx64" count %d"
 vfio_user_get_irq_info(uint32_t index, uint32_t flags, uint32_t count) " index %d flags 0x%x count %d"
 vfio_user_set_irqs(uint32_t index, uint32_t start, uint32_t count, uint32_t flags) " index %d start %d count %d flags 0x%x"
+vfio_user_wrmulti(const char *s, uint64_t wr_cnt) " %s count 0x%"PRIx64
 
 # container.c
 vfio_user_dma_map(uint64_t iova, uint64_t size, uint64_t off, uint32_t flags, bool async_ops) " iova 0x%"PRIx64" size 0x%"PRIx64" off 0x%"PRIx64" flags 0x%x async_ops %d"
-- 
2.43.0