On 28/09/2023 21:20, Markus Armbruster wrote:
> qemu_rdma_dump_id() dumps RDMA device details to stdout.
>
> rdma_start_outgoing_migration() calls it via qemu_rdma_source_init()
> and qemu_rdma_resolve_host() to show source device details.
> rdma_start_incoming_migration() arranges its call via
> rdma_accept_incoming_migration() and qemu_rdma_accept() to show
> destination device details.
>
> Two issues:
>
> 1. rdma_start_outgoing_migration() can run in HMP context. The
> information should arguably go the monitor, not stdout.
>
> 2. ibv_query_port() failure is reported as error. Its callers remain
> unaware of this failure (qemu_rdma_dump_id() can't fail), so
> reporting this to the user as an error is problematic.
>
> Fixable, but the device detail dump is noise, except when
> troubleshooting. Tracing is a better fit. Similar function
> qemu_rdma_dump_id() was converted to tracing in commit
> 733252deb8b (Tracify migration/rdma.c).
>
> Convert qemu_rdma_dump_id(), too.
>
> While there, touch up qemu_rdma_dump_gid()'s outdated comment.
>
> Signed-off-by: Markus Armbruster
Reviewed-by: Li Zhijian
> ---
> migration/rdma.c | 23 ---
> migration/trace-events | 2 ++
> 2 files changed, 10 insertions(+), 15 deletions(-)
>
> diff --git a/migration/rdma.c b/migration/rdma.c
> index dba0802fca..07aef9a071 100644
> --- a/migration/rdma.c
> +++ b/migration/rdma.c
> @@ -734,38 +734,31 @@ static void rdma_delete_block(RDMAContext *rdma,
> RDMALocalBlock *block)
> }
>
> /*
> - * Put in the log file which RDMA device was opened and the details
> - * associated with that device.
> + * Trace RDMA device open, with device details.
>*/
> static void qemu_rdma_dump_id(const char *who, struct ibv_context *verbs)
> {
> struct ibv_port_attr port;
>
> if (ibv_query_port(verbs, 1, )) {
> -error_report("Failed to query port information");
> +trace_qemu_rdma_dump_id_failed(who);
> return;
> }
>
> -printf("%s RDMA Device opened: kernel name %s "
> - "uverbs device name %s, "
> - "infiniband_verbs class device path %s, "
> - "infiniband class device path %s, "
> - "transport: (%d) %s\n",
> -who,
> +trace_qemu_rdma_dump_id(who,
> verbs->device->name,
> verbs->device->dev_name,
> verbs->device->dev_path,
> verbs->device->ibdev_path,
> port.link_layer,
> -(port.link_layer == IBV_LINK_LAYER_INFINIBAND) ?
> "Infiniband" :
> - ((port.link_layer == IBV_LINK_LAYER_ETHERNET)
> -? "Ethernet" : "Unknown"));
> +port.link_layer == IBV_LINK_LAYER_INFINIBAND ? "Infiniband"
> +: port.link_layer == IBV_LINK_LAYER_ETHERNET ? "Ethernet"
> +: "Unknown");
> }
>
> /*
> - * Put in the log file the RDMA gid addressing information,
> - * useful for folks who have trouble understanding the
> - * RDMA device hierarchy in the kernel.
> + * Trace RDMA gid addressing information.
> + * Useful for understanding the RDMA device hierarchy in the kernel.
>*/
> static void qemu_rdma_dump_gid(const char *who, struct rdma_cm_id *id)
> {
> diff --git a/migration/trace-events b/migration/trace-events
> index d733107ec6..4ce16ae866 100644
> --- a/migration/trace-events
> +++ b/migration/trace-events
> @@ -213,6 +213,8 @@ qemu_rdma_close(void) ""
> qemu_rdma_connect_pin_all_requested(void) ""
> qemu_rdma_connect_pin_all_outcome(bool pin) "%d"
> qemu_rdma_dest_init_trying(const char *host, const char *ip) "%s => %s"
> +qemu_rdma_dump_id_failed(const char *who) "%s RDMA Device opened, but can't
> query port information"
> +qemu_rdma_dump_id(const char *who, const char *name, const char *dev_name,
> const char *dev_path, const char *ibdev_path, int transport, const char
> *transport_name) "%s RDMA Device opened: kernel name %s uverbs device name
> %s, infiniband_verbs class device path %s, infiniband class device path %s,
> transport: (%d) %s"
> qemu_rdma_dump_gid(const char *who, const char *src, const char *dst) "%s
> Source GID: %s, Dest GID: %s"
> qemu_rdma_exchange_get_response_start(const char *desc) "CONTROL: %s
> receiving..."
> qemu_rdma_exchange_get_response_none(const char *desc, int type) "Surprise:
> got %s (%d)"