Re: [PATCH v12 4/8] qmp: add QMP command x-query-virtio-status

2022-02-10 Thread Pankaj Gupta
'backend-features': 'uint64',
> +'num-vqs': 'int',
> +'status': 'uint8',
> +'isr': 'uint8',
> +'queue-sel': 'uint16',
> +'vm-running': 'bool',
> +'broken': 'bool',
> +'disabled': 'bool',
> +'use-started': 'bool',
> +'started': 'bool',
> +'start-on-kick': 'bool',
> +'disable-legacy-check': 'bool',
> +'bus-name': 'str',
> +'use-guest-notifier-mask': 'bool',
> +'*vhost-dev': 'VhostStatus' } }
> +
> +##
> +# @x-query-virtio-status:
> +#
> +# Poll for a comprehensive status of a given virtio device
> +#
> +# @path: Canonical QOM path of the VirtIODevice
> +#
> +# Features:
> +# @unstable: This command is meant for debugging.
> +#
> +# Returns: VirtioStatus of the virtio device
> +#
> +# Since: 7.0
> +#
> +# Examples:
> +#
> +# 1. Poll for the status of virtio-crypto (no vhost-crypto active)
> +#
> +# -> { "execute": "x-query-virtio-status",
> +#  "arguments": { "path": "/machine/peripheral/crypto0/virtio-backend" }
> +#}
> +# <- { "return": {
> +#"device-endian": "little",
> +#"bus-name": "",
> +#"disable-legacy-check": false,
> +#"name": "virtio-crypto",
> +#"started": true,
> +#"device-id": 20,
> +#"backend-features": 0,
> +#"start-on-kick": false,
> +#"isr": 1,
> +#"broken": false,
> +#"status": 15,
> +#"num-vqs": 2,
> +#"guest-features": 5100273664,
> +#"host-features": 6325010432,
> +#"use-guest-notifier-mask": true,
> +#"vm-running": true,
> +#"queue-sel": 1,
> +#"disabled": false,
> +#"vhost-started": false,
> +#"use-started": true
> +#  }
> +#}
> +#
> +# 2. Poll for the status of virtio-net (vhost-net is active)
> +#
> +# -> { "execute": "x-query-virtio-status",
> +#  "arguments": { "path": 
> "/machine/peripheral-anon/device[1]/virtio-backend" }
> +#}
> +# <- { "return": {
> +#"device-endian": "little",
> +#"bus-name": "",
> +#"disabled-legacy-check": false,
> +#"name": "virtio-net",
> +#"started": true,
> +#"device-id": 1,
> +#"vhost-dev": {
> +#   "n-tmp-sections": 4,
> +#   "n-mem-sections": 4,
> +#   "max-queues": 1,
> +#   "backend-cap": 2,
> +#   "log-size": 0,
> +#   "backend-features": 0,
> +#   "nvqs": 2,
> +#   "protocol-features": 0,
> +#   "vq-index": 0,
> +#   "log-enabled": false,
> +#   "acked-features": 5100306432,
> +#   "features": 13908344832
> +#},
> +#"backend-features": 6337593319,
> +#"start-on-kick": false,
> +#"isr": 1,
> +#"broken": false,
> +#"status": 15,
> +#"num-vqs": 3,
> +#"guest-features": 5111807911,
> +#"host-features": 6337593319,
> +#"use-guest-notifier-mask": true,
> +#"vm-running": true,
> +#"queue-sel": 2,
> +#"disabled": false,
> +#"vhost-started": true,
> +#"use-started": true
> +#  }
> +#}
> +#
> +##
> +
> +{ 'command': 'x-query-virtio-status',
> +  'data': { 'path': 'str' },
> +  'returns': 'VirtioStatus',
> +  'features': [ 'unstable' ] }
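
(Side note on reading the example output above: the raw "status": 15 is just the
standard virtio device-status bits OR'd together. A tiny standalone decoder
sketch, using the usual bit values from the virtio spec headers:)

    #include <stdio.h>

    /* Standard virtio device status bits (virtio spec / virtio_config.h) */
    #define VIRTIO_CONFIG_S_ACKNOWLEDGE  1
    #define VIRTIO_CONFIG_S_DRIVER       2
    #define VIRTIO_CONFIG_S_DRIVER_OK    4
    #define VIRTIO_CONFIG_S_FEATURES_OK  8

    int main(void)
    {
        unsigned status = 15;   /* the value reported in both examples above */

        printf("ACKNOWLEDGE=%d DRIVER=%d DRIVER_OK=%d FEATURES_OK=%d\n",
               !!(status & VIRTIO_CONFIG_S_ACKNOWLEDGE),
               !!(status & VIRTIO_CONFIG_S_DRIVER),
               !!(status & VIRTIO_CONFIG_S_DRIVER_OK),
               !!(status & VIRTIO_CONFIG_S_FEATURES_OK));
        return 0;
    }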

Reviewed-by: Pankaj Gupta 



Re: [PATCH v12 3/8] qmp: add QMP command x-query-virtio

2022-02-10 Thread Pankaj Gupta
 vdc->stop_ioeventfd = virtio_device_stop_ioeventfd_impl;
>
>  vdc->legacy_features |= VIRTIO_LEGACY_FEATURES;
> +
> +QTAILQ_INIT(&virtio_list);
>  }
>
>  bool virtio_device_ioeventfd_enabled(VirtIODevice *vdev)
> @@ -3884,6 +3897,37 @@ bool virtio_device_ioeventfd_enabled(VirtIODevice 
> *vdev)
>  return virtio_bus_ioeventfd_enabled(vbus);
>  }
>
> +VirtioInfoList *qmp_x_query_virtio(Error **errp)
> +{
> +VirtioInfoList *list = NULL;
> +VirtioInfoList *node;
> +VirtIODevice *vdev;
> +
> +QTAILQ_FOREACH(vdev, &virtio_list, next) {
> +DeviceState *dev = DEVICE(vdev);
> +Error *err = NULL;
> +QObject *obj = qmp_qom_get(dev->canonical_path, "realized", );
> +
> +if (err == NULL) {
> +GString *is_realized = qobject_to_json_pretty(obj, true);
> +/* virtio device is NOT realized, remove it from list */
> +if (!strncmp(is_realized->str, "false", 4)) {
> +QTAILQ_REMOVE(&virtio_list, vdev, next);
> +} else {
> +node = g_new0(VirtioInfoList, 1);
> +node->value = g_new(VirtioInfo, 1);
> +node->value->path = g_strdup(dev->canonical_path);
> +node->value->name = g_strdup(vdev->name);
> +QAPI_LIST_PREPEND(list, node->value);
> +}
> +   g_string_free(is_realized, true);
> +}
> +qobject_unref(obj);
> +}
> +
> +return list;
> +}
> +
>  static const TypeInfo virtio_device_info = {
>  .name = TYPE_VIRTIO_DEVICE,
>  .parent = TYPE_DEVICE,
> diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h
> index 90e6080..8f4e4c1 100644
> --- a/include/hw/virtio/virtio.h
> +++ b/include/hw/virtio/virtio.h
> @@ -110,6 +110,7 @@ struct VirtIODevice
>  bool use_guest_notifier_mask;
>  AddressSpace *dma_as;
>  QLIST_HEAD(, VirtQueue) *vector_queues;
> +QTAILQ_ENTRY(VirtIODevice) next;
>  };
>
>  struct VirtioDeviceClass {
> diff --git a/qapi/meson.build b/qapi/meson.build
> index c0c49c1..e332f28 100644
> --- a/qapi/meson.build
> +++ b/qapi/meson.build
> @@ -48,6 +48,7 @@ qapi_all_modules = [
>'sockets',
>'trace',
>'transaction',
> +  'virtio',
>'yank',
>  ]
>  if have_system
> diff --git a/qapi/qapi-schema.json b/qapi/qapi-schema.json
> index 4912b97..1512ada 100644
> --- a/qapi/qapi-schema.json
> +++ b/qapi/qapi-schema.json
> @@ -93,3 +93,4 @@
>  { 'include': 'audio.json' }
>  { 'include': 'acpi.json' }
>  { 'include': 'pci.json' }
> +{ 'include': 'virtio.json' }
> diff --git a/qapi/virtio.json b/qapi/virtio.json
> new file mode 100644
> index 000..aee0e40
> --- /dev/null
> +++ b/qapi/virtio.json
> @@ -0,0 +1,68 @@
> +# -*- Mode: Python -*-
> +# vim: filetype=python
> +#
> +
> +##
> +# = Virtio devices
> +##
> +
> +##
> +# @VirtioInfo:
> +#
> +# Basic information about a given VirtIODevice
> +#
> +# @path: The VirtIODevice's canonical QOM path
> +#
> +# @name: Name of the VirtIODevice
> +#
> +# Since: 7.0
> +#
> +##
> +{ 'struct': 'VirtioInfo',
> +  'data': { 'path': 'str',
> +'name': 'str' } }
> +
> +##
> +# @x-query-virtio:
> +#
> +# Returns a list of all realized VirtIODevices
> +#
> +# Features:
> +# @unstable: This command is meant for debugging.
> +#
> +# Returns: List of gathered VirtIODevices
> +#
> +# Since: 7.0
> +#
> +# Example:
> +#
> +# -> { "execute": "x-query-virtio" }
> +# <- { "return": [
> +#{
> +#"path": "/machine/peripheral-anon/device[4]/virtio-backend",
> +#"name": "virtio-input"
> +#},
> +#{
> +#"path": "/machine/peripheral/crypto0/virtio-backend",
> +#"name": "virtio-crypto"
> +#},
> +#{
> +#        "path": "/machine/peripheral-anon/device[2]/virtio-backend",
> +#"name": "virtio-scsi"
> +#},
> +#{
> +#"path": "/machine/peripheral-anon/device[1]/virtio-backend",
> +#"name": "virtio-net"
> +#},
> +#{
> +#"path": "/machine/peripheral-anon/device[0]/virtio-backend",
> +#"name": "virtio-serial"
> +#}
> +#  ]
> +#}
> +#
> +##
> +
> +{ 'command': 'x-query-virtio',
> +  'returns': [ 'VirtioInfo' ],
> +  'features': [ 'unstable' ] }
> diff --git a/tests/qtest/qmp-cmd-test.c b/tests/qtest/qmp-cmd-test.c
> index 7f103ea..fd00ee2 100644
> --- a/tests/qtest/qmp-cmd-test.c
> +++ b/tests/qtest/qmp-cmd-test.c
> @@ -103,6 +103,7 @@ static bool query_is_ignored(const char *cmd)
>  "query-gic-capabilities", /* arm */
>  /* Success depends on target-specific build configuration: */
>  "query-pci",  /* CONFIG_PCI */
> +"x-query-virtio", /* CONFIG_VIRTIO */
>  /* Success depends on launching SEV guest */
>  "query-sev-launch-measure",
>  /* Success depends on Host or Hypervisor SEV support */

Reviewed-by: Pankaj Gupta 

Re: [PATCH v12 1/8] virtio: drop name parameter for virtio_init()

2022-02-10 Thread Pankaj Gupta
uot;virtio-rng", VIRTIO_ID_RNG, 0);
> +virtio_init(vdev, VIRTIO_ID_RNG, 0);
>
>  vrng->vq = virtio_add_queue(vdev, 8, handle_input);
>  vrng->quota_remaining = vrng->conf.max_bytes;
> diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c
> index aae72fb..734b7fb 100644
> --- a/hw/virtio/virtio.c
> +++ b/hw/virtio/virtio.c
> @@ -132,6 +132,56 @@ struct VirtQueue
>  QLIST_ENTRY(VirtQueue) node;
>  };
>
> +const char *virtio_device_names[] = {
> +[VIRTIO_ID_NET] = "virtio-net",
> +[VIRTIO_ID_BLOCK] = "virtio-blk",
> +[VIRTIO_ID_CONSOLE] = "virtio-serial",
> +[VIRTIO_ID_RNG] = "virtio-rng",
> +[VIRTIO_ID_BALLOON] = "virtio-balloon",
> +[VIRTIO_ID_IOMEM] = "virtio-iomem",
> +[VIRTIO_ID_RPMSG] = "virtio-rpmsg",
> +[VIRTIO_ID_SCSI] = "virtio-scsi",
> +[VIRTIO_ID_9P] = "virtio-9p",
> +[VIRTIO_ID_MAC80211_WLAN] = "virtio-mac-wlan",
> +[VIRTIO_ID_RPROC_SERIAL] = "virtio-rproc-serial",
> +[VIRTIO_ID_CAIF] = "virtio-caif",
> +[VIRTIO_ID_MEMORY_BALLOON] = "virtio-mem-balloon",
> +[VIRTIO_ID_GPU] = "virtio-gpu",
> +[VIRTIO_ID_CLOCK] = "virtio-clk",
> +[VIRTIO_ID_INPUT] = "virtio-input",
> +[VIRTIO_ID_VSOCK] = "vhost-vsock",
> +[VIRTIO_ID_CRYPTO] = "virtio-crypto",
> +[VIRTIO_ID_SIGNAL_DIST] = "virtio-signal",
> +[VIRTIO_ID_PSTORE] = "virtio-pstore",
> +[VIRTIO_ID_IOMMU] = "virtio-iommu",
> +[VIRTIO_ID_MEM] = "virtio-mem",
> +[VIRTIO_ID_SOUND] = "virtio-sound",
> +[VIRTIO_ID_FS] = "virtio-user-fs",
> +[VIRTIO_ID_PMEM] = "virtio-pmem",
> +[VIRTIO_ID_RPMB] = "virtio-rpmb",
> +[VIRTIO_ID_MAC80211_HWSIM] = "virtio-mac-hwsim",
> +[VIRTIO_ID_VIDEO_ENCODER] = "virtio-vid-encoder",
> +[VIRTIO_ID_VIDEO_DECODER] = "virtio-vid-decoder",
> +[VIRTIO_ID_SCMI] = "virtio-scmi",
> +[VIRTIO_ID_NITRO_SEC_MOD] = "virtio-nitro-sec-mod",
> +[VIRTIO_ID_I2C_ADAPTER] = "vhost-user-i2c",
> +[VIRTIO_ID_WATCHDOG] = "virtio-watchdog",
> +[VIRTIO_ID_CAN] = "virtio-can",
> +[VIRTIO_ID_DMABUF] = "virtio-dmabuf",
> +[VIRTIO_ID_PARAM_SERV] = "virtio-param-serv",
> +[VIRTIO_ID_AUDIO_POLICY] = "virtio-audio-pol",
> +[VIRTIO_ID_BT] = "virtio-bluetooth",
> +[VIRTIO_ID_GPIO] = "virtio-gpio"
> +};
> +
> +static const char *virtio_id_to_name(uint16_t device_id)
> +{
> +assert(device_id < G_N_ELEMENTS(virtio_device_names));
> +const char *name = virtio_device_names[device_id];
> +assert(name != NULL);
> +return name;
> +}
> +
>  /* Called within call_rcu().  */
>  static void virtio_free_region_cache(VRingMemoryRegionCaches *caches)
>  {
> @@ -3209,8 +3259,7 @@ void virtio_instance_init_common(Object *proxy_obj, 
> void *data,
>  qdev_alias_all_properties(vdev, proxy_obj);
>  }
>
> -void virtio_init(VirtIODevice *vdev, const char *name,
> - uint16_t device_id, size_t config_size)
> +void virtio_init(VirtIODevice *vdev, uint16_t device_id, size_t config_size)
>  {
>  BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
>  VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
> @@ -3239,7 +3288,7 @@ void virtio_init(VirtIODevice *vdev, const char *name,
>  vdev->vq[i].host_notifier_enabled = false;
>  }
>
> -vdev->name = name;
> +vdev->name = virtio_id_to_name(device_id);
>  vdev->config_len = config_size;
>  if (vdev->config_len) {
>  vdev->config = g_malloc0(config_size);
> diff --git a/include/hw/virtio/vhost-vsock-common.h 
> b/include/hw/virtio/vhost-vsock-common.h
> index d8b565b..076b7ab 100644
> --- a/include/hw/virtio/vhost-vsock-common.h
> +++ b/include/hw/virtio/vhost-vsock-common.h
> @@ -44,7 +44,7 @@ int vhost_vsock_common_start(VirtIODevice *vdev);
>  void vhost_vsock_common_stop(VirtIODevice *vdev);
>  int vhost_vsock_common_pre_save(void *opaque);
>  int vhost_vsock_common_post_load(void *opaque, int version_id);
> -void vhost_vsock_common_realize(VirtIODevice *vdev, const char *name);
> +void vhost_vsock_common_realize(VirtIODevice *vdev);
>  void vhost_vsock_common_unrealize(VirtIODevice *vdev);
>  uint64_t vhost_vsock_common_get_features(VirtIODevice *vdev, uint64_t 
> features,
>   Error **errp);
> diff --git a/include/hw/virtio/virtio-gpu.h b/include/hw/virtio/virtio-gpu.h
> index 2179b75..afff9e1 100644
> --- a/include/hw/virtio/virtio-gpu.h
> +++ b/include/hw/virtio/virtio-gpu.h
> @@ -22,6 +22,7 @@
>  #include "sysemu/vhost-user-backend.h"
>
>  #include "standard-headers/linux/virtio_gpu.h"
> +#include "standard-headers/linux/virtio_ids.h"
>  #include "qom/object.h"
>
>  #define TYPE_VIRTIO_GPU_BASE "virtio-gpu-base"
> @@ -37,8 +38,6 @@ OBJECT_DECLARE_SIMPLE_TYPE(VirtIOGPUGL, VIRTIO_GPU_GL)
>  #define TYPE_VHOST_USER_GPU "vhost-user-gpu"
>  OBJECT_DECLARE_SIMPLE_TYPE(VhostUserGPU, VHOST_USER_GPU)
>
> -#define VIRTIO_ID_GPU 16
> -
>  struct virtio_gpu_simple_resource {
>  uint32_t resource_id;
>  uint32_t width;
> diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h
> index f095637..2a0be70 100644
> --- a/include/hw/virtio/virtio.h
> +++ b/include/hw/virtio/virtio.h
> @@ -165,8 +165,8 @@ struct VirtioDeviceClass {
>  void virtio_instance_init_common(Object *proxy_obj, void *data,
>   size_t vdev_size, const char *vdev_name);
>
> -void virtio_init(VirtIODevice *vdev, const char *name,
> - uint16_t device_id, size_t config_size);
> +void virtio_init(VirtIODevice *vdev, uint16_t device_id, size_t config_size);
> +
>  void virtio_cleanup(VirtIODevice *vdev);
>
>  void virtio_error(VirtIODevice *vdev, const char *fmt, ...) GCC_FMT_ATTR(2, 
> 3);

Reviewed-by: Pankaj Gupta 

Re: [PATCH v4 4/5] virtio-blk: default num_queues to -smp N

2020-05-28 Thread Pankaj Gupta
> Automatically size the number of virtio-blk-pci request virtqueues to
> match the number of vCPUs.  Other transports continue to default to 1
> request virtqueue.
>
> A 1:1 virtqueue:vCPU mapping ensures that completion interrupts are
> handled on the same vCPU that submitted the request.  No IPI is
> necessary to complete an I/O request and performance is improved.
>
> Performance improves from 78k to 104k IOPS on a 32 vCPU guest with 101
> virtio-blk-pci devices (ioengine=libaio, iodepth=1, bs=4k, rw=randread
> with NVMe storage).
>
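
(As a quick aside, a minimal sketch of the pattern this patch introduces — a
sentinel default that each transport resolves. The helper and names below are
simplified illustrations, not the QEMU API; only the PCI transport sizes the
request virtqueues from the vCPU count:)

    #include <stdint.h>
    #include <stdio.h>

    #define AUTO_NUM_QUEUES UINT16_MAX   /* mirrors VIRTIO_BLK_AUTO_NUM_QUEUES */

    /* Simplified stand-in for the per-transport resolution step. */
    static unsigned resolve_num_queues(uint16_t requested, unsigned smp_cpus,
                                       int is_pci)
    {
        if (requested != AUTO_NUM_QUEUES) {
            return requested;            /* user set num-queues explicitly */
        }
        return is_pci ? smp_cpus : 1;    /* PCI: 1 queue per vCPU; others: 1 */
    }

    int main(void)
    {
        printf("pci,  -smp 32 -> %u queues\n",
               resolve_num_queues(AUTO_NUM_QUEUES, 32, 1));
        printf("mmio, -smp 32 -> %u queues\n",
               resolve_num_queues(AUTO_NUM_QUEUES, 32, 0));
        printf("explicit 4    -> %u queues\n",
               resolve_num_queues(4, 32, 1));
        return 0;
    }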
> Signed-off-by: Stefan Hajnoczi 
> Reviewed-by: Cornelia Huck 
> ---
>  include/hw/virtio/virtio-blk.h | 2 ++
>  hw/block/virtio-blk.c  | 6 +-
>  hw/core/machine.c  | 1 +
>  hw/virtio/virtio-blk-pci.c | 7 ++-
>  4 files changed, 14 insertions(+), 2 deletions(-)
>
> diff --git a/include/hw/virtio/virtio-blk.h b/include/hw/virtio/virtio-blk.h
> index 1e62f869b2..4e5e903f4a 100644
> --- a/include/hw/virtio/virtio-blk.h
> +++ b/include/hw/virtio/virtio-blk.h
> @@ -30,6 +30,8 @@ struct virtio_blk_inhdr
>  unsigned char status;
>  };
>
> +#define VIRTIO_BLK_AUTO_NUM_QUEUES UINT16_MAX
> +
>  struct VirtIOBlkConf
>  {
>  BlockConf conf;
> diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c
> index f5f6fc925e..3c36b38255 100644
> --- a/hw/block/virtio-blk.c
> +++ b/hw/block/virtio-blk.c
> @@ -1135,6 +1135,9 @@ static void virtio_blk_device_realize(DeviceState *dev, 
> Error **errp)
>  error_setg(errp, "Device needs media, but drive is empty");
>  return;
>  }
> +if (conf->num_queues == VIRTIO_BLK_AUTO_NUM_QUEUES) {
> +conf->num_queues = 1;
> +}
>  if (!conf->num_queues) {
>  error_setg(errp, "num-queues property must be larger than 0");
>  return;
> @@ -1274,7 +1277,8 @@ static Property virtio_blk_properties[] = {
>  #endif
>  DEFINE_PROP_BIT("request-merging", VirtIOBlock, conf.request_merging, 0,
>  true),
> -DEFINE_PROP_UINT16("num-queues", VirtIOBlock, conf.num_queues, 1),
> +DEFINE_PROP_UINT16("num-queues", VirtIOBlock, conf.num_queues,
> +   VIRTIO_BLK_AUTO_NUM_QUEUES),
>  DEFINE_PROP_UINT16("queue-size", VirtIOBlock, conf.queue_size, 256),
>  DEFINE_PROP_BOOL("seg-max-adjust", VirtIOBlock, conf.seg_max_adjust, 
> true),
>  DEFINE_PROP_LINK("iothread", VirtIOBlock, conf.iothread, TYPE_IOTHREAD,
> diff --git a/hw/core/machine.c b/hw/core/machine.c
> index df7664bc8d..4aba3bdd3c 100644
> --- a/hw/core/machine.c
> +++ b/hw/core/machine.c
> @@ -29,6 +29,7 @@
>  #include "migration/vmstate.h"
>
>  GlobalProperty hw_compat_5_0[] = {
> +{ "virtio-blk-device", "num-queues", "1"},
>  { "virtio-scsi-device", "num_queues", "1"},
>  { "vhost-scsi", "num_queues", "1"},
>  { "vhost-user-scsi", "num_queues", "1"},
> diff --git a/hw/virtio/virtio-blk-pci.c b/hw/virtio/virtio-blk-pci.c
> index 28838fa958..2f0ede3863 100644
> --- a/hw/virtio/virtio-blk-pci.c
> +++ b/hw/virtio/virtio-blk-pci.c
> @@ -50,9 +50,14 @@ static void virtio_blk_pci_realize(VirtIOPCIProxy 
> *vpci_dev, Error **errp)
>  {
>  VirtIOBlkPCI *dev = VIRTIO_BLK_PCI(vpci_dev);
>  DeviceState *vdev = DEVICE(&dev->vdev);
> +VirtIOBlkConf *conf = &dev->vdev.conf;
> +
> +if (conf->num_queues == VIRTIO_BLK_AUTO_NUM_QUEUES) {
> +conf->num_queues = virtio_pci_optimal_num_queues(0);
> +}
>
>  if (vpci_dev->nvectors == DEV_NVECTORS_UNSPECIFIED) {
> -vpci_dev->nvectors = dev->vdev.conf.num_queues + 1;
> +vpci_dev->nvectors = conf->num_queues + 1;
>  }
>
>  qdev_set_parent_bus(vdev, BUS(&vpci_dev->bus));

Looks good to me.
Reviewed-by: Pankaj Gupta 



Re: [PATCH v4 2/5] virtio-scsi: introduce a constant for fixed virtqueues

2020-05-28 Thread Pankaj Gupta
> The event and control virtqueues are always present, regardless of the
> multi-queue configuration.  Define a constant so that virtqueue number
> calculations are easier to read.
>
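
(For illustration only — not part of the patch — a tiny sketch of how the new
constant enters the two calculations touched here, assuming num_queues = 4 and
reading the extra +1 as the config-change vector:)

    #include <stdio.h>

    #define VIRTIO_SCSI_VQ_NUM_FIXED 2   /* control + event, as in the patch */

    int main(void)
    {
        unsigned num_queues = 4;                                /* request vqs */
        unsigned nvqs = VIRTIO_SCSI_VQ_NUM_FIXED + num_queues;  /* 2 + 4 = 6   */
        unsigned nvectors = num_queues + VIRTIO_SCSI_VQ_NUM_FIXED + 1; /* = 7  */

        printf("nvqs=%u nvectors=%u\n", nvqs, nvectors);
        return 0;
    }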
> Signed-off-by: Stefan Hajnoczi 
> Reviewed-by: Cornelia Huck 
> ---
>  include/hw/virtio/virtio-scsi.h | 3 +++
>  hw/scsi/vhost-user-scsi.c   | 2 +-
>  hw/scsi/virtio-scsi.c   | 7 ---
>  hw/virtio/vhost-scsi-pci.c  | 3 ++-
>  hw/virtio/vhost-user-scsi-pci.c | 3 ++-
>  hw/virtio/virtio-scsi-pci.c | 3 ++-
>  6 files changed, 14 insertions(+), 7 deletions(-)
>
> diff --git a/include/hw/virtio/virtio-scsi.h b/include/hw/virtio/virtio-scsi.h
> index 24e768909d..9f293bcb80 100644
> --- a/include/hw/virtio/virtio-scsi.h
> +++ b/include/hw/virtio/virtio-scsi.h
> @@ -36,6 +36,9 @@
>  #define VIRTIO_SCSI_MAX_TARGET  255
>  #define VIRTIO_SCSI_MAX_LUN 16383
>
> +/* Number of virtqueues that are always present */
> +#define VIRTIO_SCSI_VQ_NUM_FIXED2
> +
>  typedef struct virtio_scsi_cmd_req VirtIOSCSICmdReq;
>  typedef struct virtio_scsi_cmd_resp VirtIOSCSICmdResp;
>  typedef struct virtio_scsi_ctrl_tmf_req VirtIOSCSICtrlTMFReq;
> diff --git a/hw/scsi/vhost-user-scsi.c b/hw/scsi/vhost-user-scsi.c
> index cbb5d97599..f8bd158c31 100644
> --- a/hw/scsi/vhost-user-scsi.c
> +++ b/hw/scsi/vhost-user-scsi.c
> @@ -115,7 +115,7 @@ static void vhost_user_scsi_realize(DeviceState *dev, 
> Error **errp)
>  goto free_virtio;
>  }
>
> -vsc->dev.nvqs = 2 + vs->conf.num_queues;
> +vsc->dev.nvqs = VIRTIO_SCSI_VQ_NUM_FIXED + vs->conf.num_queues;
>  vsc->dev.vqs = g_new0(struct vhost_virtqueue, vsc->dev.nvqs);
>  vsc->dev.vq_index = 0;
>  vsc->dev.backend_features = 0;
> diff --git a/hw/scsi/virtio-scsi.c b/hw/scsi/virtio-scsi.c
> index 9b72094a61..f3d60683bd 100644
> --- a/hw/scsi/virtio-scsi.c
> +++ b/hw/scsi/virtio-scsi.c
> @@ -191,7 +191,7 @@ static void virtio_scsi_save_request(QEMUFile *f, 
> SCSIRequest *sreq)
>  VirtIOSCSIReq *req = sreq->hba_private;
>  VirtIOSCSICommon *vs = VIRTIO_SCSI_COMMON(req->dev);
>  VirtIODevice *vdev = VIRTIO_DEVICE(req->dev);
> -uint32_t n = virtio_get_queue_index(req->vq) - 2;
> +uint32_t n = virtio_get_queue_index(req->vq) - VIRTIO_SCSI_VQ_NUM_FIXED;
>
>  assert(n < vs->conf.num_queues);
>  qemu_put_be32s(f, &n);
> @@ -892,10 +892,11 @@ void virtio_scsi_common_realize(DeviceState *dev,
>  sizeof(VirtIOSCSIConfig));
>
>  if (s->conf.num_queues == 0 ||
> -s->conf.num_queues > VIRTIO_QUEUE_MAX - 2) {
> +s->conf.num_queues > VIRTIO_QUEUE_MAX - 
> VIRTIO_SCSI_VQ_NUM_FIXED) {
>  error_setg(errp, "Invalid number of queues (= %" PRIu32 "), "
>   "must be a positive integer less than %d.",
> -   s->conf.num_queues, VIRTIO_QUEUE_MAX - 2);
> +   s->conf.num_queues,
> +   VIRTIO_QUEUE_MAX - VIRTIO_SCSI_VQ_NUM_FIXED);
>  virtio_cleanup(vdev);
>  return;
>  }
> diff --git a/hw/virtio/vhost-scsi-pci.c b/hw/virtio/vhost-scsi-pci.c
> index 5da6bb6449..2b25db9a3c 100644
> --- a/hw/virtio/vhost-scsi-pci.c
> +++ b/hw/virtio/vhost-scsi-pci.c
> @@ -50,7 +50,8 @@ static void vhost_scsi_pci_realize(VirtIOPCIProxy 
> *vpci_dev, Error **errp)
>  VirtIOSCSICommon *vs = VIRTIO_SCSI_COMMON(vdev);
>
>  if (vpci_dev->nvectors == DEV_NVECTORS_UNSPECIFIED) {
> -vpci_dev->nvectors = vs->conf.num_queues + 3;
> +vpci_dev->nvectors = vs->conf.num_queues +
> + VIRTIO_SCSI_VQ_NUM_FIXED + 1;
>  }
>
>  qdev_set_parent_bus(vdev, BUS(&vpci_dev->bus));
> diff --git a/hw/virtio/vhost-user-scsi-pci.c b/hw/virtio/vhost-user-scsi-pci.c
> index 6f3375fe55..80710ab170 100644
> --- a/hw/virtio/vhost-user-scsi-pci.c
> +++ b/hw/virtio/vhost-user-scsi-pci.c
> @@ -56,7 +56,8 @@ static void vhost_user_scsi_pci_realize(VirtIOPCIProxy 
> *vpci_dev, Error **errp)
>  VirtIOSCSICommon *vs = VIRTIO_SCSI_COMMON(vdev);
>
>  if (vpci_dev->nvectors == DEV_NVECTORS_UNSPECIFIED) {
> -vpci_dev->nvectors = vs->conf.num_queues + 3;
> +vpci_dev->nvectors = vs->conf.num_queues +
> + VIRTIO_SCSI_VQ_NUM_FIXED + 1;
>  }
>
>  qdev_set_parent_bus(vdev, BUS(&vpci_dev->bus));
> diff --git a/hw/virtio/virtio-scsi-pci.c b/hw/virtio/virtio-scsi-pci.c
> index e82e7e5680..c52d68053a 100644
> --- a/hw/virtio/virtio-scsi-pci.c
> +++ b/hw/virtio/virtio-scsi-pci.c
> @@ -51,7 +51,8 @@ static void virtio_scsi_pci_realize(VirtIOPCIProxy 
> *vpci_dev, Error **errp)
>  char *bus_name;
>
>  if (vpci_dev->nvectors == DEV_NVECTORS_UNSPECIFIED) {
> -vpci_dev->nvectors = vs->conf.num_queues + 3;
> +vpci_dev->nvectors = vs->conf.num_queues +
> + VIRTIO_SCSI_VQ_NUM_FIXED + 1;
>  }
>
>  /*
> --
Better readability with no change in logic. Code looks good to me.

Reviewed-by: Pankaj Gupta 



Re: [PATCH RESEND v3 0/4] virtio-pci: enable blk and scsi multi-queue by default

2020-03-30 Thread Pankaj Gupta
For the best case, it's really a good idea to configure the default number of
queues to match the number of CPUs.

For the series:
Reviewed-by: Pankaj Gupta 



Re: [PATCH v4 1/2] virtio-blk: delete vqs on the error path in realize()

2020-03-30 Thread Pankaj Gupta
Reviewed-by: Pankaj Gupta 



Re: [Qemu-block] [Qemu-devel] [PATCH v2 0/1] Don't obey the kernel block device max transfer len / max segments for raw block devices

2019-07-12 Thread Pankaj Gupta


> Linux block devices, even in O_DIRECT mode don't have any user visible
> limit on transfer size / number of segments, which underlying kernel block
> device can have.
> The kernel block layer takes care of enforcing these limits by splitting the
> bios.
> 
> By limiting the transfer sizes, we force qemu to do the splitting itself
> which
> introduces various overheads.
> It is especially visible in nbd server, where the low max transfer size of
> the
> underlying device forces us to advertise this over NBD, thus increasing the
> traffic overhead in case of image conversion which benefits from large
> blocks.
> 
> More information can be found here:
> https://bugzilla.redhat.com/show_bug.cgi?id=1647104
> 
> Tested this with qemu-img convert over nbd and natively and to my 
> surprise,
> even native IO performance improved a bit.
> 
> (The device on which it was tested is Intel Optane DC P4800X,
> which has 128k max transfer size reported by the kernel)
> 
> The benchmark:
> 
> Images were created using:
> 
> Sparse image:  qemu-img create -f qcow2 /dev/nvme0n1p3 1G / 10G / 100G
> Allocated image: qemu-img create -f qcow2 /dev/nvme0n1p3 -o
> preallocation=metadata  1G / 10G / 100G
> 
> The test was:
> 
>  echo "convert native:"
>  rm -rf /dev/shm/disk.img
>  time qemu-img convert -p -f qcow2 -O raw -T none $FILE /dev/shm/disk.img >
>  /dev/zero
> 
>  echo "convert via nbd:"
>  qemu-nbd -k /tmp/nbd.sock -v  -f qcow2 $FILE -x export --cache=none
>  --aio=native --fork
>  rm -rf /dev/shm/disk.img
>  time qemu-img convert -p -f raw -O raw
>  nbd:unix:/tmp/nbd.sock:exportname=export /dev/shm/disk.img > /dev/zero
> 
> The results:
> 
> =
> 1G sparse image:
>  native:
>   before: 0.027s
>   after: 0.027s
>  nbd:
>   before: 0.287s
>   after: 0.035s
> 
> =
> 100G sparse image:
>  native:
>   before: 0.028s
>   after: 0.028s
>  nbd:
>   before: 23.796s
>   after: 0.109s
> 
> =
> 1G preallocated image:
>  native:
>before: 0.454s
>after: 0.427s
>  nbd:
>before: 0.649s
>after: 0.546s
> 
> The block limits of max transfer size/max segment size are retained
> for the SCSI passthrough because in this case the kernel passes the userspace
> request
> directly to the kernel scsi driver, bypassing the block layer, and thus there
> is no code to split
> such requests.
> 
> Fam, since you was the original author of the code that added
> these limits, could you share your opinion on that?
> What was the reason besides SCSI passthrough?
> 
> V2:
> 
> *  Manually tested to not break the scsi passthrough with a nested VM
> *  As Eric suggested, refactored the area around the fstat.
> *  Spelling/grammar fixes
> 
> Best regards,
>   Maxim Levitsky
> 
> Maxim Levitsky (1):
>   raw-posix.c - use max transfer length / max segement count only for
> SCSI passthrough
> 
>  block/file-posix.c | 54 --
>  1 file changed, 28 insertions(+), 26 deletions(-)
> 
> --

I am not familiar with the SCSI passthrough special case, but overall it looks
good to me.

Feel free to add:
Reviewed-by: Pankaj Gupta 

> 2.17.2
> 
> 
> 



Re: [Qemu-block] [Qemu-devel] [PATCH v4 0/6] virtio-blk: add DISCARD and WRITE_ZEROES features

2019-02-08 Thread Pankaj Gupta


> 
> This series adds the support of DISCARD and WRITE_ZEROES commands
> and extends the virtio-blk-test to test WRITE_ZEROES command when
> the feature is enabled.
> 
> v4:
> - fixed error with mingw compiler in patch 4
>   gcc and clang want %lu, but mingw wants %llu for BDRV_REQUEST_MAX_SECTORS.
>   Since is less than INT_MAX, I casted it to integer and I used %d in the
>   format string of error_setg. (mingw now is happy)
> 
> v3:
> - rebased on master (I removed Based-on tag since the new virtio headers from
>   linux v5.0-rc1 are merged)
> - added patch 2 to add host_features field (as in virtio-net) [Michael]
> - fixed patch 3 (previously 2/5) using the new host_features field
> - fixed patch 4 (previously 3/5) following the Stefan's comments:
> - fixed name of functions and fields
> - used vdev and s pointers
> - removed "wz-may-unmap" property
> - split "dwz-max-sectors" in two properties
> 
> v2:
> - added patch 1 to use virtio_blk_handle_rw_error() with discard operation
> - added patch 2 to make those new features machine-type dependent (thanks
> David)
> - fixed patch 3 (previously patch 1/2) adding more checks, block_acct_start()
> for WRITE_ZEROES requests, and configurable parameters to
> initialize the limits (max_sectors, wzeroes_may_unmap).
> (thanks Stefan)
> I moved in a new function the code to handle a single
> segment,
> in order to simplify the support of multiple segments in the
> future.
> - added patch 4 to change the assert on data_size following the discussion
> with
> Thomas, Changpeng, Michael, and Stefan (thanks all)
> - fixed patch 5 (previously patch 2/2) using local dwz_hdr variable instead
> of
> dynamic allocation (thanks Thomas)
> 
> Thanks,
> Stefano
> 
> Stefano Garzarella (6):
>   virtio-blk: add acct_failed param to virtio_blk_handle_rw_error()
>   virtio-blk: add host_features field in VirtIOBlock
>   virtio-blk: add "discard" and "write-zeroes" properties
>   virtio-blk: add DISCARD and WRITE_ZEROES features
>   tests/virtio-blk: change assert on data_size in virtio_blk_request()
>   tests/virtio-blk: add test for WRITE_ZEROES command
> 
>  hw/block/virtio-blk.c  | 214 +++--
>  hw/core/machine.c  |   2 +
>  include/hw/virtio/virtio-blk.h |   5 +-
>  tests/virtio-blk-test.c    |  75 +++-
>  4 files changed, 282 insertions(+), 14 deletions(-)
> 
> --
> 2.20.1

I don't know all of the details of the QEMU block-related stuff, but overall
the patch series looks good to me.
 
Acked-by: Pankaj Gupta 

> 
> 
> 



Re: [Qemu-block] [Qemu-devel] Some question about savem/qcow2 incremental snapshot

2018-06-08 Thread Pankaj Gupta
> Hi Junyan,

Just want to add that in the same email thread there is a discussion by Stefan
& Kevin describing ways of handling NVDIMM similarly to a block device, with
the pros & cons of the approach.

In light of that, we have to consider this as memory, but qcow2 support is
required for snapshots, handling block errors and other advanced features.

Hope this helps. 

Thanks, 
Pankaj 

> > I think nvdimm kind memory can really save the content(no matter real or
> > emulated). But I think it is still
> 

> > memory, as I understand, its data should be stored in the qcow2 image or
> > some
> > external snapshot data
> 

> > image, so that we can copy this qcow2 image to other place and restore the
> > same environment.
> 

> Emulated NVDIMM is just a file backed mmaped region in guest address space.
> So, whatever IO
> operations guest does its done directly in memory pages and synced to backup
> file.

> For this usecase, normally 'raw' image format is supported. If we have to
> support qcow2 format, we have to
> mmap different chunks for ranges of host virtual memory regions into guest
> address space so that whenever guest does IO
> into some offset host/qemu knows the corresponding location. Also, there is
> need to manage large number of these
> small chunks.

> > Qcow2 image contain all vm state, disk data and memory data, so I think
> > nvdimm's data should also be
> 

> > stored in this qcow2 image.
> 

> That will not work for emulated NVDIMM. I guest same is for pass-through or
> actual NVDIMM.

> > I am really a new guy in vmm field and do not know the usage of qcow2 with
> > DAX. So far as I know, DAX
> 

> > is a kernel FS option to let the page mapping bypass all block device logic
> > and can improve performance.
> 

> > But qcow2 is a user space file format used by qemu to emulate disks(Am I
> > right?), so I have no idea about
> 

> > that.
> 

> FYI DAX is direct access, whenever guest does file operations to DAX capable
> NVDIMM device, it bypasses
> regular block device logic and uses iomap calls to direct IO into the NVDIMM
> device. But qcow2 disk emulation
> part is host thing and it should be able to provide support, if it provides a
> mmaped area as guest physical or host
> virtual address.

> Thanks,
> Pankaj

> > Thanks
> 

> > Junyan
> 

> > From: Qemu-devel  on
> > behalf
> > of Pankaj Gupta 
> 
> > Sent: Friday, June 8, 2018 7:59:24 AM
> 
> > To: Junyan He
> 
> > Cc: Kevin Wolf; qemu block; qemu-de...@nongnu.org; Stefan Hajnoczi; Max
> > Reitz
> 
> > Subject: Re: [Qemu-devel] [Qemu-block] Some question about savem/qcow2
> > incremental snapshot
> 

> > Hi Junyan,
> 

> > AFAICU you are trying to utilize qcow2 capabilities to do incremental
> 
> > snapshot. As I understand NVDIMM device (being it real or emulated), its
> 
> > contents are always be backed up in backing device.
> 

> > Now, the question comes to take a snapshot at some point in time. You are
> 
> > trying to achieve this with qcow2 format (not checked code yet), I have
> > below
> 
> > queries:
> 

> > - Are you implementing this feature for both actual DAX device pass-through
> 
> > as well as emulated DAX?
> 
> > - Are you using additional qcow2 disk for storing/taking snapshots? How we
> > are
> 
> > planning to use this feature?
> 

> > Reason I asked this question is if we concentrate on integrating qcow2
> 
> > with DAX, we will have a full fledged solution for most of the use-cases.
> 

> > Thanks,
> 
> > Pankaj
> 

> > >
> 
> > > Dear all:
> 
> > >
> 
> > > I just switched from graphic/media field to virtualization at the end of
> > > the
> 
> > > last year,
> 
> > > so I am sorry that though I have already try my best but I still feel a
> 
> > > little dizzy
> 
> > > about your previous discussion about NVDimm via block layer:)
> 
> > > In today's qemu, we use the SaveVMHandlers functions to handle both
> > > snapshot
> 
> > > and migration.
> 
> > > So for nvdimm kind memory, its migration and snapshot use the same way as
> > > the
> 
> > > ram(savevm_ram_handlers). But the difference is the size of nvdimm may be
> 
> > > huge, and the load
> 
> > > and store speed is slower. According to my usage, when I use 256G nvdimm
> > > as
> 
> > > memory backend,
> 
> > > it may take more than 5 minutes to complete one snapshot saving, and
> > > after

Re: [Qemu-block] [Qemu-devel] Some question about savem/qcow2 incremental snapshot

2018-06-08 Thread Pankaj Gupta
Hi Junyan, 

> I think nvdimm kind memory can really save the content(no matter real or
> emulated). But I think it is still

> memory, as I understand, its data should be stored in the qcow2 image or some
> external snapshot data

> image, so that we can copy this qcow2 image to other place and restore the
> same environment.

An emulated NVDIMM is just a file-backed mmaped region in guest address space.
So, whatever IO operations the guest does are done directly in memory pages and
synced to the backing file.

For this use case, normally the 'raw' image format is supported. If we have to
support the qcow2 format, we have to mmap different chunks of host virtual
memory regions into the guest address space, so that whenever the guest does IO
at some offset, host/QEMU knows the corresponding location. Also, there is a
need to manage a large number of these small chunks.
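
(To make the file-backed mmap model above concrete, a minimal host-side sketch —
illustration only, the backing file name is made up — showing how stores into a
shared mapping reach the backing file:)

    #include <fcntl.h>
    #include <stdio.h>
    #include <string.h>
    #include <sys/mman.h>
    #include <unistd.h>

    int main(void)
    {
        const char *backing = "nvdimm-backing.raw";   /* hypothetical file */
        size_t size = 16 * 1024 * 1024;

        int fd = open(backing, O_RDWR | O_CREAT, 0600);
        if (fd < 0 || ftruncate(fd, size) < 0) {
            perror("backing file");
            return 1;
        }

        /* MAP_SHARED is what makes "guest" stores visible in the backing file. */
        void *p = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
        if (p == MAP_FAILED) {
            perror("mmap");
            return 1;
        }

        memcpy(p, "guest data", 10);   /* a "guest store" into the region */
        msync(p, size, MS_SYNC);       /* flush dirty pages to the file   */

        munmap(p, size);
        close(fd);
        return 0;
    }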

> Qcow2 image contain all vm state, disk data and memory data, so I think
> nvdimm's data should also be

> stored in this qcow2 image.

That will not work for emulated NVDIMM. I guess the same is true for
pass-through or actual NVDIMM.

> I am really a new guy in vmm field and do not know the usage of qcow2 with
> DAX. So far as I know, DAX

> is a kernel FS option to let the page mapping bypass all block device logic
> and can improve performance.

> But qcow2 is a user space file format used by qemu to emulate disks(Am I
> right?), so I have no idea about

> that.

FYI, DAX is direct access: whenever the guest does file operations on a
DAX-capable NVDIMM device, it bypasses the regular block device logic and uses
iomap calls to do direct IO into the NVDIMM device. But the qcow2 disk
emulation part is a host-side thing, and it should be able to provide support
if it provides an mmaped area as a guest physical or host virtual address.

Thanks, 
Pankaj 

> Thanks

> Junyan

> From: Qemu-devel  on behalf
> of Pankaj Gupta 
> Sent: Friday, June 8, 2018 7:59:24 AM
> To: Junyan He
> Cc: Kevin Wolf; qemu block; qemu-de...@nongnu.org; Stefan Hajnoczi; Max Reitz
> Subject: Re: [Qemu-devel] [Qemu-block] Some question about savem/qcow2
> incremental snapshot

> Hi Junyan,

> AFAICU you are trying to utilize qcow2 capabilities to do incremental
> snapshot. As I understand NVDIMM device (being it real or emulated), its
> contents are always be backed up in backing device.

> Now, the question comes to take a snapshot at some point in time. You are
> trying to achieve this with qcow2 format (not checked code yet), I have below
> queries:

> - Are you implementing this feature for both actual DAX device pass-through
> as well as emulated DAX?
> - Are you using additional qcow2 disk for storing/taking snapshots? How we
> are
> planning to use this feature?

> Reason I asked this question is if we concentrate on integrating qcow2
> with DAX, we will have a full fledged solution for most of the use-cases.

> Thanks,
> Pankaj

> >
> > Dear all:
> >
> > I just switched from graphic/media field to virtualization at the end of
> > the
> > last year,
> > so I am sorry that though I have already try my best but I still feel a
> > little dizzy
> > about your previous discussion about NVDimm via block layer:)
> > In today's qemu, we use the SaveVMHandlers functions to handle both
> > snapshot
> > and migration.
> > So for nvdimm kind memory, its migration and snapshot use the same way as
> > the
> > ram(savevm_ram_handlers). But the difference is the size of nvdimm may be
> > huge, and the load
> > and store speed is slower. According to my usage, when I use 256G nvdimm as
> > memory backend,
> > it may take more than 5 minutes to complete one snapshot saving, and after
> > saving the qcow2
> > image is bigger than 50G. For migration, this may not be a problem because
> > we
> > do not need
> > extra disk space and the guest is not paused when in migration process. But
> > for snapshot,
> > we need to pause the VM and the user experience is bad, and we got concerns
> > about that.
> > I posted this question in Jan this year but failed to get enough reply.
> > Then
> > I sent a RFC patch
> > set in Mar, basic idea is using the dependency snapshot and dirty log trace
> > in kernel to
> > optimize this.
> >
> > https://lists.gnu.org/archive/html/qemu-devel/2018-03/msg04530.html
> >
> > I use the simple way to handle this,
> > 1. Separate the nvdimm region from ram when do snapshot.
> > 2. If the first time, we dump all the nvdimm data the same as ram, and
> > enable
> > dirty log trace
> > for nvdimm kind region.
> > 3. If not the first time, we find the previous snapshot point and add
> > reference to its clusters
> >

Re: [Qemu-block] [Qemu-devel] Some question about savem/qcow2 incremental snapshot

2018-06-08 Thread Pankaj Gupta


Hi Junyan,

AFAICU you are trying to utilize qcow2 capabilities to do incremental
snapshots. As I understand it, an NVDIMM device (be it real or emulated) always
has its contents backed up in the backing device.

Now, the question comes to taking a snapshot at some point in time. You are
trying to achieve this with the qcow2 format (I have not checked the code yet);
I have the below queries:

- Are you implementing this feature for both actual DAX device pass-through 
  as well as emulated DAX?
- Are you using an additional qcow2 disk for storing/taking snapshots? How are
  we planning to use this feature?

The reason I asked this question is that if we concentrate on integrating qcow2
with DAX, we will have a full-fledged solution for most of the use cases.

Thanks,
Pankaj 

> 
> Dear all:
> 
> I just switched from graphic/media field to virtualization at the end of the
> last year,
> so I am sorry that though I have already try my best but I still feel a
> little dizzy
> about your previous discussion about NVDimm via block layer:)
> In today's qemu, we use the SaveVMHandlers functions to handle both snapshot
> and migration.
> So for nvdimm kind memory, its migration and snapshot use the same way as the
> ram(savevm_ram_handlers). But the difference is the size of nvdimm may be
> huge, and the load
> and store speed is slower. According to my usage, when I use 256G nvdimm as
> memory backend,
> it may take more than 5 minutes to complete one snapshot saving, and after
> saving the qcow2
> image is bigger than 50G. For migration, this may not be a problem because we
> do not need
> extra disk space and the guest is not paused when in migration process. But
> for snapshot,
> we need to pause the VM and the user experience is bad, and we got concerns
> about that.
> I posted this question in Jan this year but failed to get enough reply. Then
> I sent a RFC patch
> set in Mar, basic idea is using the dependency snapshot and dirty log trace
> in kernel to
> optimize this.
> 
> https://lists.gnu.org/archive/html/qemu-devel/2018-03/msg04530.html
> 
> I use the simple way to handle this,
> 1. Separate the nvdimm region from ram when do snapshot.
> 2. If the first time, we dump all the nvdimm data the same as ram, and enable
> dirty log trace
> for nvdimm kind region.
> 3. If not the first time, we find the previous snapshot point and add
> reference to its clusters
> which is used to store nvdimm data. And this time, we just save dirty page
> bitmap and dirty pages.
> Because the previous nvdimm data clusters is ref added, we do not need to
> worry about its deleting.
> 
> I encounter a lot of problems:
> 1. Migration and snapshot logic is mixed and need to separate them for
> nvdimm.
> 2. Cluster has its alignment. When do snapshot, we just save data to disk
> continuous. Because we
> need to add ref to cluster, we really need to consider the alignment. I just
> use a little trick way
> to padding some data to alignment now, and I think it is not a good way.
> 3. Dirty log trace may have some performance problem.
> 
> In theory, this manner can be used to handle all kind of huge memory
> snapshot, we need to find the
> balance between guest performance(Because of dirty log trace) and snapshot
> saving time.
> 
> Thanks
> Junyan
> 
> 
> -Original Message-
> From: Stefan Hajnoczi [mailto:stefa...@redhat.com]
> Sent: Thursday, May 31, 2018 6:49 PM
> To: Kevin Wolf 
> Cc: Max Reitz ; He, Junyan ; Pankaj
> Gupta ; qemu-de...@nongnu.org; qemu block
> 
> Subject: Re: [Qemu-block] [Qemu-devel] Some question about savem/qcow2
> incremental snapshot
> 
> On Wed, May 30, 2018 at 06:07:19PM +0200, Kevin Wolf wrote:
> > Am 30.05.2018 um 16:44 hat Stefan Hajnoczi geschrieben:
> > > On Mon, May 14, 2018 at 02:48:47PM +0100, Stefan Hajnoczi wrote:
> > > > On Fri, May 11, 2018 at 07:25:31PM +0200, Kevin Wolf wrote:
> > > > > Am 10.05.2018 um 10:26 hat Stefan Hajnoczi geschrieben:
> > > > > > On Wed, May 09, 2018 at 07:54:31PM +0200, Max Reitz wrote:
> > > > > > > On 2018-05-09 12:16, Stefan Hajnoczi wrote:
> > > > > > > > On Tue, May 08, 2018 at 05:03:09PM +0200, Kevin Wolf wrote:
> > > > > > > >> Am 08.05.2018 um 16:41 hat Eric Blake geschrieben:
> > > > > > > >>> On 12/25/2017 01:33 AM, He Junyan wrote:
> > > > > > > >> I think it makes sense to invest some effort into such
> > > > > > > >> interfaces, but be prepared for a long journey.
> > > > > > > > 
> > > > > > > > I like the suggestion but it needs to be followed up with
> > > > > > > >

Re: [Qemu-block] [Qemu-devel] [PATCH] block, migration: Use qemu_madvise inplace of madvise

2017-02-17 Thread Pankaj Gupta

> 
> * Pankaj Gupta (pagu...@redhat.com) wrote:
> > 
> > Thanks for your comments. I have below query.
> > > 
> > > On Fri 17 Feb 2017 09:06:04 AM CET, Pankaj Gupta wrote:
> > > >  To maintain consistency at all the places use qemu_madvise wrapper
> > > >  inplace of madvise call.
> > > 
> > > >  if (length > 0) {
> > > > -madvise((uint8_t *) t + offset, length, MADV_DONTNEED);
> > > > +qemu_madvise((uint8_t *) t + offset, length,
> > > > QEMU_MADV_DONTNEED);
> > > 
> > > This was changed two months ago from qemu_madvise() to madvise(), is
> > > there any reason why you want to revert that change? Those two calls are
> > > not equivalent, please see commit 2f2c8d6b371cfc6689affb0b7e for an
> > > explanation.
> > > 
> > > > -if (madvise(start, length, MADV_DONTNEED)) {
> > > > +if (qemu_madvise(start, length, QEMU_MADV_DONTNEED)) {
> > > >  error_report("%s MADV_DONTNEED: %s", __func__,
> > > >  strerror(errno));
> > 
> > I checked history of only change related to 'postcopy'.
> > 
> > For my linux machine:
> > 
> > ./config-host.mak
> > 
> > CONFIG_MADVISE=y
> > CONFIG_POSIX_MADVISE=y
> > 
> > As both these options are set for Linux, every time we call call
> > 'qemu_madvise' ==>"madvise(addr, len, advice);" will
> > be compiled/called. I don't understand why '2f2c8d6b371cfc6689affb0b7e'
> > explicitly changed for :"#ifdef CONFIG_LINUX"
> > I think its better to write generic function maybe in a wrapper then to
> > conditionally set something at different places.
> 
> No; the problem is that the behaviours are different.
> You're right that the current build on Linux defines MADVISE and thus we are
> safe because qemu_madvise
> takes teh CONFIG_MADVISE/madvise route - but we need to be explicit that it's
> only
> the madvise() route that's safe, not any of the calls implemented by
> qemu_madvise, because if in the future someone was to rearrange qemu_madvise
> to prefer posix_madvise postcopy would break in a very subtle way.

Agreed.
We can add a comment explaining this?

> 
> IMHO it might even be better to remove the definition of QEMU_MADV_DONTNEED
> altogether
> and make a name that wasn't ambiguous between the two, since the posix
> definition is
> so different.

I think 'posix_madvise' was added for systems which did not have 'madvise'.
If I look at the makefile, first we check which calls are available and then
set the config option accordingly. We give 'madvise' precedence over
'posix_madvise' if both are present.

For the systems which don't have madvise, 'posix_madvise' is called, which as
per the discussion is not the right thing for the 'DONTNEED' option. It will
not give the desired results.

Either we have to find the right alternative, or else it is already broken for
systems which don't support madvise.
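
(A standalone sketch of the difference being discussed, assuming an anonymous
private mapping and glibc — where posix_madvise(POSIX_MADV_DONTNEED) is a
no-op, while madvise(MADV_DONTNEED) really drops the pages so they read back as
zero:)

    #include <stdio.h>
    #include <string.h>
    #include <sys/mman.h>

    int main(void)
    {
        size_t len = 4096;
        unsigned char *p = mmap(NULL, len, PROT_READ | PROT_WRITE,
                                MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
        if (p == MAP_FAILED) {
            perror("mmap");
            return 1;
        }
        memset(p, 0xaa, len);

        posix_madvise(p, len, POSIX_MADV_DONTNEED);    /* advisory: data kept   */
        printf("after posix_madvise: 0x%02x\n", p[0]); /* prints 0xaa */

        madvise(p, len, MADV_DONTNEED);                /* destructive: zeroed   */
        printf("after madvise:       0x%02x\n", p[0]); /* prints 0x00 */

        munmap(p, len);
        return 0;
    }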

> 
> Dave
> 
> > int qemu_madvise(void *addr, size_t len, int advice)
> > {
> > if (advice == QEMU_MADV_INVALID) {
> > errno = EINVAL;
> > return -1;
> > }
> > #if defined(CONFIG_MADVISE)
> > return madvise(addr, len, advice);
> > #elif defined(CONFIG_POSIX_MADVISE)
> > return posix_madvise(addr, len, advice);
> > #else
> > errno = EINVAL;
> > return -1;
> > #endif
> > }
> > 
> > > 
> > > And this is the same case.
> > > 
> > > Berto
> > > 
> --
> Dr. David Alan Gilbert / dgilb...@redhat.com / Manchester, UK
> 
> 



Re: [Qemu-block] [PATCH] block, migration: Use qemu_madvise inplace of madvise

2017-02-17 Thread Pankaj Gupta

Thanks for your comments. I have a query below.
> 
> On Fri 17 Feb 2017 09:06:04 AM CET, Pankaj Gupta wrote:
> >  To maintain consistency at all the places use qemu_madvise wrapper
> >  inplace of madvise call.
> 
> >  if (length > 0) {
> > -madvise((uint8_t *) t + offset, length, MADV_DONTNEED);
> > +qemu_madvise((uint8_t *) t + offset, length, QEMU_MADV_DONTNEED);
> 
> This was changed two months ago from qemu_madvise() to madvise(), is
> there any reason why you want to revert that change? Those two calls are
> not equivalent, please see commit 2f2c8d6b371cfc6689affb0b7e for an
> explanation.
> 
> > -if (madvise(start, length, MADV_DONTNEED)) {
> > +if (qemu_madvise(start, length, QEMU_MADV_DONTNEED)) {
> >  error_report("%s MADV_DONTNEED: %s", __func__, strerror(errno));

I checked the history; the only change is related to 'postcopy'.

For my linux machine:

./config-host.mak

CONFIG_MADVISE=y
CONFIG_POSIX_MADVISE=y

As both these options are set for Linux, every time we call 'qemu_madvise'
==> "madvise(addr, len, advice);" will be compiled/called. I don't understand
why '2f2c8d6b371cfc6689affb0b7e' explicitly changed this to use
"#ifdef CONFIG_LINUX".
I think it's better to write a generic function, maybe in a wrapper, than to
conditionally set something at different places.

int qemu_madvise(void *addr, size_t len, int advice)
{
if (advice == QEMU_MADV_INVALID) {
errno = EINVAL;
return -1;
}
#if defined(CONFIG_MADVISE)
return madvise(addr, len, advice);
#elif defined(CONFIG_POSIX_MADVISE)
return posix_madvise(addr, len, advice);
#else
errno = EINVAL;
return -1;
#endif
}

> 
> And this is the same case.
> 
> Berto
> 



[Qemu-block] [PATCH] block, migration: Use qemu_madvise inplace of madvise

2017-02-17 Thread Pankaj Gupta
 To maintain consistency at all the places, use the qemu_madvise wrapper
 in place of the madvise call.

Signed-off-by: Pankaj Gupta <pagu...@redhat.com>
---
 block/qcow2-cache.c  | 2 +-
 migration/postcopy-ram.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/block/qcow2-cache.c b/block/qcow2-cache.c
index 1d25147..4991ca5 100644
--- a/block/qcow2-cache.c
+++ b/block/qcow2-cache.c
@@ -74,7 +74,7 @@ static void qcow2_cache_table_release(BlockDriverState *bs, 
Qcow2Cache *c,
 size_t offset = QEMU_ALIGN_UP((uintptr_t) t, align) - (uintptr_t) t;
 size_t length = QEMU_ALIGN_DOWN(mem_size - offset, align);
 if (length > 0) {
-madvise((uint8_t *) t + offset, length, MADV_DONTNEED);
+qemu_madvise((uint8_t *) t + offset, length, QEMU_MADV_DONTNEED);
 }
 #endif
 }
diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c
index a40dddb..558fec1 100644
--- a/migration/postcopy-ram.c
+++ b/migration/postcopy-ram.c
@@ -213,7 +213,7 @@ int postcopy_ram_discard_range(MigrationIncomingState *mis, 
uint8_t *start,
size_t length)
 {
 trace_postcopy_ram_discard_range(start, length);
-if (madvise(start, length, MADV_DONTNEED)) {
+if (qemu_madvise(start, length, QEMU_MADV_DONTNEED)) {
 error_report("%s MADV_DONTNEED: %s", __func__, strerror(errno));
 return -1;
 }
-- 
2.7.4