Re: [PATCH 00/11] Fixes for clang-13 plus tcg/ppc

2021-08-13 Thread Brad Smith

On 7/12/2021 5:55 PM, Richard Henderson wrote:

The goal here was to address Brad's report for clang vs ppc32.

Somewhere in between here and there I forgot about the ppc32 part,
needed a newer clang for gcc135, accidentally built master instead
of the clang-12 release branch, fixed a bunch of buggy looking
things, and only then remembered I was building ppc64 and wasn't
going to test what I thought I would.

So: Brad, could you double-check this fixes your problem?


Yes, this does. Thank you.


Others: only patch 7 is a case where the variable flagged as unused
obviously should have been used.  But please double-check.


r~


Cc: Alex Bennée 
Cc: Brad Smith 
Cc: David Gibson 
Cc: Eric Blake 
Cc: Gerd Hoffmann 
Cc: Greg Kurz 
Cc: Jason Wang 
Cc: Laurent Vivier 
Cc: qemu-block@nongnu.org
Cc: qemu-...@nongnu.org
Cc: Vladimir Sementsov-Ogievskiy 


Richard Henderson (11):
   nbd/server: Remove unused variable
   accel/tcg: Remove unused variable in cpu_exec
   util/selfmap: Discard mapping on error
   net/checksum: Remove unused variable in net_checksum_add_iov
   hw/audio/adlib: Remove unused variable in adlib_callback
   hw/ppc/spapr_events: Remove unused variable from check_exception
   hw/pci-host/pnv_phb4: Fix typo in pnv_phb4_ioda_write
   linux-user/syscall: Remove unused variable from execve
   tests/unit: Remove unused variable from test_io
   tcg/ppc: Replace TCG_TARGET_CALL_DARWIN with _CALL_DARWIN
   tcg/ppc: Ensure _CALL_SYSV is set for 32-bit ELF

  accel/tcg/cpu-exec.c |  3 ---
  hw/audio/adlib.c |  3 +--
  hw/pci-host/pnv_phb4.c   |  2 +-
  hw/ppc/spapr_events.c|  5 -
  linux-user/syscall.c |  3 ---
  nbd/server.c |  4 
  net/checksum.c   |  4 +---
  tests/unit/test-iov.c|  5 +
  util/selfmap.c   | 28 
  tcg/ppc/tcg-target.c.inc | 25 -
  10 files changed, 40 insertions(+), 42 deletions(-)





[PATCH] qemu-nbd: Change default cache mode to writeback

2021-08-13 Thread Nir Soffer
Both qemu and qemu-img use writeback cache mode by default, which is
already documented in qemu(1). qemu-nbd uses writethrough cache mode by
default, and the default cache mode is not documented.

According to qemu-nbd(8):

   --cache=CACHE
      The cache mode to be used with the file. See the
      documentation of the emulator's -drive cache=... option for
      allowed values.

qemu(1) says:

The default mode is cache=writeback.

So users have no reason to assume that qemu-nbd is using writethrough
cache mode. The only hint is the painfully slow writes when using the
defaults.

Looking at the git history, it seems that qemu used writethrough in the
past to support broken guests that did not flush data properly, or could
not flush due to limitations in qemu. But qemu-nbd clients can use
NBD_CMD_FLUSH to flush data, so using writethrough does not help anyone.
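
In qemu-nbd the parsed mode essentially boils down to a single boolean
handed to the block backend (rough sketch for context only, not part of
this diff):

    /* writethrough=false keeps the backend's write cache enabled; the NBD
     * client is expected to send NBD_CMD_FLUSH when it needs its data on
     * stable storage. */
    blk_set_enable_write_cache(blk, !writethrough);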

Change the default cache mode to writeback, and document the default and
available values properly in the online help and manual.

With this change, converting an image via qemu-nbd is 3.5 times faster.

$ qemu-img create dst.img 50g
$ qemu-nbd -t -f raw -k /tmp/nbd.sock dst.img

Before this change:

$ hyperfine -r3 "./qemu-img convert -p -f raw -O raw -T none -W fedora34.img nbd+unix:///?socket=/tmp/nbd.sock"
Benchmark #1: ./qemu-img convert -p -f raw -O raw -T none -W fedora34.img nbd+unix:///?socket=/tmp/nbd.sock
  Time (mean ± σ):     83.639 s ±  5.970 s    [User: 2.733 s, System: 6.112 s]
  Range (min … max):   76.749 s … 87.245 s    3 runs

After this change:

$ hyperfine -r3 "./qemu-img convert -p -f raw -O raw -T none -W fedora34.img nbd+unix:///?socket=/tmp/nbd.sock"
Benchmark #1: ./qemu-img convert -p -f raw -O raw -T none -W fedora34.img nbd+unix:///?socket=/tmp/nbd.sock
  Time (mean ± σ):     23.522 s ±  0.433 s    [User: 2.083 s, System: 5.475 s]
  Range (min … max):   23.234 s … 24.019 s    3 runs

Users can avoid the issue by using --cache=writeback [1], but the defaults
should give good performance for the common use case.

[1] https://bugzilla.redhat.com/1990656

Signed-off-by: Nir Soffer 
---
 docs/tools/qemu-nbd.rst | 6 --
 qemu-nbd.c  | 6 --
 2 files changed, 8 insertions(+), 4 deletions(-)

diff --git a/docs/tools/qemu-nbd.rst b/docs/tools/qemu-nbd.rst
index ee862fa0bc..5643da26e9 100644
--- a/docs/tools/qemu-nbd.rst
+++ b/docs/tools/qemu-nbd.rst
@@ -98,8 +98,10 @@ driver options if ``--image-opts`` is specified.
 
 .. option:: --cache=CACHE
 
-  The cache mode to be used with the file.  See the documentation of
-  the emulator's ``-drive cache=...`` option for allowed values.
+  The cache mode to be used with the file. Valid values are:
+  ``none``, ``writeback`` (the default), ``writethrough``,
+  ``directsync`` and ``unsafe``. See the documentation of
+  the emulator's ``-drive cache=...`` option for more info.
 
 .. option:: -n, --nocache
 
diff --git a/qemu-nbd.c b/qemu-nbd.c
index 26ffbf15af..6c18fcd19a 100644
--- a/qemu-nbd.c
+++ b/qemu-nbd.c
@@ -135,7 +135,9 @@ static void usage(const char *name)
 "'snapshot.id=[ID],snapshot.name=[NAME]', or\n"
 "'[ID_OR_NAME]'\n"
 "  -n, --nocache disable host cache\n"
-"  --cache=MODE  set cache mode (none, writeback, ...)\n"
+"  --cache=MODE  set cache mode used to access the disk image, the\n"
+"valid options are: 'none', 'writeback' (default),\n"
+"'writethrough', 'directsync' and 'unsafe'\n"
 "  --aio=MODEset AIO mode (native, io_uring or threads)\n"
 "  --discard=MODEset discard mode (ignore, unmap)\n"
 "  --detect-zeroes=MODE  set detect-zeroes mode (off, on, unmap)\n"
@@ -552,7 +554,7 @@ int main(int argc, char **argv)
 bool alloc_depth = false;
 const char *tlscredsid = NULL;
 bool imageOpts = false;
-bool writethrough = true;
+bool writethrough = false; /* Client will flush as needed. */
 bool fork_process = false;
 bool list = false;
 int old_stderr = -1;
-- 
2.31.1




Re: [RFC PATCH v1] Adding Support for namespace management

2021-08-13 Thread Keith Busch
On Fri, Aug 13, 2021 at 03:02:22PM +0530, Naveen wrote:
> +static uint16_t nvme_identify_ns_common(NvmeCtrl *n, NvmeRequest *req)
> +{
> +NvmeIdNs id_ns = {};
> +
> +id_ns.nsfeat |= (0x4 | 0x10);
> +id_ns.dpc = 0x1f;
> +
> +NvmeLBAF lbaf[16] = {
> +[0] = {.ds = 9},
> +[1] = {.ds = 9, .ms = 8},
> +[2] = {.ds = 9, .ms = 16},
> +[3] = {.ds = 9, .ms = 64},
> +[4] = {.ds = 12},
> +[5] = {.ds = 12, .ms = 8},
> +[6] = {.ds = 12, .ms = 16},
> +[7] = {.ds = 12, .ms = 64},
> +};

Since this lbaf table is a copy of what's defined in nvme_ns_init, it
should be defined in one place and reused.
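
Something like the untested sketch below (the table name is just a
placeholder) would let nvme_ns_init() and this common-namespace Identify
path share a single definition:

    /* Default LBA formats, shared by both call sites (name is made up). */
    static const NvmeLBAF nvme_default_lbaf[16] = {
        [0] = { .ds = 9  },
        [1] = { .ds = 9,  .ms = 8  },
        [2] = { .ds = 9,  .ms = 16 },
        [3] = { .ds = 9,  .ms = 64 },
        [4] = { .ds = 12 },
        [5] = { .ds = 12, .ms = 8  },
        [6] = { .ds = 12, .ms = 16 },
        [7] = { .ds = 12, .ms = 64 },
    };

    /* ... and each call site then reduces to: */
    memcpy(&id_ns.lbaf, nvme_default_lbaf, sizeof(nvme_default_lbaf));
    id_ns.nlbaf = 7;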

> +
> +memcpy(&id_ns.lbaf, &lbaf, sizeof(lbaf));
> +id_ns.nlbaf = 7;

The identify structure should be what's in common with all namespaces,
and this doesn't look complete. Just off the top of my head, missing
fields include MC and DLFEAT.
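
As a rough illustration only (the exact values should mirror whatever
nvme_ns_init() programs; the numbers below are assumptions, not taken from
the patch):

    id_ns.mc = 0x3;      /* assumed: metadata in extended LBA or separate buffer */
    id_ns.dlfeat = 0x1;  /* assumed: deallocated blocks read back as zeroes */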

> +
> +return nvme_c2h(n, (uint8_t *)&id_ns, sizeof(NvmeIdNs), req);
> +}
> +
>  static uint16_t nvme_identify_ns(NvmeCtrl *n, NvmeRequest *req, bool active)
>  {
>  NvmeNamespace *ns;
> @@ -4453,8 +4478,10 @@ static uint16_t nvme_identify_ns(NvmeCtrl *n, 
> NvmeRequest *req, bool active)
>  
>  trace_pci_nvme_identify_ns(nsid);
>  
> -if (!nvme_nsid_valid(n, nsid) || nsid == NVME_NSID_BROADCAST) {
> +if (!nvme_nsid_valid(n, nsid)) {
>  return NVME_INVALID_NSID | NVME_DNR;
> +} else if (nsid == NVME_NSID_BROADCAST) {
> +return nvme_identify_ns_common(n, req);
>  }
>  
>  ns = nvme_ns(n, nsid);
> @@ -5184,6 +5211,195 @@ static void nvme_select_iocs_ns(NvmeCtrl *n, 
> NvmeNamespace *ns)
>  }
>  }
>  
> +static int nvme_blk_truncate(BlockBackend *blk, size_t len, Error **errp)
> +{
> +int ret;
> +uint64_t perm, shared_perm;
> +
> +blk_get_perm(blk, &perm, &shared_perm);
> +
> +ret = blk_set_perm(blk, perm | BLK_PERM_RESIZE, shared_perm, errp);
> +if (ret < 0) {
> +return ret;
> +}
> +
> +ret = blk_truncate(blk, len, false, PREALLOC_MODE_OFF, 0, errp);
> +if (ret < 0) {
> +return ret;
> +}
> +
> +ret = blk_set_perm(blk, perm, shared_perm, errp);
> +if (ret < 0) {
> +return ret;
> +}
> +
> +return 0;
> +}
> +
> +static uint32_t nvme_allocate_nsid(NvmeCtrl *n)
> +{
> +uint32_t nsid = 0;
> +for (int i = 1; i <= NVME_MAX_NAMESPACES; i++) {
> +if (nvme_ns(n, i) || nvme_subsys_ns(n->subsys, i)) {
> +continue;
> +}
> +
> +nsid = i;
> +return nsid;
> +}
> +return nsid;
> +}
> +
> +static uint16_t nvme_namespace_create(NvmeCtrl *n, NvmeRequest *req)
> +{
> +   uint32_t ret;
> +   NvmeIdNs id_ns_host;
> +   NvmeSubsystem *subsys = n->subsys;
> +   Error *err = NULL;
> +   uint8_t flbas_host;
> +   uint64_t ns_size;
> +   int lba_index;
> +   NvmeNamespace *ns;
> +   NvmeCtrl *ctrl;
> +   NvmeIdNs *id_ns;
> +
> +ret = nvme_h2c(n, (uint8_t *)&id_ns_host, sizeof(id_ns_host), req);
> +if (ret) {
> +return ret;
> +}

Some unusual indentation here.

> +
> +if (id_ns_host.ncap < id_ns_host.nsze) {
> +return NVME_THIN_PROVISION_NO_SUPP | NVME_DNR;
> +} else if (id_ns_host.ncap > id_ns_host.nsze) {
> +return NVME_INVALID_FIELD | NVME_DNR;
> +}
> +
> +if (!id_ns_host.nsze) {
> +return NVME_INVALID_FIELD | NVME_DNR;
> +}
> +
> +if (QSLIST_EMPTY(&subsys->unallocated_namespaces)) {
> +return NVME_NS_ID_UNAVAILABLE;
> +}
> +
> +ns = QSLIST_FIRST(&subsys->unallocated_namespaces);
> +id_ns = &ns->id_ns;
> +flbas_host = (id_ns_host.flbas) & (0xF);
> +
> +if (flbas_host > id_ns->nlbaf) {
> +return NVME_INVALID_FORMAT | NVME_DNR;
> +}
> +
> +ret = nvme_ns_setup(ns, &err);
> +if (ret) {
> +return ret;
> +}
> +
> +id_ns->flbas = id_ns_host.flbas;
> +id_ns->dps = id_ns_host.dps;
> +id_ns->nmic = id_ns_host.nmic;
> +
> +lba_index = NVME_ID_NS_FLBAS_INDEX(id_ns->flbas);
> +ns_size = id_ns_host.nsze * ((1 << id_ns->lbaf[lba_index].ds) +
> +(id_ns->lbaf[lba_index].ms));
> +id_ns->nvmcap = ns_size;
> +
> +if (ns_size > n->id_ctrl.unvmcap) {
> +return NVME_NS_INSUFF_CAP;
> +}
> +
> +ret = nvme_blk_truncate(ns->blkconf.blk, id_ns->nvmcap, &err);
> +if (ret) {
> +return ret;
> +}
> +
> +ns->size = blk_getlength(ns->blkconf.blk);
> +nvme_ns_init_format(ns);
> +
> +ns->params.nsid = nvme_allocate_nsid(n);
> +if (!ns->params.nsid) {
> +return NVME_NS_ID_UNAVAILABLE;
> +}
> +subsys->namespaces[ns->params.nsid] = ns;
> +
> +for (int cntlid = 0; cntlid < ARRAY_SIZE(n->subsys->ctrls); cntlid++) {
> +ctrl = nvme_subsys_ctrl(n->subsys, cntlid);
> +if (ctrl) {
> +ctrl->id_ctrl.unvmcap -= le64_to_cpu(ns->size);
> +}
> +}
> +
> +stl_le_p(&req->cqe.result, ns->params.nsid);
> +QSLIST_REMOVE_HEAD(&subsys->unallocated_namespaces, entry);
> +return NVME_SUCCESS;
> +}
> +
> 

[RFC PATCH v1] Adding Support for namespace management

2021-08-13 Thread Naveen
This patch adds support for the namespace management create and delete
operations. It has been tested with the command line below, where the
image files backing unallocated namespaces are created with a size of 0GB.
ns_create picks an entry from the list of unallocated namespaces,
initializes it and returns its nsid. A new mandatory field called tnvmcap
has been added, and we ensure that the total capacity of the namespaces
created does not exceed tnvmcap.

-device nvme-subsys,id=subsys0,tnvmcap=8
-device nvme,serial=foo,id=nvme0,subsys=subsys0
-device nvme,serial=bar,id=nvme1,subsys=subsys0

-drive id=ns1,file=ns1.img,if=none
-device nvme-ns,drive=ns1,bus=nvme0,nsid=1,zoned=false,shared=true
-drive id=ns2,file=ns2.img,if=none
-device nvme-ns,drive=ns2,bus=nvme0,nsid=2,zoned=false,shared=true
-drive id=ns3,file=ns3.img,if=none
-device nvme-ns,drive=ns3,bus=nvme0,nsid=3,zoned=false,shared=true
-drive id=ns4,file=ns4.img,if=none
-device nvme-ns,drive=ns4,bus=nvme0,nsid=4,zoned=false,shared=true

Please review and suggest if any changes are required.

Signed-off-by: Naveen Nagar 
Reviewed-by: Klaus Jensen 
Reviewed-by: Padmakar Kalghatgi 
Reviewed-by: Gollu Appalanaidu 
Reviewed-by: Jaegyu Choi 

---
 hw/nvme/ctrl.c   | 250 +--
 hw/nvme/ns.c |  14 ++-
 hw/nvme/nvme.h   |   6 +-
 hw/nvme/subsys.c |   3 +
 include/block/nvme.h |  18 +++-
 5 files changed, 272 insertions(+), 19 deletions(-)

diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c
index 6baf9e0420..4be23a3092 100644
--- a/hw/nvme/ctrl.c
+++ b/hw/nvme/ctrl.c
@@ -219,6 +219,7 @@ static const uint32_t nvme_cse_acs[256] = {
 [NVME_ADM_CMD_SET_FEATURES] = NVME_CMD_EFF_CSUPP,
 [NVME_ADM_CMD_GET_FEATURES] = NVME_CMD_EFF_CSUPP,
 [NVME_ADM_CMD_ASYNC_EV_REQ] = NVME_CMD_EFF_CSUPP,
+[NVME_ADM_CMD_NS_MANAGEMENT]= NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_NIC,
 [NVME_ADM_CMD_NS_ATTACHMENT]= NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_NIC,
 [NVME_ADM_CMD_FORMAT_NVM]   = NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_LBCC,
 };
@@ -4445,6 +4446,30 @@ static uint16_t nvme_identify_ctrl_csi(NvmeCtrl *n, 
NvmeRequest *req)
 return nvme_c2h(n, id, sizeof(id), req);
 }
 
+static uint16_t nvme_identify_ns_common(NvmeCtrl *n, NvmeRequest *req)
+{
+NvmeIdNs id_ns = {};
+
+id_ns.nsfeat |= (0x4 | 0x10);
+id_ns.dpc = 0x1f;
+
+NvmeLBAF lbaf[16] = {
+[0] = {.ds = 9},
+[1] = {.ds = 9, .ms = 8},
+[2] = {.ds = 9, .ms = 16},
+[3] = {.ds = 9, .ms = 64},
+[4] = {.ds = 12},
+[5] = {.ds = 12, .ms = 8},
+[6] = {.ds = 12, .ms = 16},
+[7] = {.ds = 12, .ms = 64},
+};
+
+memcpy(&id_ns.lbaf, &lbaf, sizeof(lbaf));
+id_ns.nlbaf = 7;
+
+return nvme_c2h(n, (uint8_t *)&id_ns, sizeof(NvmeIdNs), req);
+}
+
 static uint16_t nvme_identify_ns(NvmeCtrl *n, NvmeRequest *req, bool active)
 {
 NvmeNamespace *ns;
@@ -4453,8 +4478,10 @@ static uint16_t nvme_identify_ns(NvmeCtrl *n, 
NvmeRequest *req, bool active)
 
 trace_pci_nvme_identify_ns(nsid);
 
-if (!nvme_nsid_valid(n, nsid) || nsid == NVME_NSID_BROADCAST) {
+if (!nvme_nsid_valid(n, nsid)) {
 return NVME_INVALID_NSID | NVME_DNR;
+} else if (nsid == NVME_NSID_BROADCAST) {
+return nvme_identify_ns_common(n, req);
 }
 
 ns = nvme_ns(n, nsid);
@@ -5184,6 +5211,195 @@ static void nvme_select_iocs_ns(NvmeCtrl *n, 
NvmeNamespace *ns)
 }
 }
 
+static int nvme_blk_truncate(BlockBackend *blk, size_t len, Error **errp)
+{
+int ret;
+uint64_t perm, shared_perm;
+
+blk_get_perm(blk, &perm, &shared_perm);
+
+ret = blk_set_perm(blk, perm | BLK_PERM_RESIZE, shared_perm, errp);
+if (ret < 0) {
+return ret;
+}
+
+ret = blk_truncate(blk, len, false, PREALLOC_MODE_OFF, 0, errp);
+if (ret < 0) {
+return ret;
+}
+
+ret = blk_set_perm(blk, perm, shared_perm, errp);
+if (ret < 0) {
+return ret;
+}
+
+return 0;
+}
+
+static uint32_t nvme_allocate_nsid(NvmeCtrl *n)
+{
+uint32_t nsid = 0;
+for (int i = 1; i <= NVME_MAX_NAMESPACES; i++) {
+if (nvme_ns(n, i) || nvme_subsys_ns(n->subsys, i)) {
+continue;
+}
+
+nsid = i;
+return nsid;
+}
+return nsid;
+}
+
+static uint16_t nvme_namespace_create(NvmeCtrl *n, NvmeRequest *req)
+{
+   uint32_t ret;
+   NvmeIdNs id_ns_host;
+   NvmeSubsystem *subsys = n->subsys;
+   Error *err = NULL;
+   uint8_t flbas_host;
+   uint64_t ns_size;
+   int lba_index;
+   NvmeNamespace *ns;
+   NvmeCtrl *ctrl;
+   NvmeIdNs *id_ns;
+
+ret = nvme_h2c(n, (uint8_t *)&id_ns_host, sizeof(id_ns_host), req);
+if (ret) {
+return ret;
+}
+
+if (id_ns_host.ncap < id_ns_host.nsze) {
+return NVME_THIN_PROVISION_NO_SUPP | NVME_DNR;
+} else if (id_ns_host.ncap > id_ns_host.nsze) {
+return NVME_INVALID_FIELD | NVME_DNR;
+}
+
+if (!id_ns_host.nsze) {
+return NVME_INVALID_FIELD | NVME_DNR;
+