Re: [PATCH v8 2/5] hw/block/nvme: pull aio error handling

2020-11-16 Thread Klaus Jensen
On Nov 16 19:18, Klaus Jensen wrote:
> On Nov 16 09:57, Keith Busch wrote:
> > On Thu, Nov 12, 2020 at 08:59:42PM +0100, Klaus Jensen wrote:
> > > +static void nvme_aio_err(NvmeRequest *req, int ret)
> > > +{
> > > +uint16_t status = NVME_SUCCESS;
> > > +Error *local_err = NULL;
> > > +
> > > +switch (req->cmd.opcode) {
> > > +case NVME_CMD_READ:
> > > +status = NVME_UNRECOVERED_READ;
> > > +break;
> > > +case NVME_CMD_FLUSH:
> > > +case NVME_CMD_WRITE:
> > > +case NVME_CMD_WRITE_ZEROES:
> > > +status = NVME_WRITE_FAULT;
> > > +break;
> > > +default:
> > > +status = NVME_INTERNAL_DEV_ERROR;
> > > +break;
> > > +}
> > 
> > Just curious, is there potentially a more appropriate way to set an nvme
> > status based on the value of 'ret'? What is 'ret' representing anyway?
> > Are these errno values?
> > 
> 
> Yes, it's errno values from down below.
> 
> But looking at this more closely, it actually looks like this is where
> we should behave as dictated by the rerror and werror drive options.
> 
> I'll do a follow up patch to fix that.

So, following up on this after looking more into it.

Currently, the device is basically behaving as if werror and rerror were
both set to "report" - that is, report the error to the guest.

Since we currently do not support werror and rerror, I think it is fine
to behave as if it was report and set a meaningful status code that fits
the command that failed (if we can).

But I'll start working on a patch to support rerror/werror, since it
would be nice to support.


signature.asc
Description: PGP signature


[PATCH v5 4/4] hw/block/m25p80: Fix Numonyx fast read dummy cycle count

2020-11-16 Thread Joe Komlodi
Numonyx chips determine the number of cycles to wait based on bits 7:4
in the volatile configuration register.

However, if these bits are 0x0 or 0xF, the number of dummy cycles to
wait is 10 for QIOR and QIOR4 commands or when in QIO mode, and otherwise 8 for
the currently supported fast read commands. [1]

[1]
https://www.micron.com/-/media/client/global/documents/products/data-sheet/nor-flash/serial-nor/mt25q/die-rev-b/mt25q_qlkt_u_02g_cbb_0.pdf?rev=9b167fbf2b3645efba6385949a72e453

Signed-off-by: Joe Komlodi 
Reviewed-by: Francisco Iglesias 
---
 hw/block/m25p80.c | 30 +++---
 1 file changed, 27 insertions(+), 3 deletions(-)

diff --git a/hw/block/m25p80.c b/hw/block/m25p80.c
index 56bd5bc..a67dc53 100644
--- a/hw/block/m25p80.c
+++ b/hw/block/m25p80.c
@@ -837,6 +837,30 @@ static uint8_t numonyx_mode(Flash *s)
 }
 }
 
+static uint8_t numonyx_extract_cfg_num_dummies(Flash *s)
+{
+uint8_t num_dummies;
+uint8_t mode;
+assert(get_man(s) == MAN_NUMONYX);
+
+mode = numonyx_mode(s);
+num_dummies = extract32(s->volatile_cfg, 4, 4);
+
+if (num_dummies == 0x0 || num_dummies == 0xf) {
+switch (s->cmd_in_progress) {
+case QIOR:
+case QIOR4:
+num_dummies = 10;
+break;
+default:
+num_dummies = (mode == MODE_QIO) ? 10 : 8;
+break;
+}
+}
+
+return num_dummies;
+}
+
 static void decode_fast_read_cmd(Flash *s)
 {
 s->needed_bytes = get_addr_length(s);
@@ -846,7 +870,7 @@ static void decode_fast_read_cmd(Flash *s)
 s->needed_bytes += 8;
 break;
 case MAN_NUMONYX:
-s->needed_bytes += extract32(s->volatile_cfg, 4, 4);
+s->needed_bytes += numonyx_extract_cfg_num_dummies(s);
 break;
 case MAN_MACRONIX:
 if (extract32(s->volatile_cfg, 6, 2) == 1) {
@@ -885,7 +909,7 @@ static void decode_dio_read_cmd(Flash *s)
 );
 break;
 case MAN_NUMONYX:
-s->needed_bytes += extract32(s->volatile_cfg, 4, 4);
+s->needed_bytes += numonyx_extract_cfg_num_dummies(s);
 break;
 case MAN_MACRONIX:
 switch (extract32(s->volatile_cfg, 6, 2)) {
@@ -925,7 +949,7 @@ static void decode_qio_read_cmd(Flash *s)
 );
 break;
 case MAN_NUMONYX:
-s->needed_bytes += extract32(s->volatile_cfg, 4, 4);
+s->needed_bytes += numonyx_extract_cfg_num_dummies(s);
 break;
 case MAN_MACRONIX:
 switch (extract32(s->volatile_cfg, 6, 2)) {
-- 
2.7.4




[PATCH v5 1/4] hw/block/m25p80: Make Numonyx config field names more accurate

2020-11-16 Thread Joe Komlodi
The previous naming of the configuration registers made it sound like that if
the bits were set the settings would be enabled, while the opposite is true.

Signed-off-by: Joe Komlodi 
Reviewed-by: Francisco Iglesias 
---
 hw/block/m25p80.c | 12 ++--
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/hw/block/m25p80.c b/hw/block/m25p80.c
index 483925f..452d252 100644
--- a/hw/block/m25p80.c
+++ b/hw/block/m25p80.c
@@ -136,7 +136,7 @@ typedef struct FlashPartInfo {
 #define VCFG_WRAP_SEQUENTIAL 0x2
 #define NVCFG_XIP_MODE_DISABLED (7 << 9)
 #define NVCFG_XIP_MODE_MASK (7 << 9)
-#define VCFG_XIP_MODE_ENABLED (1 << 3)
+#define VCFG_XIP_MODE_DISABLED (1 << 3)
 #define CFG_DUMMY_CLK_LEN 4
 #define NVCFG_DUMMY_CLK_POS 12
 #define VCFG_DUMMY_CLK_POS 4
@@ -144,9 +144,9 @@ typedef struct FlashPartInfo {
 #define EVCFG_VPP_ACCELERATOR (1 << 3)
 #define EVCFG_RESET_HOLD_ENABLED (1 << 4)
 #define NVCFG_DUAL_IO_MASK (1 << 2)
-#define EVCFG_DUAL_IO_ENABLED (1 << 6)
+#define EVCFG_DUAL_IO_DISABLED (1 << 6)
 #define NVCFG_QUAD_IO_MASK (1 << 3)
-#define EVCFG_QUAD_IO_ENABLED (1 << 7)
+#define EVCFG_QUAD_IO_DISABLED (1 << 7)
 #define NVCFG_4BYTE_ADDR_MASK (1 << 0)
 #define NVCFG_LOWER_SEGMENT_MASK (1 << 1)
 
@@ -769,7 +769,7 @@ static void reset_memory(Flash *s)
 s->volatile_cfg |= VCFG_WRAP_SEQUENTIAL;
 if ((s->nonvolatile_cfg & NVCFG_XIP_MODE_MASK)
 != NVCFG_XIP_MODE_DISABLED) {
-s->volatile_cfg |= VCFG_XIP_MODE_ENABLED;
+s->volatile_cfg |= VCFG_XIP_MODE_DISABLED;
 }
 s->volatile_cfg |= deposit32(s->volatile_cfg,
 VCFG_DUMMY_CLK_POS,
@@ -784,10 +784,10 @@ static void reset_memory(Flash *s)
 s->enh_volatile_cfg |= EVCFG_VPP_ACCELERATOR;
 s->enh_volatile_cfg |= EVCFG_RESET_HOLD_ENABLED;
 if (s->nonvolatile_cfg & NVCFG_DUAL_IO_MASK) {
-s->enh_volatile_cfg |= EVCFG_DUAL_IO_ENABLED;
+s->enh_volatile_cfg |= EVCFG_DUAL_IO_DISABLED;
 }
 if (s->nonvolatile_cfg & NVCFG_QUAD_IO_MASK) {
-s->enh_volatile_cfg |= EVCFG_QUAD_IO_ENABLED;
+s->enh_volatile_cfg |= EVCFG_QUAD_IO_DISABLED;
 }
 if (!(s->nonvolatile_cfg & NVCFG_4BYTE_ADDR_MASK)) {
 s->four_bytes_address_mode = true;
-- 
2.7.4




[PATCH v5 2/4] hw/block/m25p80: Fix when VCFG XIP bit is set for Numonyx

2020-11-16 Thread Joe Komlodi
VCFG XIP is set (disabled) when the NVCFG XIP bits are all set (disabled).

Signed-off-by: Joe Komlodi 
Reviewed-by: Francisco Iglesias 
---
 hw/block/m25p80.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/hw/block/m25p80.c b/hw/block/m25p80.c
index 452d252..eb6539f 100644
--- a/hw/block/m25p80.c
+++ b/hw/block/m25p80.c
@@ -768,7 +768,7 @@ static void reset_memory(Flash *s)
 s->volatile_cfg |= VCFG_DUMMY;
 s->volatile_cfg |= VCFG_WRAP_SEQUENTIAL;
 if ((s->nonvolatile_cfg & NVCFG_XIP_MODE_MASK)
-!= NVCFG_XIP_MODE_DISABLED) {
+== NVCFG_XIP_MODE_DISABLED) {
 s->volatile_cfg |= VCFG_XIP_MODE_DISABLED;
 }
 s->volatile_cfg |= deposit32(s->volatile_cfg,
-- 
2.7.4




[PATCH v5 0/4] hw/block/m25p80: Numonyx: Fix dummy cycles and check for SPI mode on cmds

2020-11-16 Thread Joe Komlodi
Changelog:
v4 -> v5
 - 3/4: Simplify logic when changing state and checking mode.
 - 3/4: numonyx_get_mode -> numonyx_mode
 - 4/4: Reword commit message to include QIO mode.

v3 -> v4
 - 1/4: Patch changed to change names of register fields to be more accurate.
 - 1/4: Revert polarity change from v3.
 - 2/4: Added, fixes polarity of VCFG XIP mode when copied from NVCFG.
 - 3/4: Removed check_cmd_mode function, each command check is done in 
decode_new_cmd instead.
 - 3/4: Add guest error print if JEDEC read is executed in QIO or DIO mode.
 - 3/4: Don't check PP and PP4, they work regardless of mode. PP4_4 is left as 
is.
 - 3/4: Simplify get_mode function.
 - 4/4: Simplify extract_cfg_num_dummies function.
 - 4/4: Use switch statement instead of table for cycle retrieving.

v2 -> v3
 - 1/3: Added, Fixes NVCFG polarity for DIO/QIO.
 - 2/3: Added, Checks if we can execute the current command in standard/DIO/QIO 
mode.
 - 3/3: Was 1/1 in v2.  Added cycle counts for DIO/QIO mode.

v1 -> v2
 - 1/2: Change function name to be more accurate
 - 2/2: Dropped

Hi all,

The series fixes the behavior of the dummy cycle register for Numonyx flashes so
it's closer to how hardware behaves.
It also checks if a command can be executed in the current SPI mode
(standard, DIO, or QIO) before extracting dummy cycles for the command.

On hardware, the dummy cycles for fast read commands are set to a specific value
(8 or 10) if the register is all 0s or 1s.
If the register value isn't all 0s or 1s, then the flash expects the amount of
cycles sent to be equal to the count in the register.

Thanks!
Joe

Joe Komlodi (4):
  hw/block/m25p80: Make Numonyx config field names more accurate
  hw/block/m25p80: Fix when VCFG XIP bit is set for Numonyx
  hw/block/m25p80: Check SPI mode before running some Numonyx commands
  hw/block/m25p80: Fix Numonyx fast read dummy cycle count

 hw/block/m25p80.c | 158 --
 1 file changed, 129 insertions(+), 29 deletions(-)

-- 
2.7.4




[PATCH v5 3/4] hw/block/m25p80: Check SPI mode before running some Numonyx commands

2020-11-16 Thread Joe Komlodi
Some Numonyx flash commands cannot be executed in DIO and QIO mode, such as
trying to do DPP or DOR when in QIO mode.

Signed-off-by: Joe Komlodi 
---
 hw/block/m25p80.c | 114 +-
 1 file changed, 95 insertions(+), 19 deletions(-)

diff --git a/hw/block/m25p80.c b/hw/block/m25p80.c
index eb6539f..56bd5bc 100644
--- a/hw/block/m25p80.c
+++ b/hw/block/m25p80.c
@@ -413,6 +413,12 @@ typedef enum {
 MAN_GENERIC,
 } Manufacturer;
 
+typedef enum {
+MODE_STD = 0,
+MODE_DIO = 1,
+MODE_QIO = 2
+} SPIMode;
+
 #define M25P80_INTERNAL_DATA_BUFFER_SZ 16
 
 struct Flash {
@@ -820,6 +826,17 @@ static void reset_memory(Flash *s)
 trace_m25p80_reset_done(s);
 }
 
+static uint8_t numonyx_mode(Flash *s)
+{
+if (!(s->enh_volatile_cfg & EVCFG_QUAD_IO_DISABLED)) {
+return MODE_QIO;
+} else if (!(s->enh_volatile_cfg & EVCFG_DUAL_IO_DISABLED)) {
+return MODE_DIO;
+} else {
+return MODE_STD;
+}
+}
+
 static void decode_fast_read_cmd(Flash *s)
 {
 s->needed_bytes = get_addr_length(s);
@@ -950,14 +967,8 @@ static void decode_new_cmd(Flash *s, uint32_t value)
 case ERASE4_32K:
 case ERASE_SECTOR:
 case ERASE4_SECTOR:
-case READ:
-case READ4:
-case DPP:
-case QPP:
-case QPP_4:
 case PP:
 case PP4:
-case PP4_4:
 case DIE_ERASE:
 case RDID_90:
 case RDID_AB:
@@ -966,24 +977,84 @@ static void decode_new_cmd(Flash *s, uint32_t value)
 s->len = 0;
 s->state = STATE_COLLECTING_DATA;
 break;
+case READ:
+case READ4:
+if (get_man(s) != MAN_NUMONYX || numonyx_mode(s) == MODE_STD) {
+s->needed_bytes = get_addr_length(s);
+s->pos = 0;
+s->len = 0;
+s->state = STATE_COLLECTING_DATA;
+} else {
+qemu_log_mask(LOG_GUEST_ERROR, "M25P80: Cannot execute cmd %x in "
+  "DIO or QIO mode\n", s->cmd_in_progress);
+}
+break;
+case DPP:
+if (get_man(s) != MAN_NUMONYX || numonyx_mode(s) != MODE_QIO) {
+s->needed_bytes = get_addr_length(s);
+s->pos = 0;
+s->len = 0;
+s->state = STATE_COLLECTING_DATA;
+} else {
+qemu_log_mask(LOG_GUEST_ERROR, "M25P80: Cannot execute cmd %x in "
+  "QIO mode\n", s->cmd_in_progress);
+}
+break;
+case QPP:
+case QPP_4:
+case PP4_4:
+if (get_man(s) != MAN_NUMONYX || numonyx_mode(s) != MODE_DIO) {
+s->needed_bytes = get_addr_length(s);
+s->pos = 0;
+s->len = 0;
+s->state = STATE_COLLECTING_DATA;
+} else {
+qemu_log_mask(LOG_GUEST_ERROR, "M25P80: Cannot execute cmd %x in "
+  "DIO mode\n", s->cmd_in_progress);
+}
+break;
 
 case FAST_READ:
 case FAST_READ4:
+decode_fast_read_cmd(s);
+break;
 case DOR:
 case DOR4:
+if (get_man(s) != MAN_NUMONYX || numonyx_mode(s) != MODE_QIO) {
+decode_fast_read_cmd(s);
+} else {
+qemu_log_mask(LOG_GUEST_ERROR, "M25P80: Cannot execute cmd %x in "
+  "QIO mode\n", s->cmd_in_progress);
+}
+break;
 case QOR:
 case QOR4:
-decode_fast_read_cmd(s);
+if (get_man(s) != MAN_NUMONYX || numonyx_mode(s) != MODE_DIO) {
+decode_fast_read_cmd(s);
+} else {
+qemu_log_mask(LOG_GUEST_ERROR, "M25P80: Cannot execute cmd %x in "
+  "DIO mode\n", s->cmd_in_progress);
+}
 break;
 
 case DIOR:
 case DIOR4:
-decode_dio_read_cmd(s);
+if (get_man(s) != MAN_NUMONYX || numonyx_mode(s) != MODE_QIO) {
+decode_dio_read_cmd(s);
+} else {
+qemu_log_mask(LOG_GUEST_ERROR, "M25P80: Cannot execute cmd %x in "
+  "QIO mode\n", s->cmd_in_progress);
+}
 break;
 
 case QIOR:
 case QIOR4:
-decode_qio_read_cmd(s);
+if (get_man(s) != MAN_NUMONYX || numonyx_mode(s) != MODE_DIO) {
+decode_qio_read_cmd(s);
+} else {
+qemu_log_mask(LOG_GUEST_ERROR, "M25P80: Cannot execute cmd %x in "
+  "DIO mode\n", s->cmd_in_progress);
+}
 break;
 
 case WRSR:
@@ -1035,17 +1106,22 @@ static void decode_new_cmd(Flash *s, uint32_t value)
 break;
 
 case JEDEC_READ:
-trace_m25p80_populated_jedec(s);
-for (i = 0; i < s->pi->id_len; i++) {
-s->data[i] = s->pi->id[i];
-}
-for (; i < SPI_NOR_MAX_ID_LEN; i++) {
-s->data[i] = 0;
-}
+if (get_man(s) != MAN_NUMONYX || numonyx_mode(s) == MODE_STD) {
+trace_m25p80_populated_jedec(s);
+for (i = 0; i < s->pi->id_len; i++) {
+s->data[i] = s->pi->id[i];
+   

[PULL 1/2] nbd: Silence Coverity false positive

2020-11-16 Thread Eric Blake
Coverity noticed (CID 1436125) that we check the return value of
nbd_extent_array_add in most places, but not at the end of
bitmap_to_extents().  The return value exists to break loops before a
future iteration, so there is nothing to check if we are already done
iterating.  Adding a cast to void, plus a comment why, pacifies
Coverity.

Signed-off-by: Eric Blake 
Message-Id: <2020163510.713855-1-ebl...@redhat.com>
[eblake: Prefer cast to void over odd && usage]
Reviewed-by: Richard Henderson 
---
 nbd/server.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/nbd/server.c b/nbd/server.c
index d145e1a69083..613ed2634ada 100644
--- a/nbd/server.c
+++ b/nbd/server.c
@@ -2129,8 +2129,8 @@ static void bitmap_to_extents(BdrvDirtyBitmap *bitmap,
 }

 if (!full) {
-/* last non dirty extent */
-nbd_extent_array_add(es, end - start, 0);
+/* last non dirty extent, nothing to do if array is now full */
+(void) nbd_extent_array_add(es, end - start, 0);
 }

 bdrv_dirty_bitmap_unlock(bitmap);
-- 
2.28.0




[PULL 2/2] iotests: Replace deprecated ConfigParser.readfp()

2020-11-16 Thread Eric Blake
From: Kevin Wolf 

iotest 277 fails on Fedora 33 (Python 3.9) because a deprecation warning
changes the output:

nbd-fault-injector.py:230: DeprecationWarning: This method will be
removed in future versions.  Use 'parser.read_file()' instead.

In fact, readfp() has already been deprecated in Python 3.2 and the
replacement has existed since the same version, so we can now
unconditionally switch to read_file().

Signed-off-by: Kevin Wolf 
Message-Id: <20201113100602.15936-1-kw...@redhat.com>
Reviewed-by: Eric Blake 
Reviewed-by: Philippe Mathieu-Daudé 
Signed-off-by: Eric Blake 
---
 tests/qemu-iotests/nbd-fault-injector.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/qemu-iotests/nbd-fault-injector.py 
b/tests/qemu-iotests/nbd-fault-injector.py
index 78f42c421432..6e11ef89b8b3 100755
--- a/tests/qemu-iotests/nbd-fault-injector.py
+++ b/tests/qemu-iotests/nbd-fault-injector.py
@@ -227,7 +227,7 @@ def parse_config(config):
 def load_rules(filename):
 config = configparser.RawConfigParser()
 with open(filename, 'rt') as f:
-config.readfp(f, filename)
+config.read_file(f, filename)
 return parse_config(config)

 def open_socket(path):
-- 
2.28.0




RE: [PATCH v4 3/4] hw/block/m25p80: Check SPI mode before running some Numonyx commands

2020-11-16 Thread Joe Komlodi
Hi Francisco,

-Original Message-
From: Francisco Iglesias  
Sent: Monday, November 16, 2020 7:59 AM
To: Joe Komlodi 
Cc: qemu-de...@nongnu.org; philippe.mathieu.da...@gmail.com; Francisco Eduardo 
Iglesias ; alist...@alistair23.me; qemu-block@nongnu.org; 
mre...@redhat.com
Subject: Re: [PATCH v4 3/4] hw/block/m25p80: Check SPI mode before running some 
Numonyx commands

Hi Joe,

On Thu, Nov 12, 2020 at 07:10:54PM -0800, Joe Komlodi wrote:
> Some Numonyx flash commands cannot be executed in DIO and QIO mode, 
> such as trying to do DPP or DOR when in QIO mode.
> 
> Signed-off-by: Joe Komlodi 
> ---
>  hw/block/m25p80.c | 134 
> +-
>  1 file changed, 112 insertions(+), 22 deletions(-)
> 
> diff --git a/hw/block/m25p80.c b/hw/block/m25p80.c index 
> eb6539f..2552f2c 100644
> --- a/hw/block/m25p80.c
> +++ b/hw/block/m25p80.c
> @@ -413,6 +413,12 @@ typedef enum {
>  MAN_GENERIC,
>  } Manufacturer;
>  
> +typedef enum {
> +MODE_STD = 0,
> +MODE_DIO = 1,
> +MODE_QIO = 2
> +} SPIMode;
> +
>  #define M25P80_INTERNAL_DATA_BUFFER_SZ 16
>  
>  struct Flash {
> @@ -820,6 +826,17 @@ static void reset_memory(Flash *s)
>  trace_m25p80_reset_done(s);
>  }
>  
> +static uint8_t numonyx_get_mode(Flash *s) {
> +if (!(s->enh_volatile_cfg & EVCFG_QUAD_IO_DISABLED)) {
> +return MODE_QIO;
> +} else if (!(s->enh_volatile_cfg & EVCFG_DUAL_IO_DISABLED)) {
> +return MODE_DIO;
> +} else {
> +return MODE_STD;
> +}
> +}
> +
>  static void decode_fast_read_cmd(Flash *s)  {
>  s->needed_bytes = get_addr_length(s); @@ -827,9 +844,11 @@ static 
> void decode_fast_read_cmd(Flash *s)
>  /* Dummy cycles - modeled with bytes writes instead of bits */
>  case MAN_WINBOND:
>  s->needed_bytes += 8;
> +s->state = STATE_COLLECTING_DATA;
>  break;
>  case MAN_NUMONYX:
>  s->needed_bytes += extract32(s->volatile_cfg, 4, 4);
> +s->state = STATE_COLLECTING_DATA;
>  break;
>  case MAN_MACRONIX:
>  if (extract32(s->volatile_cfg, 6, 2) == 1) { @@ -837,19 
> +856,21 @@ static void decode_fast_read_cmd(Flash *s)
>  } else {
>  s->needed_bytes += 8;
>  }
> +s->state = STATE_COLLECTING_DATA;
>  break;
>  case MAN_SPANSION:
>  s->needed_bytes += extract32(s->spansion_cr2v,
>  SPANSION_DUMMY_CLK_POS,
>  SPANSION_DUMMY_CLK_LEN
>  );
> +s->state = STATE_COLLECTING_DATA;
>  break;
>  default:
> +s->state = STATE_COLLECTING_DATA;
>  break;
>  }
>  s->pos = 0;
>  s->len = 0;
> -s->state = STATE_COLLECTING_DATA;

Above change in this function and the similar ones in below two functions don't 
seem to be needed anymore (s->state = STATE_COLLECTING_DATA is being done in 
all cases).
[Joe] Oops, I'll simplify that.

>  }
>  
>  static void decode_dio_read_cmd(Flash *s) @@ -859,6 +880,7 @@ static 
> void decode_dio_read_cmd(Flash *s)
>  switch (get_man(s)) {
>  case MAN_WINBOND:
>  s->needed_bytes += WINBOND_CONTINUOUS_READ_MODE_CMD_LEN;
> +s->state = STATE_COLLECTING_DATA;
>  break;
>  case MAN_SPANSION:
>  s->needed_bytes += SPANSION_CONTINUOUS_READ_MODE_CMD_LEN;
> @@ -866,9 +888,11 @@ static void decode_dio_read_cmd(Flash *s)
>  SPANSION_DUMMY_CLK_POS,
>  SPANSION_DUMMY_CLK_LEN
>  );
> +s->state = STATE_COLLECTING_DATA;
>  break;
>  case MAN_NUMONYX:
>  s->needed_bytes += extract32(s->volatile_cfg, 4, 4);
> +s->state = STATE_COLLECTING_DATA;
>  break;
>  case MAN_MACRONIX:
>  switch (extract32(s->volatile_cfg, 6, 2)) { @@ -882,13 
> +906,14 @@ static void decode_dio_read_cmd(Flash *s)
>  s->needed_bytes += 4;
>  break;
>  }
> +s->state = STATE_COLLECTING_DATA;
>  break;
>  default:
> +s->state = STATE_COLLECTING_DATA;
>  break;
>  }
>  s->pos = 0;
>  s->len = 0;
> -s->state = STATE_COLLECTING_DATA;
>  }
>  
>  static void decode_qio_read_cmd(Flash *s) @@ -899,6 +924,7 @@ static 
> void decode_qio_read_cmd(Flash *s)
>  case MAN_WINBOND:
>  s->needed_bytes += WINBOND_CONTINUOUS_READ_MODE_CMD_LEN;
>  s->needed_bytes += 4;
> +s->state = STATE_COLLECTING_DATA;
>  break;
>  case MAN_SPANSION:
>  s->needed_bytes += SPANSION_CONTINUOUS_READ_MODE_CMD_LEN;
> @@ -906,9 +932,11 @@ static void decode_qio_read_cmd(Flash *s)
>  SPANSION_DUMMY_CLK_POS,
>  SPANSION_DUMMY_CLK_LEN
>  );
> +s->state = STATE_COLLECTING_DATA;
>  

Re: [PATCH v8 2/5] hw/block/nvme: pull aio error handling

2020-11-16 Thread Klaus Jensen
On Nov 16 09:57, Keith Busch wrote:
> On Thu, Nov 12, 2020 at 08:59:42PM +0100, Klaus Jensen wrote:
> > +static void nvme_aio_err(NvmeRequest *req, int ret)
> > +{
> > +uint16_t status = NVME_SUCCESS;
> > +Error *local_err = NULL;
> > +
> > +switch (req->cmd.opcode) {
> > +case NVME_CMD_READ:
> > +status = NVME_UNRECOVERED_READ;
> > +break;
> > +case NVME_CMD_FLUSH:
> > +case NVME_CMD_WRITE:
> > +case NVME_CMD_WRITE_ZEROES:
> > +status = NVME_WRITE_FAULT;
> > +break;
> > +default:
> > +status = NVME_INTERNAL_DEV_ERROR;
> > +break;
> > +}
> 
> Just curious, is there potentially a more appropriate way to set an nvme
> status based on the value of 'ret'? What is 'ret' representing anyway?
> Are these errno values?
> 

Yes, it's errno values from down below.

But looking at this more closely, it actually looks like this is where
we should behave as dictated by the rerror and werror drive options.

I'll do a follow up patch to fix that.


signature.asc
Description: PGP signature


Re: [PATCH v8 5/5] hw/block/nvme: add the dataset management command

2020-11-16 Thread Keith Busch
On Thu, Nov 12, 2020 at 08:59:45PM +0100, Klaus Jensen wrote:
> From: Klaus Jensen 
> 
> Add support for the Dataset Management command and the Deallocate
> attribute. Deallocation results in discards being sent to the underlying
> block device. Whether of not the blocks are actually deallocated is
> affected by the same factors as Write Zeroes (see previous commit).
> 
>  format | discard | dsm (512B)  dsm (4KiB)  dsm (64KiB)
> 
>   qcow2ignore   n   n   n
>   qcow2unmapn   n   y
>   raw  ignore   n   n   n
>   raw  unmapn   y   y
> 
> Again, a raw format and 4KiB LBAs are preferable.
> 
> In order to set the Namespace Preferred Deallocate Granularity and
> Alignment fields (NPDG and NPDA), choose a sane minimum discard
> granularity of 4KiB. If we are using a passthru device supporting
> discard at a 512B granularity, user should set the discard_granularity
> property explicitly. NPDG and NPDA will also account for the
> cluster_size of the block driver if required (i.e. for QCOW2).
> 
> See NVM Express 1.3d, Section 6.7 ("Dataset Management command").

Looks good to me.

Reviewed-by: Keith Busch 



Re: [PATCH v8 3/5] hw/block/nvme: add dulbe support

2020-11-16 Thread Keith Busch
On Thu, Nov 12, 2020 at 08:59:43PM +0100, Klaus Jensen wrote:
> From: Klaus Jensen 
> 
> Add support for reporting the Deallocated or Unwritten Logical Block
> Error (DULBE).
> 
> Rely on the block status flags reported by the block layer and consider
> any block with the BDRV_BLOCK_ZERO flag to be deallocated.
> 
> Multiple factors affect when a Write Zeroes command result in
> deallocation of blocks.
> 
>   * the underlying file system block size
>   * the blockdev format
>   * the 'discard' and 'logical_block_size' parameters
> 
>  format | discard | wz (512B)  wz (4KiB)  wz (64KiB)
> -
>   qcow2ignore   n  n  y
>   qcow2unmapn  n  y
>   raw  ignore   n  y  y
>   raw  unmapn  y  y
> 
> So, this works best with an image in raw format and 4KiB LBAs, since
> holes can then be punched on a per-block basis (this assumes a file
> system with a 4kb block size, YMMV). A qcow2 image, uses a cluster size
> of 64KiB by default and blocks will only be marked deallocated if a full
> cluster is zeroed or discarded. However, this *is* consistent with the
> spec since Write Zeroes "should" deallocate the block if the Deallocate
> attribute is set and "may" deallocate if the Deallocate attribute is not
> set. Thus, we always try to deallocate (the BDRV_REQ_MAY_UNMAP flag is
> always set).

These all sound like reasonable constraints and consistent with the
spec.

Reviewed-by: Keith Busch 



Re: iotest 030 still occasionally intermittently failing

2020-11-16 Thread Peter Maydell
On Mon, 16 Nov 2020 at 17:34, Alberto Garcia  wrote:
> Do you know if there is a core dump or stack trace available ?

Nope, sorry. What you get is what the 'vm-build-netbsd' etc targets
produce, so if you want more diagnostics on failures you have to
arrange for the test harness to produce them...

thanks
-- PMM



Re: [PATCH v8 2/5] hw/block/nvme: pull aio error handling

2020-11-16 Thread Keith Busch
On Thu, Nov 12, 2020 at 08:59:42PM +0100, Klaus Jensen wrote:
> +static void nvme_aio_err(NvmeRequest *req, int ret)
> +{
> +uint16_t status = NVME_SUCCESS;
> +Error *local_err = NULL;
> +
> +switch (req->cmd.opcode) {
> +case NVME_CMD_READ:
> +status = NVME_UNRECOVERED_READ;
> +break;
> +case NVME_CMD_FLUSH:
> +case NVME_CMD_WRITE:
> +case NVME_CMD_WRITE_ZEROES:
> +status = NVME_WRITE_FAULT;
> +break;
> +default:
> +status = NVME_INTERNAL_DEV_ERROR;
> +break;
> +}

Just curious, is there potentially a more appropriate way to set an nvme
status based on the value of 'ret'? What is 'ret' representing anyway?
Are these errno values?



Re: iotest 030 still occasionally intermittently failing

2020-11-16 Thread Alberto Garcia
On Mon 16 Nov 2020 05:16:35 PM CET, Peter Maydell wrote:
> Just saw this on a test run on the OpenBSD VM build-and-test,
> so this test is still intermittently failing...
>
>
>   TESTiotest-qcow2: 030 [fail]
> QEMU  --
> "/home/qemu/qemu-test.h37iBt/build/tests/qemu-iotests/../../qemu-system-aarch64"
> -nodefaults -display none -accel qtest -machine virt
> QEMU_IMG  --
> "/home/qemu/qemu-test.h37iBt/build/tests/qemu-iotests/../../qemu-img"
> QEMU_IO   --
> "/home/qemu/qemu-test.h37iBt/build/tests/qemu-iotests/../../qemu-io"
> --cache writeback --aio threads -f qcow2
> QEMU_NBD  --
> "/home/qemu/qemu-test.h37iBt/build/tests/qemu-iotests/../../qemu-nbd"
> IMGFMT-- qcow2 (compat=1.1)
> IMGPROTO  -- file
> PLATFORM  -- OpenBSD/amd64 openbsd 6.8
> TEST_DIR  -- /home/qemu/qemu-test.h37iBt/build/tests/qemu-iotests/scratch
> SOCK_DIR  -- /tmp/tmp.XpU6JjBMml
> SOCKET_SCM_HELPER --
>
> --- /home/qemu/qemu-test.h37iBt/src/tests/qemu-iotests/030.out  Mon
> Nov 16 15:33:05 2020
> +++ /home/qemu/qemu-test.h37iBt/build/tests/qemu-iotests/030.out.bad
>  Mon Nov 16 16:03:42 2020
> @@ -1,5 +1,47 @@
> -...
> +WARNING:qemu.machine:qemu received signal 6; command:

Do you know if there is a core dump or stack trace available ?

Berto



iotest 030 still occasionally intermittently failing

2020-11-16 Thread Peter Maydell
Just saw this on a test run on the OpenBSD VM build-and-test,
so this test is still intermittently failing...


  TESTiotest-qcow2: 030 [fail]
QEMU  --
"/home/qemu/qemu-test.h37iBt/build/tests/qemu-iotests/../../qemu-system-aarch64"
-nodefaults -display none -accel qtest -machine virt
QEMU_IMG  --
"/home/qemu/qemu-test.h37iBt/build/tests/qemu-iotests/../../qemu-img"
QEMU_IO   --
"/home/qemu/qemu-test.h37iBt/build/tests/qemu-iotests/../../qemu-io"
--cache writeback --aio threads -f qcow2
QEMU_NBD  --
"/home/qemu/qemu-test.h37iBt/build/tests/qemu-iotests/../../qemu-nbd"
IMGFMT-- qcow2 (compat=1.1)
IMGPROTO  -- file
PLATFORM  -- OpenBSD/amd64 openbsd 6.8
TEST_DIR  -- /home/qemu/qemu-test.h37iBt/build/tests/qemu-iotests/scratch
SOCK_DIR  -- /tmp/tmp.XpU6JjBMml
SOCKET_SCM_HELPER --

--- /home/qemu/qemu-test.h37iBt/src/tests/qemu-iotests/030.out  Mon
Nov 16 15:33:05 2020
+++ /home/qemu/qemu-test.h37iBt/build/tests/qemu-iotests/030.out.bad
 Mon Nov 16 16:03:42 2020
@@ -1,5 +1,47 @@
-...
+WARNING:qemu.machine:qemu received signal 6; command:
"/home/qemu/qemu-test.h37iBt/build/tests/qemu-iotests/../../qemu-system-aarch64
-display none -vga none -chardev
socket,id=mon,path=/tmp/tmp.XpU6JjBMml/qemu-42319-monitor.sock -mon
chardev=mon,mode=control -qtest
unix:path=/tmp/tmp.XpU6JjBMml/qemu-42319-qtest.sock -accel qtest
-nodefaults -display none -accel qtest -machine virt -drive
if=virtio,id=drive0,file=/home/qemu/qemu-test.h37iBt/build/tests/qemu-iotests/scratch/img-8.img,format=qcow2,cache=writeback,aio=threads,backing.backing.backing.backing.backing.backing.backing.backing.node-name=node0,backing.backing.backing.backing.backing.backing.backing.node-name=node1,backing.backing.backing.backing.backing.backing.node-name=node2,backing.backing.backing.backing.backing.node-name=node3,backing.backing.backing.backing.node-name=node4,backing.backing.backing.node-name=node5,backing.backing.node-name=node6,backing.node-name=node7,node-name=node8"
+EE..
+==
+ERROR: test_stream_commit_2 (__main__.TestParallelOps)
 --
+Traceback (most recent call last):
+  File "030", line 492, in test_stream_commit_2
+self.assert_no_active_block_jobs()
+  File "/home/qemu/qemu-test.h37iBt/src/tests/qemu-iotests/iotests.py",
line 932, in assert_no_active_block_jobs
+result = self.vm.qmp('query-block-jobs')
+  File 
"/home/qemu/qemu-test.h37iBt/src/tests/qemu-iotests/../../python/qemu/machine.py",
line 559, in qmp
+return self._qmp.cmd(cmd, args=qmp_args)
+  File 
"/home/qemu/qemu-test.h37iBt/src/tests/qemu-iotests/../../python/qemu/qmp.py",
line 278, in cmd
+return self.cmd_obj(qmp_cmd)
+  File 
"/home/qemu/qemu-test.h37iBt/src/tests/qemu-iotests/../../python/qemu/qmp.py",
line 259, in cmd_obj
+raise QMPConnectError("Unexpected empty reply from server")
+qemu.qmp.QMPConnectError: Unexpected empty reply from server
+
+==
+ERROR: test_stream_commit_2 (__main__.TestParallelOps)
+--
+Traceback (most recent call last):
+  File 
"/home/qemu/qemu-test.h37iBt/src/tests/qemu-iotests/../../python/qemu/machine.py",
line 476, in _do_shutdown
+self._soft_shutdown(timeout, has_quit)
+  File 
"/home/qemu/qemu-test.h37iBt/src/tests/qemu-iotests/../../python/qemu/machine.py",
line 456, in _soft_shutdown
+self._qmp.cmd('quit')
+  File 
"/home/qemu/qemu-test.h37iBt/src/tests/qemu-iotests/../../python/qemu/qmp.py",
line 278, in cmd
+return self.cmd_obj(qmp_cmd)
+  File 
"/home/qemu/qemu-test.h37iBt/src/tests/qemu-iotests/../../python/qemu/qmp.py",
line 256, in cmd_obj
+self.__sock.sendall(json.dumps(qmp_cmd).encode('utf-8'))
+BrokenPipeError: [Errno 32] Broken pipe
+
+The above exception was the direct cause of the following exception:
+
+Traceback (most recent call last):
+  File "030", line 226, in tearDown
+self.vm.shutdown()
+  File 
"/home/qemu/qemu-test.h37iBt/src/tests/qemu-iotests/../../python/qemu/machine.py",
line 506, in shutdown
+self._do_shutdown(timeout, has_quit)
+  File 
"/home/qemu/qemu-test.h37iBt/src/tests/qemu-iotests/../../python/qemu/machine.py",
line 479, in _do_shutdown
+raise AbnormalShutdown("Could not perform graceful shutdown") \
+qemu.machine.AbnormalShutdown: Could not perform graceful shutdown
+
+--
 Ran 27 tests

-OK
+FAILED (errors=2, skipped=1)

thanks
-- PMM



Re: [PATCH v4 4/4] hw/block/m25p80: Fix Numonyx fast read dummy cycle count

2020-11-16 Thread Francisco Iglesias
Hi Joe,

On Thu, Nov 12, 2020 at 07:10:55PM -0800, Joe Komlodi wrote:
> Numonyx chips determine the number of cycles to wait based on bits 7:4
> in the volatile configuration register.
> 
> However, if these bits are 0x0 or 0xF, the number of dummy cycles to
> wait is
> 10 on a QIOR or QIOR4 command, or 8 on any other currently supported
> fast read command. [1]

With above changed to:

"
However, if these bits are 0x0 or 0xF, the number of dummy cycles to wait
is 10 on a QIOR or QIOR4 command or when in QIO mode and else 8 for the
currently supported fast read commands. [1]
"

Reviewed-by: Francisco Iglesias 

Best regards,
Francisco Iglesias


> 
> [1]
> https://www.micron.com/-/media/client/global/documents/products/data-sheet/nor-flash/serial-nor/mt25q/die-rev-b/mt25q_qlkt_u_02g_cbb_0.pdf?rev=9b167fbf2b3645efba6385949a72e453
> 
> Signed-off-by: Joe Komlodi 
> ---
>  hw/block/m25p80.c | 30 +++---
>  1 file changed, 27 insertions(+), 3 deletions(-)
> 
> diff --git a/hw/block/m25p80.c b/hw/block/m25p80.c
> index 2552f2c..0c78015 100644
> --- a/hw/block/m25p80.c
> +++ b/hw/block/m25p80.c
> @@ -837,6 +837,30 @@ static uint8_t numonyx_get_mode(Flash *s)
>  }
>  }
>  
> +static uint8_t numonyx_extract_cfg_num_dummies(Flash *s)
> +{
> +uint8_t num_dummies;
> +uint8_t mode;
> +assert(get_man(s) == MAN_NUMONYX);
> +
> +mode = numonyx_get_mode(s);
> +num_dummies = extract32(s->volatile_cfg, 4, 4);
> +
> +if (num_dummies == 0x0 || num_dummies == 0xf) {
> +switch (s->cmd_in_progress) {
> +case QIOR:
> +case QIOR4:
> +num_dummies = 10;
> +break;
> +default:
> +num_dummies = (mode == MODE_QIO) ? 10 : 8;
> +break;
> +}
> +}
> +
> +return num_dummies;
> +}
> +
>  static void decode_fast_read_cmd(Flash *s)
>  {
>  s->needed_bytes = get_addr_length(s);
> @@ -847,7 +871,7 @@ static void decode_fast_read_cmd(Flash *s)
>  s->state = STATE_COLLECTING_DATA;
>  break;
>  case MAN_NUMONYX:
> -s->needed_bytes += extract32(s->volatile_cfg, 4, 4);
> +s->needed_bytes += numonyx_extract_cfg_num_dummies(s);
>  s->state = STATE_COLLECTING_DATA;
>  break;
>  case MAN_MACRONIX:
> @@ -891,7 +915,7 @@ static void decode_dio_read_cmd(Flash *s)
>  s->state = STATE_COLLECTING_DATA;
>  break;
>  case MAN_NUMONYX:
> -s->needed_bytes += extract32(s->volatile_cfg, 4, 4);
> +s->needed_bytes += numonyx_extract_cfg_num_dummies(s);
>  s->state = STATE_COLLECTING_DATA;
>  break;
>  case MAN_MACRONIX:
> @@ -935,7 +959,7 @@ static void decode_qio_read_cmd(Flash *s)
>  s->state = STATE_COLLECTING_DATA;
>  break;
>  case MAN_NUMONYX:
> -s->needed_bytes += extract32(s->volatile_cfg, 4, 4);
> +s->needed_bytes += numonyx_extract_cfg_num_dummies(s);
>  s->state = STATE_COLLECTING_DATA;
>  break;
>  case MAN_MACRONIX:
> -- 
> 2.7.4
> 



Re: [PATCH v4 3/4] hw/block/m25p80: Check SPI mode before running some Numonyx commands

2020-11-16 Thread Francisco Iglesias
Hi Joe,

On Thu, Nov 12, 2020 at 07:10:54PM -0800, Joe Komlodi wrote:
> Some Numonyx flash commands cannot be executed in DIO and QIO mode, such as
> trying to do DPP or DOR when in QIO mode.
> 
> Signed-off-by: Joe Komlodi 
> ---
>  hw/block/m25p80.c | 134 
> +-
>  1 file changed, 112 insertions(+), 22 deletions(-)
> 
> diff --git a/hw/block/m25p80.c b/hw/block/m25p80.c
> index eb6539f..2552f2c 100644
> --- a/hw/block/m25p80.c
> +++ b/hw/block/m25p80.c
> @@ -413,6 +413,12 @@ typedef enum {
>  MAN_GENERIC,
>  } Manufacturer;
>  
> +typedef enum {
> +MODE_STD = 0,
> +MODE_DIO = 1,
> +MODE_QIO = 2
> +} SPIMode;
> +
>  #define M25P80_INTERNAL_DATA_BUFFER_SZ 16
>  
>  struct Flash {
> @@ -820,6 +826,17 @@ static void reset_memory(Flash *s)
>  trace_m25p80_reset_done(s);
>  }
>  
> +static uint8_t numonyx_get_mode(Flash *s)
> +{
> +if (!(s->enh_volatile_cfg & EVCFG_QUAD_IO_DISABLED)) {
> +return MODE_QIO;
> +} else if (!(s->enh_volatile_cfg & EVCFG_DUAL_IO_DISABLED)) {
> +return MODE_DIO;
> +} else {
> +return MODE_STD;
> +}
> +}
> +
>  static void decode_fast_read_cmd(Flash *s)
>  {
>  s->needed_bytes = get_addr_length(s);
> @@ -827,9 +844,11 @@ static void decode_fast_read_cmd(Flash *s)
>  /* Dummy cycles - modeled with bytes writes instead of bits */
>  case MAN_WINBOND:
>  s->needed_bytes += 8;
> +s->state = STATE_COLLECTING_DATA;
>  break;
>  case MAN_NUMONYX:
>  s->needed_bytes += extract32(s->volatile_cfg, 4, 4);
> +s->state = STATE_COLLECTING_DATA;
>  break;
>  case MAN_MACRONIX:
>  if (extract32(s->volatile_cfg, 6, 2) == 1) {
> @@ -837,19 +856,21 @@ static void decode_fast_read_cmd(Flash *s)
>  } else {
>  s->needed_bytes += 8;
>  }
> +s->state = STATE_COLLECTING_DATA;
>  break;
>  case MAN_SPANSION:
>  s->needed_bytes += extract32(s->spansion_cr2v,
>  SPANSION_DUMMY_CLK_POS,
>  SPANSION_DUMMY_CLK_LEN
>  );
> +s->state = STATE_COLLECTING_DATA;
>  break;
>  default:
> +s->state = STATE_COLLECTING_DATA;
>  break;
>  }
>  s->pos = 0;
>  s->len = 0;
> -s->state = STATE_COLLECTING_DATA;

Above change in this function and the similar ones in below two functions
don't seem to be needed anymore (s->state = STATE_COLLECTING_DATA is being done
in all cases).

>  }
>  
>  static void decode_dio_read_cmd(Flash *s)
> @@ -859,6 +880,7 @@ static void decode_dio_read_cmd(Flash *s)
>  switch (get_man(s)) {
>  case MAN_WINBOND:
>  s->needed_bytes += WINBOND_CONTINUOUS_READ_MODE_CMD_LEN;
> +s->state = STATE_COLLECTING_DATA;
>  break;
>  case MAN_SPANSION:
>  s->needed_bytes += SPANSION_CONTINUOUS_READ_MODE_CMD_LEN;
> @@ -866,9 +888,11 @@ static void decode_dio_read_cmd(Flash *s)
>  SPANSION_DUMMY_CLK_POS,
>  SPANSION_DUMMY_CLK_LEN
>  );
> +s->state = STATE_COLLECTING_DATA;
>  break;
>  case MAN_NUMONYX:
>  s->needed_bytes += extract32(s->volatile_cfg, 4, 4);
> +s->state = STATE_COLLECTING_DATA;
>  break;
>  case MAN_MACRONIX:
>  switch (extract32(s->volatile_cfg, 6, 2)) {
> @@ -882,13 +906,14 @@ static void decode_dio_read_cmd(Flash *s)
>  s->needed_bytes += 4;
>  break;
>  }
> +s->state = STATE_COLLECTING_DATA;
>  break;
>  default:
> +s->state = STATE_COLLECTING_DATA;
>  break;
>  }
>  s->pos = 0;
>  s->len = 0;
> -s->state = STATE_COLLECTING_DATA;
>  }
>  
>  static void decode_qio_read_cmd(Flash *s)
> @@ -899,6 +924,7 @@ static void decode_qio_read_cmd(Flash *s)
>  case MAN_WINBOND:
>  s->needed_bytes += WINBOND_CONTINUOUS_READ_MODE_CMD_LEN;
>  s->needed_bytes += 4;
> +s->state = STATE_COLLECTING_DATA;
>  break;
>  case MAN_SPANSION:
>  s->needed_bytes += SPANSION_CONTINUOUS_READ_MODE_CMD_LEN;
> @@ -906,9 +932,11 @@ static void decode_qio_read_cmd(Flash *s)
>  SPANSION_DUMMY_CLK_POS,
>  SPANSION_DUMMY_CLK_LEN
>  );
> +s->state = STATE_COLLECTING_DATA;
>  break;
>  case MAN_NUMONYX:
>  s->needed_bytes += extract32(s->volatile_cfg, 4, 4);
> +s->state = STATE_COLLECTING_DATA;
>  break;
>  case MAN_MACRONIX:
>  switch (extract32(s->volatile_cfg, 6, 2)) {
> @@ -922,13 +950,14 @@ static void decode_qio_read_cmd(Flash *s)
>  s->needed_bytes += 6;
>  break;
>  }
> +s->state = 

Re: [PATCH v4 2/4] hw/block/m25p80: Fix when VCFG XIP bit is set for Numonyx

2020-11-16 Thread Francisco Iglesias
On Thu, Nov 12, 2020 at 07:10:53PM -0800, Joe Komlodi wrote:
> VCFG XIP is set (disabled) when the NVCFG XIP bits are all set (disabled).
> 
> Signed-off-by: Joe Komlodi 

Reviewed-by: Francisco Iglesias 

> ---
>  hw/block/m25p80.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/hw/block/m25p80.c b/hw/block/m25p80.c
> index 452d252..eb6539f 100644
> --- a/hw/block/m25p80.c
> +++ b/hw/block/m25p80.c
> @@ -768,7 +768,7 @@ static void reset_memory(Flash *s)
>  s->volatile_cfg |= VCFG_DUMMY;
>  s->volatile_cfg |= VCFG_WRAP_SEQUENTIAL;
>  if ((s->nonvolatile_cfg & NVCFG_XIP_MODE_MASK)
> -!= NVCFG_XIP_MODE_DISABLED) {
> +== NVCFG_XIP_MODE_DISABLED) {
>  s->volatile_cfg |= VCFG_XIP_MODE_DISABLED;
>  }
>  s->volatile_cfg |= deposit32(s->volatile_cfg,
> -- 
> 2.7.4
> 



Re: [PATCH v4 1/4] hw/block/m25p80: Make Numonyx config field names more accurate

2020-11-16 Thread Francisco Iglesias
On Thu, Nov 12, 2020 at 07:10:52PM -0800, Joe Komlodi wrote:
> The previous naming of the configuration registers made it sound like that if
> the bits were set the settings would be enabled, while the opposite is true.
> 
> Signed-off-by: Joe Komlodi 

Reviewed-by: Francisco Iglesias 

> ---
>  hw/block/m25p80.c | 12 ++--
>  1 file changed, 6 insertions(+), 6 deletions(-)
> 
> diff --git a/hw/block/m25p80.c b/hw/block/m25p80.c
> index 483925f..452d252 100644
> --- a/hw/block/m25p80.c
> +++ b/hw/block/m25p80.c
> @@ -136,7 +136,7 @@ typedef struct FlashPartInfo {
>  #define VCFG_WRAP_SEQUENTIAL 0x2
>  #define NVCFG_XIP_MODE_DISABLED (7 << 9)
>  #define NVCFG_XIP_MODE_MASK (7 << 9)
> -#define VCFG_XIP_MODE_ENABLED (1 << 3)
> +#define VCFG_XIP_MODE_DISABLED (1 << 3)
>  #define CFG_DUMMY_CLK_LEN 4
>  #define NVCFG_DUMMY_CLK_POS 12
>  #define VCFG_DUMMY_CLK_POS 4
> @@ -144,9 +144,9 @@ typedef struct FlashPartInfo {
>  #define EVCFG_VPP_ACCELERATOR (1 << 3)
>  #define EVCFG_RESET_HOLD_ENABLED (1 << 4)
>  #define NVCFG_DUAL_IO_MASK (1 << 2)
> -#define EVCFG_DUAL_IO_ENABLED (1 << 6)
> +#define EVCFG_DUAL_IO_DISABLED (1 << 6)
>  #define NVCFG_QUAD_IO_MASK (1 << 3)
> -#define EVCFG_QUAD_IO_ENABLED (1 << 7)
> +#define EVCFG_QUAD_IO_DISABLED (1 << 7)
>  #define NVCFG_4BYTE_ADDR_MASK (1 << 0)
>  #define NVCFG_LOWER_SEGMENT_MASK (1 << 1)
>  
> @@ -769,7 +769,7 @@ static void reset_memory(Flash *s)
>  s->volatile_cfg |= VCFG_WRAP_SEQUENTIAL;
>  if ((s->nonvolatile_cfg & NVCFG_XIP_MODE_MASK)
>  != NVCFG_XIP_MODE_DISABLED) {
> -s->volatile_cfg |= VCFG_XIP_MODE_ENABLED;
> +s->volatile_cfg |= VCFG_XIP_MODE_DISABLED;
>  }
>  s->volatile_cfg |= deposit32(s->volatile_cfg,
>  VCFG_DUMMY_CLK_POS,
> @@ -784,10 +784,10 @@ static void reset_memory(Flash *s)
>  s->enh_volatile_cfg |= EVCFG_VPP_ACCELERATOR;
>  s->enh_volatile_cfg |= EVCFG_RESET_HOLD_ENABLED;
>  if (s->nonvolatile_cfg & NVCFG_DUAL_IO_MASK) {
> -s->enh_volatile_cfg |= EVCFG_DUAL_IO_ENABLED;
> +s->enh_volatile_cfg |= EVCFG_DUAL_IO_DISABLED;
>  }
>  if (s->nonvolatile_cfg & NVCFG_QUAD_IO_MASK) {
> -s->enh_volatile_cfg |= EVCFG_QUAD_IO_ENABLED;
> +s->enh_volatile_cfg |= EVCFG_QUAD_IO_DISABLED;
>  }
>  if (!(s->nonvolatile_cfg & NVCFG_4BYTE_ADDR_MASK)) {
>  s->four_bytes_address_mode = true;
> -- 
> 2.7.4
> 



Re: [PATCH] io_uring: do not use pointer after free

2020-11-16 Thread Kevin Wolf
Am 13.11.2020 um 16:41 hat Paolo Bonzini geschrieben:
> Even though only the pointer value is only printed, it is untidy
> and Coverity complains.
> 
> Cc: Stefan Hajnoczi 
> Signed-off-by: Paolo Bonzini 

Thanks, applied to the block branch.

Kevin




Re: [PATCH 1/2] file-posix: allow -EBUSY errors during write zeros on raw block devices

2020-11-16 Thread Kevin Wolf
Am 11.11.2020 um 16:39 hat Maxim Levitsky geschrieben:
> On Linux, fallocate(fd, FALLOC_FL_PUNCH_HOLE) when it is used on a block 
> device,
> without O_DIRECT can return -EBUSY if it races with another write to the same 
> page.
> 
> Since this is rare and discard is not a critical operation, ignore this error
> 
> Signed-off-by: Maxim Levitsky 

I'm applying this one for 5.2, it certainly shouldn't hurt and makes
things work at least, even if possibly not in the optimal way.

Patch 2 seems to be a bit less obvious and discussion is ongoing, so
that's probably more 6.0 material.

Kevin




Re: [RFC PATCH 0/5] ARM: reduce the memory consumed when mapping UEFI flash images

2020-11-16 Thread Markus Armbruster
Philippe Mathieu-Daudé  writes:

> Hi David,
>
> On 11/16/20 11:42 AM, David Edmondson wrote:
>> Currently ARM UEFI images are typically built as 2MB/768kB flash
>> images for code and variables respectively. These images are both then
>> padded out to 64MB before being loaded by QEMU.
>> 
>> Because the images are 64MB each, QEMU allocates 128MB of memory to
>> read them, and then proceeds to read all 128MB from disk (dirtying the
>> memory). Of this 128MB less than 3MB is useful - the rest is zero
>> padding.
>
> 2 years ago I commented the same problem, and suggested to access the
> underlying storage by "block", as this is a "block storage".
>
> Back then the response was "do not try to fix something that works".
> This is why we choose the big hammer "do not accept image size not
> matching device size" way.
>
> While your series seems to help, it only postpone the same
> implementation problem. If what you want is use the least memory
> required, I still believe accessing the device by block is the
> best approach.

"Do not try to fix something that works" is hard to disagree with.
However, at least some users seem to disagree with "this works".  Enough
to overcome the resistance to change?




Re: [RFC PATCH 0/5] ARM: reduce the memory consumed when mapping UEFI flash images

2020-11-16 Thread David Edmondson
On Monday, 2020-11-16 at 12:39:46 +01, Philippe Mathieu-Daudé wrote:

> Hi David,
>
> On 11/16/20 11:42 AM, David Edmondson wrote:
>> Currently ARM UEFI images are typically built as 2MB/768kB flash
>> images for code and variables respectively. These images are both then
>> padded out to 64MB before being loaded by QEMU.
>> 
>> Because the images are 64MB each, QEMU allocates 128MB of memory to
>> read them, and then proceeds to read all 128MB from disk (dirtying the
>> memory). Of this 128MB less than 3MB is useful - the rest is zero
>> padding.
>
> 2 years ago I commented the same problem, and suggested to access the
> underlying storage by "block", as this is a "block storage".
>
> Back then the response was "do not try to fix something that works".
> This is why we choose the big hammer "do not accept image size not
> matching device size" way.
>
> While your series seems to help, it only postpone the same
> implementation problem. If what you want is use the least memory
> required, I still believe accessing the device by block is the
> best approach.

I agree that would reduce the size further, but it feels like it may be
a case of diminishing returns.

Currently the consumption for a single guest is 128MB. This change will
bring it down under 3MB, with the block approach perhaps reducing that
to zero (there will be some buffer block usage presumably, and perhaps a
small cache would make sense, so it won't really be zero).

Scaling that up for 100 guests, we're going from 12.5GB now down to
under 300MB with the changes, and again something around zero with the
block based approach.

I guess that it would mean that the machine model wouldn't have to
change, as we could return zeros for reads outside the underlying device
extent. This seems like the most interesting part - are there likely to
be any consequential *benefits* from reducing the amount of guest
address space consumed by the flash regions?

> Regards,
>
> Phil.
>
>> 
>> On a machine with 100 VMs this wastes over 12GB of memory.
>> 
>> This set of patches aims to reclaim the wasted memory by allowing QEMU
>> to respect the size of the flash images and allocate only the
>> necessary memory. This will, of course, require that the flash build
>> process be modified to avoid padding the images to 64MB.
>> 
>> Because existing machine types expect the full 128MB reserved for
>> flash to be occupied, do so for machine types older than virt-5.2. The
>> changes are beneficial even in this case, because while the full 128MB
>> of memory is allocated, only that required to actually load the flash
>> images from disk is touched.
>> 
>> David Edmondson (5):
>>   hw/block: blk_check_size_and_read_all should report backend name
>>   hw/block: Flash images can be smaller than the device
>>   hw/arm: Convert assertions about flash image size to error_report
>>   hw/arm: Flash image mapping follows image size
>>   hw/arm: Only minimise flash size on older machines
>> 
>>  hw/arm/trace-events  |  2 +
>>  hw/arm/virt-acpi-build.c | 30 --
>>  hw/arm/virt.c| 86 +---
>>  hw/block/block.c | 26 ++--
>>  include/hw/arm/virt.h|  2 +
>>  5 files changed, 97 insertions(+), 49 deletions(-)
>> 

dme.
-- 
Please forgive me if I act a little strange, for I know not what I do.



Re: [PATCH v2 7/7] qapi: More complex uses of QAPI_LIST_APPEND

2020-11-16 Thread Eric Blake
On 11/13/20 1:39 PM, Dr. David Alan Gilbert wrote:
> * Eric Blake (ebl...@redhat.com) wrote:
>> These cases require a bit more thought to review; in each case, the
>> code was appending to a list, but not with a FOOList **tail variable.
>>
>> Signed-off-by: Eric Blake 
>> ---

> 
> 
> 

>> +++ b/monitor/hmp-cmds.c
>> @@ -1699,7 +1699,8 @@ void hmp_closefd(Monitor *mon, const QDict *qdict)
>>  void hmp_sendkey(Monitor *mon, const QDict *qdict)
>>  {
>>  const char *keys = qdict_get_str(qdict, "keys");
>> -KeyValueList *keylist, *head = NULL, *tmp = NULL;
>> +KeyValue *v;
>> +KeyValueList *head = NULL, **tail = 
>>  int has_hold_time = qdict_haskey(qdict, "hold-time");
>>  int hold_time = qdict_get_try_int(qdict, "hold-time", -1);
>>  Error *err = NULL;
>> @@ -1716,16 +1717,7 @@ void hmp_sendkey(Monitor *mon, const QDict *qdict)
>>  keyname_len = 4;
>>  }
>>
>> -keylist = g_malloc0(sizeof(*keylist));
>> -keylist->value = g_malloc0(sizeof(*keylist->value));
>> -
>> -if (!head) {
>> -head = keylist;
>> -}
>> -if (tmp) {
>> -tmp->next = keylist;
>> -}
>> -tmp = keylist;
>> +v = g_malloc0(sizeof(*v));
>>
>>  if (strstart(keys, "0x", NULL)) {
>>  char *endp;
>> @@ -1734,16 +1726,17 @@ void hmp_sendkey(Monitor *mon, const QDict *qdict)
>>  if (endp != keys + keyname_len) {
>>  goto err_out;
>>  }
>> -keylist->value->type = KEY_VALUE_KIND_NUMBER;
>> -keylist->value->u.number.data = value;
>> +v->type = KEY_VALUE_KIND_NUMBER;
>> +v->u.number.data = value;
>>  } else {
>>  int idx = index_from_key(keys, keyname_len);
>>  if (idx == Q_KEY_CODE__MAX) {
>>  goto err_out;
>>  }
>> -keylist->value->type = KEY_VALUE_KIND_QCODE;
>> -keylist->value->u.qcode.data = idx;
>> +v->type = KEY_VALUE_KIND_QCODE;
>> +v->u.qcode.data = idx;
>>  }
>> +QAPI_LIST_APPEND(tail, v);
>>
>>  if (!*separator) {
>>  break;
> 
> Don't you need to arrange for 'v' to be free'd in the err_out case?

Good catch.  Pre-patch, the allocation was appended to the list before
it was possible to reach 'goto err_out', but post-patch, the use of a
separate variable and delayed addition to the list matters.  Will fix.

-- 
Eric Blake, Principal Software Engineer
Red Hat, Inc.   +1-919-301-3226
Virtualization:  qemu.org | libvirt.org




Re: [RFC PATCH 1/5] hw/block: blk_check_size_and_read_all should report backend name

2020-11-16 Thread David Edmondson
On Monday, 2020-11-16 at 12:23:24 +01, Philippe Mathieu-Daudé wrote:

> On 11/16/20 11:42 AM, David Edmondson wrote:
>> If there are problems examining or reading data from the block
>> backend, the error messages should include an appropriate identifier
>> to assist in diagnoses.
>> 
>> Signed-off-by: David Edmondson 
>> ---
>>  hw/block/block.c | 14 +++---
>>  1 file changed, 7 insertions(+), 7 deletions(-)
>> 
>> diff --git a/hw/block/block.c b/hw/block/block.c
>> index 1e34573da7..8b284e1f14 100644
>> --- a/hw/block/block.c
>> +++ b/hw/block/block.c
>> @@ -20,9 +20,6 @@
>>   * BDRV_REQUEST_MAX_BYTES.
>>   * On success, return true.
>>   * On failure, store an error through @errp and return false.
>> - * Note that the error messages do not identify the block backend.
>> - * TODO Since callers don't either, this can result in confusing
>> - * errors.
>>   * This function not intended for actual block devices, which read on
>>   * demand.  It's for things like memory devices that (ab)use a block
>>   * backend to provide persistence.
>> @@ -32,17 +29,19 @@ bool blk_check_size_and_read_all(BlockBackend *blk, void 
>> *buf, hwaddr size,
>>  {
>>  int64_t blk_len;
>>  int ret;
>> +const char *name = blk_name(blk);
>>  
>>  blk_len = blk_getlength(blk);
>>  if (blk_len < 0) {
>>  error_setg_errno(errp, -blk_len,
>> - "can't get size of block backend");
>> + "can't get size of block backend %s",
>
> Maybe '%s' to notice empty name?

Okay.

>> + name);
>>  return false;
>>  }
>>  if (blk_len != size) {
>>  error_setg(errp, "device requires %" HWADDR_PRIu " bytes, "
>> -   "block backend provides %" PRIu64 " bytes",
>> -   size, blk_len);
>> +   "block backend %s provides %" PRIu64 " bytes",
>> +   size, name, blk_len);
>>  return false;
>>  }
>>  
>> @@ -55,7 +54,8 @@ bool blk_check_size_and_read_all(BlockBackend *blk, void 
>> *buf, hwaddr size,
>>  assert(size <= BDRV_REQUEST_MAX_BYTES);
>>  ret = blk_pread(blk, 0, buf, size);
>>  if (ret < 0) {
>> -error_setg_errno(errp, -ret, "can't read block backend");
>> +error_setg_errno(errp, -ret, "can't read block backend %s",
>> + name);
>>  return false;
>>  }
>>  return true;
>> 

dme.
-- 
She looks like Eve Marie Saint in "On the Waterfront".



Re: [PATCH for-5.2] iotests: Replace deprecated ConfigParser.readfp()

2020-11-16 Thread Philippe Mathieu-Daudé
On 11/13/20 11:06 AM, Kevin Wolf wrote:
> iotest 277 fails on Fedora 33 (Python 3.9) because a deprecation warning
> changes the output:
> 
> nbd-fault-injector.py:230: DeprecationWarning: This method will be
> removed in future versions.  Use 'parser.read_file()' instead.
> 
> In fact, readfp() has already been deprecated in Python 3.2 and the
> replacement has existed since the same version, so we can now
> unconditionally switch to read_file().
> 
> Signed-off-by: Kevin Wolf 
> ---
>  tests/qemu-iotests/nbd-fault-injector.py | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)

Reviewed-by: Philippe Mathieu-Daudé 




Re: [PATCH 1/3] quorum: Require WRITE perm with rewrite-corrupted

2020-11-16 Thread Alberto Garcia
On Fri 13 Nov 2020 10:17:16 PM CET, Max Reitz wrote:
> Using rewrite-corrupted means quorum may issue writes to its children
> just from receiving read requests from its parents.  Thus, it must take
> the WRITE permission when rewrite-corrupted is used.
>
> Signed-off-by: Max Reitz 

Reviewed-by: Alberto Garcia 

Berto



Re: [PATCH-for-5.2] io_uring: do not use pointer after free

2020-11-16 Thread Philippe Mathieu-Daudé
On 11/13/20 4:41 PM, Paolo Bonzini wrote:
> Even though only the pointer value is only printed, it is untidy
> and Coverity complains.
> 
> Cc: Stefan Hajnoczi 
> Signed-off-by: Paolo Bonzini 
> ---
>  block/io_uring.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)

Reviewed-by: Philippe Mathieu-Daudé 




Re: [PATCH v8 3/5] hw/block/nvme: add dulbe support

2020-11-16 Thread Klaus Jensen
On Nov 16 20:43, Minwoo Im wrote:
> On 11/12 20:59, Klaus Jensen wrote:
> > From: Klaus Jensen 
> > 
> > Add support for reporting the Deallocated or Unwritten Logical Block
> > Error (DULBE).
> > 
> > Rely on the block status flags reported by the block layer and consider
> > any block with the BDRV_BLOCK_ZERO flag to be deallocated.
> > 
> > Multiple factors affect when a Write Zeroes command result in
> > deallocation of blocks.
> > 
> >   * the underlying file system block size
> >   * the blockdev format
> >   * the 'discard' and 'logical_block_size' parameters
> > 
> >  format | discard | wz (512B)  wz (4KiB)  wz (64KiB)
> > -
> >   qcow2ignore   n  n  y
> >   qcow2unmapn  n  y
> >   raw  ignore   n  y  y
> >   raw  unmapn  y  y
> > 
> > So, this works best with an image in raw format and 4KiB LBAs, since
> > holes can then be punched on a per-block basis (this assumes a file
> > system with a 4kb block size, YMMV). A qcow2 image, uses a cluster size
> > of 64KiB by default and blocks will only be marked deallocated if a full
> > cluster is zeroed or discarded. However, this *is* consistent with the
> > spec since Write Zeroes "should" deallocate the block if the Deallocate
> > attribute is set and "may" deallocate if the Deallocate attribute is not
> > set. Thus, we always try to deallocate (the BDRV_REQ_MAY_UNMAP flag is
> > always set).
> > 
> > Signed-off-by: Klaus Jensen 
> > ---
> >  hw/block/nvme-ns.h|  4 ++
> >  include/block/nvme.h  |  5 +++
> >  hw/block/nvme-ns.c|  8 ++--
> >  hw/block/nvme.c   | 91 ++-
> >  hw/block/trace-events |  4 ++
> >  5 files changed, 107 insertions(+), 5 deletions(-)
> > 
> > diff --git a/hw/block/nvme-ns.h b/hw/block/nvme-ns.h
> > index 83734f4606e1..44bf6271b744 100644
> > --- a/hw/block/nvme-ns.h
> > +++ b/hw/block/nvme-ns.h
> > @@ -31,6 +31,10 @@ typedef struct NvmeNamespace {
> >  NvmeIdNs id_ns;
> >  
> >  NvmeNamespaceParams params;
> > +
> > +struct {
> > +uint32_t err_rec;
> > +} features;
> >  } NvmeNamespace;
> >  
> >  static inline uint32_t nvme_nsid(NvmeNamespace *ns)
> > diff --git a/include/block/nvme.h b/include/block/nvme.h
> > index 8a46d9cf015f..966c3bb304bd 100644
> > --- a/include/block/nvme.h
> > +++ b/include/block/nvme.h
> > @@ -687,6 +687,7 @@ enum NvmeStatusCodes {
> >  NVME_E2E_REF_ERROR  = 0x0284,
> >  NVME_CMP_FAILURE= 0x0285,
> >  NVME_ACCESS_DENIED  = 0x0286,
> > +NVME_DULB   = 0x0287,
> >  NVME_MORE   = 0x2000,
> >  NVME_DNR= 0x4000,
> >  NVME_NO_COMPLETE= 0x,
> > @@ -903,6 +904,9 @@ enum NvmeIdCtrlLpa {
> >  #define NVME_AEC_NS_ATTR(aec)   ((aec >> 8) & 0x1)
> >  #define NVME_AEC_FW_ACTIVATION(aec) ((aec >> 9) & 0x1)
> >  
> > +#define NVME_ERR_REC_TLER(err_rec)  (err_rec & 0x)
> > +#define NVME_ERR_REC_DULBE(err_rec) (err_rec & 0x1)
> > +
> >  enum NvmeFeatureIds {
> >  NVME_ARBITRATION= 0x1,
> >  NVME_POWER_MANAGEMENT   = 0x2,
> > @@ -1023,6 +1027,7 @@ enum NvmeNsIdentifierType {
> >  
> >  
> >  #define NVME_ID_NS_NSFEAT_THIN(nsfeat)  ((nsfeat & 0x1))
> > +#define NVME_ID_NS_NSFEAT_DULBE(nsfeat) ((nsfeat >> 2) & 0x1)
> >  #define NVME_ID_NS_FLBAS_EXTENDED(flbas)((flbas >> 4) & 0x1)
> >  #define NVME_ID_NS_FLBAS_INDEX(flbas)   ((flbas & 0xf))
> >  #define NVME_ID_NS_MC_SEPARATE(mc)  ((mc >> 1) & 0x1)
> > diff --git a/hw/block/nvme-ns.c b/hw/block/nvme-ns.c
> > index 31c80cdf5b5f..f1cc734c60f5 100644
> > --- a/hw/block/nvme-ns.c
> > +++ b/hw/block/nvme-ns.c
> > @@ -33,9 +33,7 @@ static void nvme_ns_init(NvmeNamespace *ns)
> >  NvmeIdNs *id_ns = >id_ns;
> >  int lba_index = NVME_ID_NS_FLBAS_INDEX(ns->id_ns.flbas);
> >  
> > -if (blk_get_flags(ns->blkconf.blk) & BDRV_O_UNMAP) {
> > -ns->id_ns.dlfeat = 0x9;
> > -}
> > +ns->id_ns.dlfeat = 0x9;
> >  
> >  id_ns->lbaf[lba_index].ds = 31 - clz32(ns->blkconf.logical_block_size);
> >  
> > @@ -44,6 +42,9 @@ static void nvme_ns_init(NvmeNamespace *ns)
> >  /* no thin provisioning */
> >  id_ns->ncap = id_ns->nsze;
> >  id_ns->nuse = id_ns->ncap;
> > +
> > +/* support DULBE */
> > +id_ns->nsfeat |= 0x4;
> >  }
> >  
> >  static int nvme_ns_init_blk(NvmeCtrl *n, NvmeNamespace *ns, Error **errp)
> > @@ -92,6 +93,7 @@ int nvme_ns_setup(NvmeCtrl *n, NvmeNamespace *ns, Error 
> > **errp)
> >  }
> >  
> >  nvme_ns_init(ns);
> > +
> >  if (nvme_register_namespace(n, ns, errp)) {
> >  return -1;
> >  }
> > diff --git a/hw/block/nvme.c b/hw/block/nvme.c
> > index a96a996ddc0d..8d75a89fd872 100644
> > --- a/hw/block/nvme.c
> > +++ b/hw/block/nvme.c
> > @@ -105,6 +105,7 @@ static const bool 

Re: [PATCH v8 2/5] hw/block/nvme: pull aio error handling

2020-11-16 Thread Klaus Jensen
On Nov 16 20:36, Minwoo Im wrote:
> On 11/12 20:59, Klaus Jensen wrote:
> > From: Klaus Jensen 
> > 
> > Add a new function, nvme_aio_err, to handle errors resulting from AIOs
> > and use this from the callbacks.
> > 
> > Signed-off-by: Klaus Jensen 
> > ---
> >  hw/block/nvme.c | 61 +
> >  1 file changed, 36 insertions(+), 25 deletions(-)
> > 
> > diff --git a/hw/block/nvme.c b/hw/block/nvme.c
> > index 51f35e8cf18b..a96a996ddc0d 100644
> > --- a/hw/block/nvme.c
> > +++ b/hw/block/nvme.c
> > @@ -878,6 +878,41 @@ static inline uint16_t nvme_check_bounds(NvmeNamespace 
> > *ns, uint64_t slba,
> >  return NVME_SUCCESS;
> >  }
> >  
> > +static void nvme_aio_err(NvmeRequest *req, int ret)
> > +{
> > +uint16_t status = NVME_SUCCESS;
> > +Error *local_err = NULL;
> > +
> > +switch (req->cmd.opcode) {
> > +case NVME_CMD_READ:
> > +status = NVME_UNRECOVERED_READ;
> > +break;
> > +case NVME_CMD_FLUSH:
> > +case NVME_CMD_WRITE:
> > +case NVME_CMD_WRITE_ZEROES:
> > +status = NVME_WRITE_FAULT;
> > +break;
> > +default:
> > +status = NVME_INTERNAL_DEV_ERROR;
> > +break;
> > +}
> > +
> > +trace_pci_nvme_err_aio(nvme_cid(req), strerror(ret), status);
> > +
> > +error_setg_errno(_err, -ret, "aio failed");
> > +error_report_err(local_err);
> > +
> > +/*
> > + * Set the command status code to the first encountered error but 
> > allow a
> > + * subsequent Internal Device Error to trump it.
> > + */
> > +if (req->status && status != NVME_INTERNAL_DEV_ERROR) {
> > +return;
> > +}
> 
> Are we okay with the trace print up there in case that this if is taken?
> I guess if this if is taken, the trace print may not that matter.
> 

Yeah, I was thinking we always print the error that corresponds to the
AIO that we are handling right now.

But maybe we should not include the "translated" nvme error in the trace
at all. That might make more sense.


signature.asc
Description: PGP signature


Re: [PATCH v8 4/5] nvme: add namespace I/O optimization fields to shared header

2020-11-16 Thread Minwoo Im
On 11/12 20:59, Klaus Jensen wrote:
> From: Klaus Jensen 
> 
> This adds the NPWG, NPWA, NPDG, NPDA and NOWS family of fields to the
> shared nvme.h header for use by later patches.
> 
> Signed-off-by: Klaus Jensen 
> Cc: Stefan Hajnoczi 
> Cc: Fam Zheng 
> Reviewed-by: Stefan Hajnoczi 

Reviewed-by: Minwoo Im 



Re: [PATCH v8 3/5] hw/block/nvme: add dulbe support

2020-11-16 Thread Minwoo Im
On 11/12 20:59, Klaus Jensen wrote:
> From: Klaus Jensen 
> 
> Add support for reporting the Deallocated or Unwritten Logical Block
> Error (DULBE).
> 
> Rely on the block status flags reported by the block layer and consider
> any block with the BDRV_BLOCK_ZERO flag to be deallocated.
> 
> Multiple factors affect when a Write Zeroes command result in
> deallocation of blocks.
> 
>   * the underlying file system block size
>   * the blockdev format
>   * the 'discard' and 'logical_block_size' parameters
> 
>  format | discard | wz (512B)  wz (4KiB)  wz (64KiB)
> -
>   qcow2ignore   n  n  y
>   qcow2unmapn  n  y
>   raw  ignore   n  y  y
>   raw  unmapn  y  y
> 
> So, this works best with an image in raw format and 4KiB LBAs, since
> holes can then be punched on a per-block basis (this assumes a file
> system with a 4kb block size, YMMV). A qcow2 image, uses a cluster size
> of 64KiB by default and blocks will only be marked deallocated if a full
> cluster is zeroed or discarded. However, this *is* consistent with the
> spec since Write Zeroes "should" deallocate the block if the Deallocate
> attribute is set and "may" deallocate if the Deallocate attribute is not
> set. Thus, we always try to deallocate (the BDRV_REQ_MAY_UNMAP flag is
> always set).
> 
> Signed-off-by: Klaus Jensen 
> ---
>  hw/block/nvme-ns.h|  4 ++
>  include/block/nvme.h  |  5 +++
>  hw/block/nvme-ns.c|  8 ++--
>  hw/block/nvme.c   | 91 ++-
>  hw/block/trace-events |  4 ++
>  5 files changed, 107 insertions(+), 5 deletions(-)
> 
> diff --git a/hw/block/nvme-ns.h b/hw/block/nvme-ns.h
> index 83734f4606e1..44bf6271b744 100644
> --- a/hw/block/nvme-ns.h
> +++ b/hw/block/nvme-ns.h
> @@ -31,6 +31,10 @@ typedef struct NvmeNamespace {
>  NvmeIdNs id_ns;
>  
>  NvmeNamespaceParams params;
> +
> +struct {
> +uint32_t err_rec;
> +} features;
>  } NvmeNamespace;
>  
>  static inline uint32_t nvme_nsid(NvmeNamespace *ns)
> diff --git a/include/block/nvme.h b/include/block/nvme.h
> index 8a46d9cf015f..966c3bb304bd 100644
> --- a/include/block/nvme.h
> +++ b/include/block/nvme.h
> @@ -687,6 +687,7 @@ enum NvmeStatusCodes {
>  NVME_E2E_REF_ERROR  = 0x0284,
>  NVME_CMP_FAILURE= 0x0285,
>  NVME_ACCESS_DENIED  = 0x0286,
> +NVME_DULB   = 0x0287,
>  NVME_MORE   = 0x2000,
>  NVME_DNR= 0x4000,
>  NVME_NO_COMPLETE= 0x,
> @@ -903,6 +904,9 @@ enum NvmeIdCtrlLpa {
>  #define NVME_AEC_NS_ATTR(aec)   ((aec >> 8) & 0x1)
>  #define NVME_AEC_FW_ACTIVATION(aec) ((aec >> 9) & 0x1)
>  
> +#define NVME_ERR_REC_TLER(err_rec)  (err_rec & 0x)
> +#define NVME_ERR_REC_DULBE(err_rec) (err_rec & 0x1)
> +
>  enum NvmeFeatureIds {
>  NVME_ARBITRATION= 0x1,
>  NVME_POWER_MANAGEMENT   = 0x2,
> @@ -1023,6 +1027,7 @@ enum NvmeNsIdentifierType {
>  
>  
>  #define NVME_ID_NS_NSFEAT_THIN(nsfeat)  ((nsfeat & 0x1))
> +#define NVME_ID_NS_NSFEAT_DULBE(nsfeat) ((nsfeat >> 2) & 0x1)
>  #define NVME_ID_NS_FLBAS_EXTENDED(flbas)((flbas >> 4) & 0x1)
>  #define NVME_ID_NS_FLBAS_INDEX(flbas)   ((flbas & 0xf))
>  #define NVME_ID_NS_MC_SEPARATE(mc)  ((mc >> 1) & 0x1)
> diff --git a/hw/block/nvme-ns.c b/hw/block/nvme-ns.c
> index 31c80cdf5b5f..f1cc734c60f5 100644
> --- a/hw/block/nvme-ns.c
> +++ b/hw/block/nvme-ns.c
> @@ -33,9 +33,7 @@ static void nvme_ns_init(NvmeNamespace *ns)
>  NvmeIdNs *id_ns = >id_ns;
>  int lba_index = NVME_ID_NS_FLBAS_INDEX(ns->id_ns.flbas);
>  
> -if (blk_get_flags(ns->blkconf.blk) & BDRV_O_UNMAP) {
> -ns->id_ns.dlfeat = 0x9;
> -}
> +ns->id_ns.dlfeat = 0x9;
>  
>  id_ns->lbaf[lba_index].ds = 31 - clz32(ns->blkconf.logical_block_size);
>  
> @@ -44,6 +42,9 @@ static void nvme_ns_init(NvmeNamespace *ns)
>  /* no thin provisioning */
>  id_ns->ncap = id_ns->nsze;
>  id_ns->nuse = id_ns->ncap;
> +
> +/* support DULBE */
> +id_ns->nsfeat |= 0x4;
>  }
>  
>  static int nvme_ns_init_blk(NvmeCtrl *n, NvmeNamespace *ns, Error **errp)
> @@ -92,6 +93,7 @@ int nvme_ns_setup(NvmeCtrl *n, NvmeNamespace *ns, Error 
> **errp)
>  }
>  
>  nvme_ns_init(ns);
> +
>  if (nvme_register_namespace(n, ns, errp)) {
>  return -1;
>  }
> diff --git a/hw/block/nvme.c b/hw/block/nvme.c
> index a96a996ddc0d..8d75a89fd872 100644
> --- a/hw/block/nvme.c
> +++ b/hw/block/nvme.c
> @@ -105,6 +105,7 @@ static const bool nvme_feature_support[NVME_FID_MAX] = {
>  
>  static const uint32_t nvme_feature_cap[NVME_FID_MAX] = {
>  [NVME_TEMPERATURE_THRESHOLD]= NVME_FEAT_CAP_CHANGE,
> +[NVME_ERROR_RECOVERY]   = NVME_FEAT_CAP_CHANGE | 
> NVME_FEAT_CAP_NS,
>  [NVME_VOLATILE_WRITE_CACHE] = 

Re: [RFC PATCH 0/5] ARM: reduce the memory consumed when mapping UEFI flash images

2020-11-16 Thread Philippe Mathieu-Daudé
Hi David,

On 11/16/20 11:42 AM, David Edmondson wrote:
> Currently ARM UEFI images are typically built as 2MB/768kB flash
> images for code and variables respectively. These images are both then
> padded out to 64MB before being loaded by QEMU.
> 
> Because the images are 64MB each, QEMU allocates 128MB of memory to
> read them, and then proceeds to read all 128MB from disk (dirtying the
> memory). Of this 128MB less than 3MB is useful - the rest is zero
> padding.

2 years ago I commented the same problem, and suggested to access the
underlying storage by "block", as this is a "block storage".

Back then the response was "do not try to fix something that works".
This is why we choose the big hammer "do not accept image size not
matching device size" way.

While your series seems to help, it only postpone the same
implementation problem. If what you want is use the least memory
required, I still believe accessing the device by block is the
best approach.

Regards,

Phil.

> 
> On a machine with 100 VMs this wastes over 12GB of memory.
> 
> This set of patches aims to reclaim the wasted memory by allowing QEMU
> to respect the size of the flash images and allocate only the
> necessary memory. This will, of course, require that the flash build
> process be modified to avoid padding the images to 64MB.
> 
> Because existing machine types expect the full 128MB reserved for
> flash to be occupied, do so for machine types older than virt-5.2. The
> changes are beneficial even in this case, because while the full 128MB
> of memory is allocated, only that required to actually load the flash
> images from disk is touched.
> 
> David Edmondson (5):
>   hw/block: blk_check_size_and_read_all should report backend name
>   hw/block: Flash images can be smaller than the device
>   hw/arm: Convert assertions about flash image size to error_report
>   hw/arm: Flash image mapping follows image size
>   hw/arm: Only minimise flash size on older machines
> 
>  hw/arm/trace-events  |  2 +
>  hw/arm/virt-acpi-build.c | 30 --
>  hw/arm/virt.c| 86 +---
>  hw/block/block.c | 26 ++--
>  include/hw/arm/virt.h|  2 +
>  5 files changed, 97 insertions(+), 49 deletions(-)
> 




Re: [PATCH v8 2/5] hw/block/nvme: pull aio error handling

2020-11-16 Thread Minwoo Im
On 11/12 20:59, Klaus Jensen wrote:
> From: Klaus Jensen 
> 
> Add a new function, nvme_aio_err, to handle errors resulting from AIOs
> and use this from the callbacks.
> 
> Signed-off-by: Klaus Jensen 
> ---
>  hw/block/nvme.c | 61 +
>  1 file changed, 36 insertions(+), 25 deletions(-)
> 
> diff --git a/hw/block/nvme.c b/hw/block/nvme.c
> index 51f35e8cf18b..a96a996ddc0d 100644
> --- a/hw/block/nvme.c
> +++ b/hw/block/nvme.c
> @@ -878,6 +878,41 @@ static inline uint16_t nvme_check_bounds(NvmeNamespace 
> *ns, uint64_t slba,
>  return NVME_SUCCESS;
>  }
>  
> +static void nvme_aio_err(NvmeRequest *req, int ret)
> +{
> +uint16_t status = NVME_SUCCESS;
> +Error *local_err = NULL;
> +
> +switch (req->cmd.opcode) {
> +case NVME_CMD_READ:
> +status = NVME_UNRECOVERED_READ;
> +break;
> +case NVME_CMD_FLUSH:
> +case NVME_CMD_WRITE:
> +case NVME_CMD_WRITE_ZEROES:
> +status = NVME_WRITE_FAULT;
> +break;
> +default:
> +status = NVME_INTERNAL_DEV_ERROR;
> +break;
> +}
> +
> +trace_pci_nvme_err_aio(nvme_cid(req), strerror(ret), status);
> +
> +error_setg_errno(_err, -ret, "aio failed");
> +error_report_err(local_err);
> +
> +/*
> + * Set the command status code to the first encountered error but allow a
> + * subsequent Internal Device Error to trump it.
> + */
> +if (req->status && status != NVME_INTERNAL_DEV_ERROR) {
> +return;
> +}

Are we okay with the trace print up there in case that this if is taken?
I guess if this if is taken, the trace print may not that matter.



Re: [PATCH v8 1/5] hw/block/nvme: remove superfluous NvmeCtrl parameter

2020-11-16 Thread Minwoo Im
On 11/12 20:59, Klaus Jensen wrote:
> From: Klaus Jensen 
> 
> nvme_check_bounds has no use of the NvmeCtrl parameter; remove it.
> 
> Signed-off-by: Klaus Jensen 

Reviewed-by: Minwoo Im 



Re: [RFC PATCH 1/5] hw/block: blk_check_size_and_read_all should report backend name

2020-11-16 Thread Philippe Mathieu-Daudé
On 11/16/20 11:42 AM, David Edmondson wrote:
> If there are problems examining or reading data from the block
> backend, the error messages should include an appropriate identifier
> to assist in diagnoses.
> 
> Signed-off-by: David Edmondson 
> ---
>  hw/block/block.c | 14 +++---
>  1 file changed, 7 insertions(+), 7 deletions(-)
> 
> diff --git a/hw/block/block.c b/hw/block/block.c
> index 1e34573da7..8b284e1f14 100644
> --- a/hw/block/block.c
> +++ b/hw/block/block.c
> @@ -20,9 +20,6 @@
>   * BDRV_REQUEST_MAX_BYTES.
>   * On success, return true.
>   * On failure, store an error through @errp and return false.
> - * Note that the error messages do not identify the block backend.
> - * TODO Since callers don't either, this can result in confusing
> - * errors.
>   * This function not intended for actual block devices, which read on
>   * demand.  It's for things like memory devices that (ab)use a block
>   * backend to provide persistence.
> @@ -32,17 +29,19 @@ bool blk_check_size_and_read_all(BlockBackend *blk, void 
> *buf, hwaddr size,
>  {
>  int64_t blk_len;
>  int ret;
> +const char *name = blk_name(blk);
>  
>  blk_len = blk_getlength(blk);
>  if (blk_len < 0) {
>  error_setg_errno(errp, -blk_len,
> - "can't get size of block backend");
> + "can't get size of block backend %s",

Maybe '%s' to notice empty name?

> + name);
>  return false;
>  }
>  if (blk_len != size) {
>  error_setg(errp, "device requires %" HWADDR_PRIu " bytes, "
> -   "block backend provides %" PRIu64 " bytes",
> -   size, blk_len);
> +   "block backend %s provides %" PRIu64 " bytes",
> +   size, name, blk_len);
>  return false;
>  }
>  
> @@ -55,7 +54,8 @@ bool blk_check_size_and_read_all(BlockBackend *blk, void 
> *buf, hwaddr size,
>  assert(size <= BDRV_REQUEST_MAX_BYTES);
>  ret = blk_pread(blk, 0, buf, size);
>  if (ret < 0) {
> -error_setg_errno(errp, -ret, "can't read block backend");
> +error_setg_errno(errp, -ret, "can't read block backend %s",
> + name);
>  return false;
>  }
>  return true;
> 




Re: [PATCH 2/2] monitor: increase amount of data for monitor to read

2020-11-16 Thread Andrey Shinkevich

On 09.11.2020 12:55, Vladimir Sementsov-Ogievskiy wrote:

06.11.2020 15:42, Andrey Shinkevich wrote:

QMP and HMP monitors read one byte at a time from the socket or stdin,
which is very inefficient. With 100+ VMs on the host, this results in
multiple extra system calls and CPU overuse.
This patch increases the amount of read data up to 4096 bytes that fits
the buffer size on the channel level.

Suggested-by: Denis V. Lunev 
Signed-off-by: Andrey Shinkevich 
---
  chardev/char-fd.c  | 64 
+-

  chardev/char-socket.c  | 54 +++---
  chardev/char.c | 40 +
  include/chardev/char.h | 15 +++
  monitor/monitor.c  |  2 +-
  tests/qemu-iotests/247.out |  2 +-
  6 files changed, 159 insertions(+), 18 deletions(-)


[...]


+    ret = qio_channel_read(
+    chan, (gchar *)thl.buf, len, NULL);
+    if (ret == 0) {
+    remove_fd_in_watch(chr);
+    qemu_chr_be_event(chr, CHR_EVENT_CLOSED);
+    thl = (const JSONthrottle){0};
+    return FALSE;
+    }
+    if (ret < 0) {
+    return TRUE;
+    }


large code chunk is shared with fd_chr_read_hmp(). Would be not bad to 
avoid duplication..




There were two reasons to split the function:
1. Not to make the code complicated.
2. Avoid unused buffer of 4k on the stack:
   fd_chr_read_hmp() { uint8_t buf[CHR_READ_BUF_LEN];..


+    thl.load = ret;
+    thl.cursor = 0;
+    }
+
+    size = thl.load;
+    start = thl.buf + thl.cursor;


you may use uint8_t* pointer type for thl.curser and get rid of size and 
start variables.




For the 'start', yes. And I will want the 'size' anyway.

[...]


+int qemu_chr_end_position(const char *buf, int size, JSONthrottle *thl)
+{
+    int i;
+
+    for (i = 0; i < size; i++) {
+    switch (buf[i]) {
+    case ' ':
+    case '\n':
+    case '\r':
+    continue;
+    case '{':
+    thl->brace_count++;
+    break;
+    case '}':
+    thl->brace_count--;
+    break;
+    case '[':
+    thl->bracket_count++;
+    break;
+    case ']':
+    thl->bracket_count--;


I don't think you need to care about square brackets, as QMP queries and 
answers are always json objects, i.e. in pair of '{' and '}'.




I've kept the brackets because it is another condition to put a command 
into the requests queue (see json_message_process_token()).



Andrey



[RFC PATCH 2/5] hw/block: Flash images can be smaller than the device

2020-11-16 Thread David Edmondson
When loading a flash image into a device, allow the image to be
smaller than the extent of the device.

Signed-off-by: David Edmondson 
---
 hw/block/block.c | 16 
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/hw/block/block.c b/hw/block/block.c
index 8b284e1f14..40262546bd 100644
--- a/hw/block/block.c
+++ b/hw/block/block.c
@@ -16,8 +16,8 @@
 
 /*
  * Read the entire contents of @blk into @buf.
- * @blk's contents must be @size bytes, and @size must be at most
- * BDRV_REQUEST_MAX_BYTES.
+ * @blk's contents must not be more than @size bytes, and must be at
+ * most BDRV_REQUEST_MAX_BYTES in length.
  * On success, return true.
  * On failure, store an error through @errp and return false.
  * This function not intended for actual block devices, which read on
@@ -38,10 +38,10 @@ bool blk_check_size_and_read_all(BlockBackend *blk, void 
*buf, hwaddr size,
  name);
 return false;
 }
-if (blk_len != size) {
-error_setg(errp, "device requires %" HWADDR_PRIu " bytes, "
-   "block backend %s provides %" PRIu64 " bytes",
-   size, name, blk_len);
+if (blk_len > size) {
+error_setg(errp, "block backend %s is too large for device "
+   "(%" PRIu64 " > %" HWADDR_PRIu ")",
+   name, blk_len, size);
 return false;
 }
 
@@ -51,8 +51,8 @@ bool blk_check_size_and_read_all(BlockBackend *blk, void 
*buf, hwaddr size,
  * should probably rework the device to be more like an actual
  * block device and read only on demand.
  */
-assert(size <= BDRV_REQUEST_MAX_BYTES);
-ret = blk_pread(blk, 0, buf, size);
+assert(blk_len <= BDRV_REQUEST_MAX_BYTES);
+ret = blk_pread(blk, 0, buf, blk_len);
 if (ret < 0) {
 error_setg_errno(errp, -ret, "can't read block backend %s",
  name);
-- 
2.28.0




[RFC PATCH 0/5] ARM: reduce the memory consumed when mapping UEFI flash images

2020-11-16 Thread David Edmondson
Currently ARM UEFI images are typically built as 2MB/768kB flash
images for code and variables respectively. These images are both then
padded out to 64MB before being loaded by QEMU.

Because the images are 64MB each, QEMU allocates 128MB of memory to
read them, and then proceeds to read all 128MB from disk (dirtying the
memory). Of this 128MB less than 3MB is useful - the rest is zero
padding.

On a machine with 100 VMs this wastes over 12GB of memory.

This set of patches aims to reclaim the wasted memory by allowing QEMU
to respect the size of the flash images and allocate only the
necessary memory. This will, of course, require that the flash build
process be modified to avoid padding the images to 64MB.

Because existing machine types expect the full 128MB reserved for
flash to be occupied, do so for machine types older than virt-5.2. The
changes are beneficial even in this case, because while the full 128MB
of memory is allocated, only that required to actually load the flash
images from disk is touched.

David Edmondson (5):
  hw/block: blk_check_size_and_read_all should report backend name
  hw/block: Flash images can be smaller than the device
  hw/arm: Convert assertions about flash image size to error_report
  hw/arm: Flash image mapping follows image size
  hw/arm: Only minimise flash size on older machines

 hw/arm/trace-events  |  2 +
 hw/arm/virt-acpi-build.c | 30 --
 hw/arm/virt.c| 86 +---
 hw/block/block.c | 26 ++--
 include/hw/arm/virt.h|  2 +
 5 files changed, 97 insertions(+), 49 deletions(-)

-- 
2.28.0




[RFC PATCH 1/5] hw/block: blk_check_size_and_read_all should report backend name

2020-11-16 Thread David Edmondson
If there are problems examining or reading data from the block
backend, the error messages should include an appropriate identifier
to assist in diagnoses.

Signed-off-by: David Edmondson 
---
 hw/block/block.c | 14 +++---
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/hw/block/block.c b/hw/block/block.c
index 1e34573da7..8b284e1f14 100644
--- a/hw/block/block.c
+++ b/hw/block/block.c
@@ -20,9 +20,6 @@
  * BDRV_REQUEST_MAX_BYTES.
  * On success, return true.
  * On failure, store an error through @errp and return false.
- * Note that the error messages do not identify the block backend.
- * TODO Since callers don't either, this can result in confusing
- * errors.
  * This function not intended for actual block devices, which read on
  * demand.  It's for things like memory devices that (ab)use a block
  * backend to provide persistence.
@@ -32,17 +29,19 @@ bool blk_check_size_and_read_all(BlockBackend *blk, void 
*buf, hwaddr size,
 {
 int64_t blk_len;
 int ret;
+const char *name = blk_name(blk);
 
 blk_len = blk_getlength(blk);
 if (blk_len < 0) {
 error_setg_errno(errp, -blk_len,
- "can't get size of block backend");
+ "can't get size of block backend %s",
+ name);
 return false;
 }
 if (blk_len != size) {
 error_setg(errp, "device requires %" HWADDR_PRIu " bytes, "
-   "block backend provides %" PRIu64 " bytes",
-   size, blk_len);
+   "block backend %s provides %" PRIu64 " bytes",
+   size, name, blk_len);
 return false;
 }
 
@@ -55,7 +54,8 @@ bool blk_check_size_and_read_all(BlockBackend *blk, void 
*buf, hwaddr size,
 assert(size <= BDRV_REQUEST_MAX_BYTES);
 ret = blk_pread(blk, 0, buf, size);
 if (ret < 0) {
-error_setg_errno(errp, -ret, "can't read block backend");
+error_setg_errno(errp, -ret, "can't read block backend %s",
+ name);
 return false;
 }
 return true;
-- 
2.28.0




[RFC PATCH 5/5] hw/arm: Only minimise flash size on older machines

2020-11-16 Thread David Edmondson
Prior to 5.2 the flash images loaded into the bottom 128MB always
filled the region. Ensure that this continues to be the case.

Signed-off-by: David Edmondson 
---
 hw/arm/virt-acpi-build.c | 11 +++---
 hw/arm/virt.c| 79 ++--
 include/hw/arm/virt.h|  3 +-
 3 files changed, 60 insertions(+), 33 deletions(-)

diff --git a/hw/arm/virt-acpi-build.c b/hw/arm/virt-acpi-build.c
index 2c08d36624..6e3d72a9e9 100644
--- a/hw/arm/virt-acpi-build.c
+++ b/hw/arm/virt-acpi-build.c
@@ -117,16 +117,17 @@ static void acpi_dsdt_add_flash1(Aml *scope, int index,
 aml_append(scope, dev);
 }
 
-static void acpi_dsdt_add_flash(Aml *scope, const MemMapEntry *flash_memmap,
-PFlashCFI01 *flash[2])
+static void acpi_dsdt_add_flash(Aml *scope, VirtMachineState *vms)
 {
+MemMapEntry *flash_memmap = >memmap[VIRT_FLASH];
+
 acpi_dsdt_add_flash1(scope, 0,
  flash_memmap->base,
- virt_flash_size(flash[0]));
+ virt_flash_size(vms, vms->flash[0]));
 
 acpi_dsdt_add_flash1(scope, 1,
  flash_memmap->base + flash_memmap->size / 2,
- virt_flash_size(flash[1]));
+ virt_flash_size(vms, vms->flash[1]));
 }
 
 static void acpi_dsdt_add_virtio(Aml *scope,
@@ -606,7 +607,7 @@ build_dsdt(GArray *table_data, BIOSLinker *linker, 
VirtMachineState *vms)
 acpi_dsdt_add_uart(scope, [VIRT_UART],
(irqmap[VIRT_UART] + ARM_SPI_BASE));
 if (vmc->acpi_expose_flash) {
-acpi_dsdt_add_flash(scope, [VIRT_FLASH], vms->flash);
+acpi_dsdt_add_flash(scope, vms);
 }
 acpi_dsdt_add_fw_cfg(scope, [VIRT_FW_CFG]);
 acpi_dsdt_add_virtio(scope, [VIRT_MMIO],
diff --git a/hw/arm/virt.c b/hw/arm/virt.c
index 03ec844bf3..e851622cb5 100644
--- a/hw/arm/virt.c
+++ b/hw/arm/virt.c
@@ -933,9 +933,15 @@ static void create_virtio_devices(const VirtMachineState 
*vms)
 
 #define VIRT_FLASH_SECTOR_SIZE (256 * KiB)
 
-int64_t virt_flash_size(PFlashCFI01 *flash)
+int64_t virt_flash_size(VirtMachineState *vms, PFlashCFI01 *flash)
 {
-return blk_getlength(pflash_cfi01_get_blk(flash));
+VirtMachineClass *vmc = VIRT_MACHINE_GET_CLASS(vms);
+
+if (vmc->maximize_flash_size) {
+return vms->memmap[VIRT_FLASH].size / 2;
+} else {
+return blk_getlength(pflash_cfi01_get_blk(flash));
+}
 }
 
 static PFlashCFI01 *virt_flash_create1(VirtMachineState *vms,
@@ -1014,47 +1020,65 @@ static void virt_flash_map(VirtMachineState *vms,
 MemMapEntry *m = >memmap[VIRT_FLASH];
 
 virt_flash_map1(vms->flash[0], m->base,
-virt_flash_size(vms->flash[0]), secure_sysmem);
+virt_flash_size(vms, vms->flash[0]), secure_sysmem);
 
 virt_flash_map1(vms->flash[1], m->base + m->size / 2,
-virt_flash_size(vms->flash[1]), sysmem);
+virt_flash_size(vms, vms->flash[1]), sysmem);
 }
 
 static void virt_flash_fdt(VirtMachineState *vms,
MemoryRegion *sysmem,
MemoryRegion *secure_sysmem)
 {
+VirtMachineClass *vmc = VIRT_MACHINE_GET_CLASS(vms);
 bool secure = sysmem != secure_sysmem;
 MemMapEntry *m = >memmap[VIRT_FLASH];
 hwaddr flashbase0 = m->base;
 hwaddr flashbase1 = m->base + m->size / 2;
-hwaddr flashsize0 = virt_flash_size(vms->flash[0]);
-hwaddr flashsize1 = virt_flash_size(vms->flash[1]);
+hwaddr flashsize0 = virt_flash_size(vms, vms->flash[0]);
+hwaddr flashsize1 = virt_flash_size(vms, vms->flash[1]);
 char *nodename;
 
-if (secure) {
-nodename = g_strdup_printf("/secflash@%" PRIx64, flashbase0);
-} else {
+if (vmc->maximize_flash_size && !secure) {
+/* Report both flash devices as a single node in the DT */
 nodename = g_strdup_printf("/flash@%" PRIx64, flashbase0);
-}
-qemu_fdt_add_subnode(vms->fdt, nodename);
-qemu_fdt_setprop_string(vms->fdt, nodename, "compatible", "cfi-flash");
-qemu_fdt_setprop_sized_cells(vms->fdt, nodename, "reg",
- 2, flashbase0, 2, flashsize0);
-qemu_fdt_setprop_cell(vms->fdt, nodename, "bank-width", 4);
-if (secure) {
-qemu_fdt_setprop_string(vms->fdt, nodename, "status", "disabled");
-qemu_fdt_setprop_string(vms->fdt, nodename, "secure-status", "okay");
-}
-g_free(nodename);
+qemu_fdt_add_subnode(vms->fdt, nodename);
+qemu_fdt_setprop_string(vms->fdt, nodename, "compatible", "cfi-flash");
+qemu_fdt_setprop_sized_cells(vms->fdt, nodename, "reg",
+ 2, flashbase0, 2, flashsize0,
+ 2, flashbase1, 2, flashsize1);
+qemu_fdt_setprop_cell(vms->fdt, nodename, "bank-width", 4);
+g_free(nodename);
+} else {
+/*
+ * If we are not intending to fill the flash region or one 

[RFC PATCH 3/5] hw/arm: Convert assertions about flash image size to error_report

2020-11-16 Thread David Edmondson
Rather than throwing an assertion, provide a more detailed report if a
flash image is inappropriately sized or aligned.

Signed-off-by: David Edmondson 
---
 hw/arm/virt.c | 16 ++--
 1 file changed, 14 insertions(+), 2 deletions(-)

diff --git a/hw/arm/virt.c b/hw/arm/virt.c
index 27dbeb549e..f9f10987dc 100644
--- a/hw/arm/virt.c
+++ b/hw/arm/virt.c
@@ -967,9 +967,21 @@ static void virt_flash_map1(PFlashCFI01 *flash,
 MemoryRegion *sysmem)
 {
 DeviceState *dev = DEVICE(flash);
+const char *name = blk_name(pflash_cfi01_get_blk(flash));
+
+if (size == 0 || !QEMU_IS_ALIGNED(size, VIRT_FLASH_SECTOR_SIZE)) {
+error_report("system firmware block device %s has invalid size "
+ "%" PRId64, name, size);
+info_report("its size must be a non-zero multiple of 0x%" PRIx64,
+VIRT_FLASH_SECTOR_SIZE);
+exit(1);
+}
+if (!(size / VIRT_FLASH_SECTOR_SIZE <= UINT32_MAX)) {
+error_report("system firmware block device %s is too large "
+ "(%" PRId64 ")", name, size);
+exit(1);
+}
 
-assert(QEMU_IS_ALIGNED(size, VIRT_FLASH_SECTOR_SIZE));
-assert(size / VIRT_FLASH_SECTOR_SIZE <= UINT32_MAX);
 qdev_prop_set_uint32(dev, "num-blocks", size / VIRT_FLASH_SECTOR_SIZE);
 sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), _fatal);
 
-- 
2.28.0




[RFC PATCH 4/5] hw/arm: Flash image mapping follows image size

2020-11-16 Thread David Edmondson
When mapping flash images into the bottom 128MB, create mappings that
match the size of the underlying block device rather than 64MB.

Signed-off-by: David Edmondson 
---
 hw/arm/trace-events  |  2 +
 hw/arm/virt-acpi-build.c | 29 ---
 hw/arm/virt.c| 79 +---
 include/hw/arm/virt.h|  1 +
 4 files changed, 60 insertions(+), 51 deletions(-)

diff --git a/hw/arm/trace-events b/hw/arm/trace-events
index a335ee891d..a9174f8fba 100644
--- a/hw/arm/trace-events
+++ b/hw/arm/trace-events
@@ -53,3 +53,5 @@ smmuv3_notify_flag_add(const char *iommu) "ADD SMMUNotifier 
node for iommu mr=%s
 smmuv3_notify_flag_del(const char *iommu) "DEL SMMUNotifier node for iommu 
mr=%s"
 smmuv3_inv_notifiers_iova(const char *name, uint16_t asid, uint64_t iova, 
uint8_t tg, uint64_t num_pages) "iommu mr=%s asid=%d iova=0x%"PRIx64" tg=%d 
num_pages=0x%"PRIx64
 
+# virt.c
+virt_flash_map1(const char *name, uint64_t base, uint64_t size) "map %s at 
0x%"PRIx64" + 0x%"PRIx64
diff --git a/hw/arm/virt-acpi-build.c b/hw/arm/virt-acpi-build.c
index 9747a6458f..2c08d36624 100644
--- a/hw/arm/virt-acpi-build.c
+++ b/hw/arm/virt-acpi-build.c
@@ -102,28 +102,31 @@ static void acpi_dsdt_add_fw_cfg(Aml *scope, const 
MemMapEntry *fw_cfg_memmap)
 aml_append(scope, dev);
 }
 
-static void acpi_dsdt_add_flash(Aml *scope, const MemMapEntry *flash_memmap)
+static void acpi_dsdt_add_flash1(Aml *scope, int index,
+ hwaddr base, hwaddr size)
 {
 Aml *dev, *crs;
-hwaddr base = flash_memmap->base;
-hwaddr size = flash_memmap->size / 2;
 
-dev = aml_device("FLS0");
+dev = aml_device("FLS%u", index);
 aml_append(dev, aml_name_decl("_HID", aml_string("LNRO0015")));
-aml_append(dev, aml_name_decl("_UID", aml_int(0)));
+aml_append(dev, aml_name_decl("_UID", aml_int(index)));
 
 crs = aml_resource_template();
 aml_append(crs, aml_memory32_fixed(base, size, AML_READ_WRITE));
 aml_append(dev, aml_name_decl("_CRS", crs));
 aml_append(scope, dev);
+}
 
-dev = aml_device("FLS1");
-aml_append(dev, aml_name_decl("_HID", aml_string("LNRO0015")));
-aml_append(dev, aml_name_decl("_UID", aml_int(1)));
-crs = aml_resource_template();
-aml_append(crs, aml_memory32_fixed(base + size, size, AML_READ_WRITE));
-aml_append(dev, aml_name_decl("_CRS", crs));
-aml_append(scope, dev);
+static void acpi_dsdt_add_flash(Aml *scope, const MemMapEntry *flash_memmap,
+PFlashCFI01 *flash[2])
+{
+acpi_dsdt_add_flash1(scope, 0,
+ flash_memmap->base,
+ virt_flash_size(flash[0]));
+
+acpi_dsdt_add_flash1(scope, 1,
+ flash_memmap->base + flash_memmap->size / 2,
+ virt_flash_size(flash[1]));
 }
 
 static void acpi_dsdt_add_virtio(Aml *scope,
@@ -603,7 +606,7 @@ build_dsdt(GArray *table_data, BIOSLinker *linker, 
VirtMachineState *vms)
 acpi_dsdt_add_uart(scope, [VIRT_UART],
(irqmap[VIRT_UART] + ARM_SPI_BASE));
 if (vmc->acpi_expose_flash) {
-acpi_dsdt_add_flash(scope, [VIRT_FLASH]);
+acpi_dsdt_add_flash(scope, [VIRT_FLASH], vms->flash);
 }
 acpi_dsdt_add_fw_cfg(scope, [VIRT_FW_CFG]);
 acpi_dsdt_add_virtio(scope, [VIRT_MMIO],
diff --git a/hw/arm/virt.c b/hw/arm/virt.c
index f9f10987dc..03ec844bf3 100644
--- a/hw/arm/virt.c
+++ b/hw/arm/virt.c
@@ -50,6 +50,7 @@
 #include "sysemu/sysemu.h"
 #include "sysemu/tpm.h"
 #include "sysemu/kvm.h"
+#include "sysemu/block-backend.h"
 #include "hw/loader.h"
 #include "exec/address-spaces.h"
 #include "qemu/bitops.h"
@@ -78,6 +79,7 @@
 #include "hw/virtio/virtio-iommu.h"
 #include "hw/char/pl011.h"
 #include "qemu/guest-random.h"
+#include "trace.h"
 
 #define DEFINE_VIRT_MACHINE_LATEST(major, minor, latest) \
 static void virt_##major##_##minor##_class_init(ObjectClass *oc, \
@@ -931,6 +933,11 @@ static void create_virtio_devices(const VirtMachineState 
*vms)
 
 #define VIRT_FLASH_SECTOR_SIZE (256 * KiB)
 
+int64_t virt_flash_size(PFlashCFI01 *flash)
+{
+return blk_getlength(pflash_cfi01_get_blk(flash));
+}
+
 static PFlashCFI01 *virt_flash_create1(VirtMachineState *vms,
 const char *name,
 const char *alias_prop_name)
@@ -969,6 +976,8 @@ static void virt_flash_map1(PFlashCFI01 *flash,
 DeviceState *dev = DEVICE(flash);
 const char *name = blk_name(pflash_cfi01_get_blk(flash));
 
+trace_virt_flash_map1(name, base, size);
+
 if (size == 0 || !QEMU_IS_ALIGNED(size, VIRT_FLASH_SECTOR_SIZE)) {
 error_report("system firmware block device %s has invalid size "
  "%" PRId64, name, size);
@@ -995,63 +1004,57 @@ static void virt_flash_map(VirtMachineState *vms,
MemoryRegion *secure_sysmem)
 {
 /*
- * Map two flash devices to fill the VIRT_FLASH space in the memmap.

Re: [PATCH v7 00/21] preallocate filter

2020-11-16 Thread Vladimir Sementsov-Ogievskiy

13.11.2020 22:33, Max Reitz wrote:

On 21.10.20 16:58, Vladimir Sementsov-Ogievskiy wrote:

Hi all!

Here is a filter, which does preallocation on write.

v7:
01: add Alberto's r-b
07: don't remove sentence from the comment
08: - drop extra "s->file_end = end;" line
 - improve check/set perm handlers
09: add Max's r-b
10: add Max's r-b
11: new
12: - skip if preallocate unsupported
 - drop auto and quick groups
13: new
14: - improve 'average' field of result spec
 - drop extra 'dim = ...' line
15-18: new
19: a lot of changes
20: new
21: add results dump to json


Thanks, applied to my block-next branch (for 6.0):

https://git.xanclic.moe/XanClic/qemu/commits/branch/block-next



Thank you!


--
Best regards,
Vladimir