from:"Klaus Jensen"

Re: [PATCH] hw/nvme: fix mo field in io mgnt send

2024-05-24 Thread Klaus Jensen

On May  8 09:36, Vincent Fu wrote:
> On 5/7/24 10:05, Vincent Fu wrote:
> > On 5/6/24 04:06, Klaus Jensen wrote:
> > > The Management Operation field of I/O Management Send is only 8 bits,
> > > not 16.
> > > 
> > > Fixes: 73064edfb864 ("hw/nvme: flexible data placement emulation")
> > > Signed-off-by: Klaus Jensen 
> > > ---
> > >   hw/nvme/ctrl.c | 2 +-
> > >   1 file changed, 1 insertion(+), 1 deletion(-)
> > > 
> > > diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c
> > > index 9e7bbebc8bb0..ede5f281dd7c 100644
> > > --- a/hw/nvme/ctrl.c
> > > +++ b/hw/nvme/ctrl.c
> > > @@ -4387,7 +4387,7 @@ static uint16_t nvme_io_mgmt_send(NvmeCtrl *n,
> > > NvmeRequest *req)
> > >   {
> > >   NvmeCmd *cmd = >cmd;
> > >   uint32_t cdw10 = le32_to_cpu(cmd->cdw10);
> > > -    uint8_t mo = (cdw10 & 0xff);
> > > +    uint8_t mo = cdw10 & 0xf;
> > >   switch (mo) {
> > >   case NVME_IOMS_MO_NOP:
> > > 
> > > ---
> > > base-commit: 84b0eb1826f690aa8d51984644318ee6c810f5bf
> > > change-id: 20240506-fix-ioms-mo-97098c6c5396
> > > 
> > > Best regards,
> > 
> > Reviewed-by: Vincent Fu 
> 
> Klaus, upon taking a second look, the original code is correct. The proposed
> change would only keep the least significant 4 bits of the MO field. The
> original code gives you the 8 bits needed.
> 
> Let me withdraw my Reviewed-by.
> 
> Vincent

That was embarrasing. Thanks for catching that Vincent :)


signature.asc
Description: PGP signature

Re: [PATCH v3 10/11] hw/nvme: add reservation protocal command

2024-05-24 Thread Klaus Jensen

tpl,
> + ignore_key, nvme_misc_cb,
> + req);
> +break;
> +case NVME_RESV_REGISTER_ACTION_UNREGISTER:
> +req->aiocb = blk_aio_pr_register(ns->blkconf.blk, key_info.cr_key, 0,
> + 0, aptpl, ignore_key,
> + nvme_misc_cb, req);
> +break;
> +case NVME_RESV_REGISTER_ACTION_REPLACE:
> +req->aiocb = blk_aio_pr_register(ns->blkconf.blk, key_info.cr_key,
> + key_info.nr_key, 0, aptpl, 
> ignore_key,
> + nvme_misc_cb, req);
> +break;
> +default:
> +return NVME_INVALID_FIELD;
> +}
> +
> +return NVME_NO_COMPLETE;
> +}
> +
> +static uint16_t nvme_resv_release(NvmeCtrl *n, NvmeRequest *req)
> +{
> +int ret;
> +uint64_t cr_key;
> +NvmeNamespace *ns = req->ns;
> +uint32_t cdw10 = le32_to_cpu(req->cmd.cdw10);
> +uint8_t action = cdw10 & 0x7;
> +NVMEResvType type = cdw10 >> 8 & 0xff;
> +
> +ret = nvme_h2c(n, (uint8_t *)_key, sizeof(cr_key), req);
> +if (ret) {
> +return ret;
> +}
> +
> +switch (action) {
> +case NVME_RESV_RELEASE_ACTION_RELEASE:
> +req->aiocb = blk_aio_pr_release(ns->blkconf.blk, cr_key,
> +nvme_pr_type_to_block(type),
> +nvme_misc_cb, req);
> +break;
> +case NVME_RESV_RELEASE_ACTION_CLEAR:
> +req->aiocb = blk_aio_pr_clear(ns->blkconf.blk, cr_key,
> +  nvme_misc_cb, req);
> +break;
> +default:
> +return NVME_INVALID_FIELD;
> +}
> +
> +return NVME_NO_COMPLETE;
> +}
> +
> +static uint16_t nvme_resv_acquire(NvmeCtrl *n, NvmeRequest *req)
> +{
> +int ret;
> +NvmeKeyInfo key_info;
> +NvmeNamespace *ns = req->ns;
> +uint32_t cdw10 = le32_to_cpu(req->cmd.cdw10);
> +uint8_t action = cdw10 & 0x7;
> +NVMEResvType type = cdw10 >> 8 & 0xff;
> +
> +ret = nvme_h2c(n, (uint8_t *)_info, sizeof(NvmeKeyInfo), req);
> +if (ret) {
> +return ret;
> +}
> +
> +switch (action) {
> +case NVME_RESV_ACQUIRE_ACTION_ACQUIRE:
> +req->aiocb = blk_aio_pr_reserve(ns->blkconf.blk, key_info.cr_key,
> +nvme_pr_type_to_block(type),
> +nvme_misc_cb, req);
> +break;
> +case NVME_RESV_ACQUIRE_ACTION_PREEMPT:
> +req->aiocb = blk_aio_pr_preempt(ns->blkconf.blk,
> + key_info.cr_key, key_info.nr_key,
> + nvme_pr_type_to_block(type),
> + false, nvme_misc_cb, req);
> +break;
> +case NVME_RESV_ACQUIRE_ACTION_PREEMPT_AND_ABORT:
> +req->aiocb = blk_aio_pr_preempt(ns->blkconf.blk, key_info.cr_key,
> +key_info.nr_key, type, true,
> +nvme_misc_cb, req);
> +break;
> +default:
> +return NVME_INVALID_FIELD;
> +}
> +
> +return NVME_NO_COMPLETE;
> +}
> +
> +typedef struct NvmeResvKeys {
> +uint32_t generation;
> +uint32_t num_keys;
> +uint64_t *keys;
> +NvmeRequest *req;
> +} NvmeResvKeys;
> +
> +typedef struct NvmeReadReservation {
> +uint32_t generation;
> +uint64_t key;
> +BlockPrType type;
> +NvmeRequest *req;
> +NvmeResvKeys *keys_info;
> +} NvmeReadReservation;
> +
> +static int _nvme_resv_read_reservation_cb(NvmeReadReservation *reservation)

Nit: you can drop the leading underscore.

But I have no problems introducing this to hw/nvme, so

Acked-by: Klaus Jensen 

I will give this a proper review once reviews trickle in on the core
block layer changes (since this obviously depends on that).


signature.asc
Description: PGP signature

Re: [PATCH v3 09/11] hw/nvme: enable namespace rescap function

2024-05-24 Thread Klaus Jensen

On May 17 17:52, Changqi Lu wrote:
> This commit enables the rescap function in the
> namespace by detecting the supported reservation
> function in the backend driver.
> 
> Signed-off-by: Changqi Lu 
> Signed-off-by: zhenwei pi 
> ---
>  hw/nvme/ns.c | 8 
>  1 file changed, 8 insertions(+)
> 
> diff --git a/hw/nvme/ns.c b/hw/nvme/ns.c
> index ea8db175db..bb09117f4b 100644
> --- a/hw/nvme/ns.c
> +++ b/hw/nvme/ns.c
> @@ -20,6 +20,7 @@
>  #include "qemu/bitops.h"
>  #include "sysemu/sysemu.h"
>  #include "sysemu/block-backend.h"
> +#include "block/block_int.h"
>  
>  #include "nvme.h"
>  #include "trace.h"
> @@ -55,6 +56,13 @@ void nvme_ns_init_format(NvmeNamespace *ns)
>  }
>  
>  id_ns->npda = id_ns->npdg = npdg - 1;
> +
> +/*
> + * The persistent reservation capacities of block
> + * and nvme are currently defined the same.
> + * If there are subsequent changes, this part needs to be changed.
> + */
> +id_ns->rescap = blk_bs(ns->blkconf.blk)->file->bs->bl.pr_cap;

This is very brittle. I see that you have an enum for both th eblock
layer and nvme. It is tricky to remember to update this if it changes in
the block layer.

>  }
>  
>  static int nvme_ns_init(NvmeNamespace *ns, Error **errp)
> -- 
> 2.20.1
> 

-- 
One of us - No more doubt, silence or taboo about mental illness.


signature.asc
Description: PGP signature

Re: [PATCH v3 06/11] block/nvme: add reservation command protocol constants

2024-05-24 Thread Klaus Jensen

On May 17 17:52, Changqi Lu wrote:
> Add constants for the NVMe persistent command protocol.
> The constants include the reservation command opcode and
> reservation type values defined in section 7 of the NVMe
> 2.0 specification.
> 
> Signed-off-by: Changqi Lu 
> Signed-off-by: zhenwei pi 
> ---
>  include/block/nvme.h | 61 
>  1 file changed, 61 insertions(+)
> 
> diff --git a/include/block/nvme.h b/include/block/nvme.h
> index bb231d0b9a..84e2b2e401 100644
> --- a/include/block/nvme.h
> +++ b/include/block/nvme.h
> @@ -633,6 +633,10 @@ enum NvmeIoCommands {
>  NVME_CMD_WRITE_ZEROES   = 0x08,
>  NVME_CMD_DSM= 0x09,
>  NVME_CMD_VERIFY = 0x0c,
> +NVME_CMD_RESV_REGISTER  = 0x0d,
> +NVME_CMD_RESV_REPORT= 0x0e,
> +NVME_CMD_RESV_ACQUIRE   = 0x11,
> +NVME_CMD_RESV_RELEASE   = 0x15,
>  NVME_CMD_IO_MGMT_RECV   = 0x12,
>  NVME_CMD_COPY   = 0x19,
>  NVME_CMD_IO_MGMT_SEND   = 0x1d,
> @@ -641,6 +645,63 @@ enum NvmeIoCommands {
>  NVME_CMD_ZONE_APPEND= 0x7d,
>  };
>  
> +typedef enum {
> +NVME_RESV_REGISTER_ACTION_REGISTER  = 0x00,
> +NVME_RESV_REGISTER_ACTION_UNREGISTER= 0x01,
> +NVME_RESV_REGISTER_ACTION_REPLACE   = 0x02,
> +} NVME_RESV_REGISTER_ACTION;

Existing style would name this `NvmeReservationRegisterAction`.


signature.asc
Description: PGP signature

Re: [PATCH v3 08/11] hw/nvme: enable ONCS reservations

2024-05-24 Thread Klaus Jensen

On May 17 17:52, Changqi Lu wrote:
> This commit enables ONCS to support the reservation
> function at the controller level. It also lays the
> groundwork for detecting and enabling the reservation
> function on a per-namespace basis in RESCAP.
> 
> Signed-off-by: Changqi Lu 
> Signed-off-by: zhenwei pi 
> ---
>  hw/nvme/ctrl.c | 3 ++-
>  1 file changed, 2 insertions(+), 1 deletion(-)
> 
> diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c
> index 127c3d2383..182307a48b 100644
> --- a/hw/nvme/ctrl.c
> +++ b/hw/nvme/ctrl.c
> @@ -8248,7 +8248,8 @@ static void nvme_init_ctrl(NvmeCtrl *n, PCIDevice 
> *pci_dev)
>  id->nn = cpu_to_le32(NVME_MAX_NAMESPACES);
>  id->oncs = cpu_to_le16(NVME_ONCS_WRITE_ZEROES | NVME_ONCS_TIMESTAMP |
> NVME_ONCS_FEATURES | NVME_ONCS_DSM |
> -   NVME_ONCS_COMPARE | NVME_ONCS_COPY);
> +   NVME_ONCS_COMPARE | NVME_ONCS_COPY |
> +   NVME_ONCS_RESRVATIONS);
>  
>  /*
>   * NOTE: If this device ever supports a command set that does NOT use 0x0
> -- 
> 2.20.1
> 

Should be merged with patch 10.


signature.asc
Description: PGP signature

Re: [PATCH v3 09/11] hw/nvme: enable namespace rescap function

2024-05-24 Thread Klaus Jensen

On May 17 17:52, Changqi Lu wrote:
> This commit enables the rescap function in the
> namespace by detecting the supported reservation
> function in the backend driver.
> 
> Signed-off-by: Changqi Lu 
> Signed-off-by: zhenwei pi 
> ---
>  hw/nvme/ns.c | 8 
>  1 file changed, 8 insertions(+)
> 
> diff --git a/hw/nvme/ns.c b/hw/nvme/ns.c
> index ea8db175db..bb09117f4b 100644
> --- a/hw/nvme/ns.c
> +++ b/hw/nvme/ns.c
> @@ -20,6 +20,7 @@
>  #include "qemu/bitops.h"
>  #include "sysemu/sysemu.h"
>  #include "sysemu/block-backend.h"
> +#include "block/block_int.h"
>  
>  #include "nvme.h"
>  #include "trace.h"
> @@ -55,6 +56,13 @@ void nvme_ns_init_format(NvmeNamespace *ns)
>  }
>  
>  id_ns->npda = id_ns->npdg = npdg - 1;
> +
> +/*
> + * The persistent reservation capacities of block
> + * and nvme are currently defined the same.
> + * If there are subsequent changes, this part needs to be changed.
> + */
> +id_ns->rescap = blk_bs(ns->blkconf.blk)->file->bs->bl.pr_cap;
>  }
>  
>  static int nvme_ns_init(NvmeNamespace *ns, Error **errp)
> -- 
> 2.20.1
> 

This should probably be merged with path 10. I don't think it make sense
on it's own?


signature.asc
Description: PGP signature

Re: [PATCH 8/9] hw/nvme: add reservation protocal command

2024-05-08 Thread Klaus Jensen

On May  8 17:36, Changqi Lu wrote:
> Add reservation acquire, reservation register,
> reservation release and reservation report commands
> in the nvme device layer.
> 
> By introducing these commands, this enables the nvme
> device to perform reservation-related tasks, including
> querying keys, querying reservation status, registering
> reservation keys, initiating and releasing reservations,
> as well as clearing and preempting reservations held by
> other keys.
> 
> These commands are crucial for management and control of
> shared storage resources in a persistent manner.
> 
> Signed-off-by: Changqi Lu 
> Signed-off-by: zhenwei pi 
> ---
>  hw/nvme/ctrl.c   | 304 ++-
>  hw/nvme/nvme.h   |   4 +
>  include/block/nvme.h |  37 ++
>  3 files changed, 344 insertions(+), 1 deletion(-)
> 
> diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c
> index 127c3d2383..1f881fc44c 100644
> --- a/hw/nvme/ctrl.c
> +++ b/hw/nvme/ctrl.c
> @@ -294,6 +294,10 @@ static const uint32_t nvme_cse_iocs_nvm[256] = {
>  [NVME_CMD_COMPARE]  = NVME_CMD_EFF_CSUPP,
>  [NVME_CMD_IO_MGMT_RECV] = NVME_CMD_EFF_CSUPP,
>  [NVME_CMD_IO_MGMT_SEND] = NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_LBCC,
> +[NVME_CMD_RESV_REGISTER]= NVME_CMD_EFF_CSUPP,
> +[NVME_CMD_RESV_REPORT]  = NVME_CMD_EFF_CSUPP,
> +[NVME_CMD_RESV_ACQUIRE] = NVME_CMD_EFF_CSUPP,
> +[NVME_CMD_RESV_RELEASE] = NVME_CMD_EFF_CSUPP,
>  };

We need to indicate support for these commands in ONCS, right? And that
should depend on if or not the underlying block device supports it.


signature.asc
Description: PGP signature

Re: [RFC 0/1] hw/nvme: add atomic write support

2024-05-08 Thread Klaus Jensen

On Apr 15 16:46, Alan Adamson wrote:
> Since there is discussion in the Linux NVMe Driver community to add NVMe 
> Atomic Write
> support, it would be desirable to test it with qemu nvme emulation.
>  
> Initially, this RFC will focus on supporting NVMe controller atomic write 
> parameters(AWUN,
> AWUPF, and ACWU) but will be extended to support Namespace parameters (NAWUN, 
> NAWUPF
> and NACWU).
>  
> Atomic Write Parameters for NVMe QEMU
> -
> New NVMe QEMU Parameters (See NVMe Specification for details):
> atomic.dn (default off) - Set the value of Disable Normal.
> atomic.awun=UINT16 (default: 0)
> atomic.awupf=UINT16 (default: 0)
> atomic.acwu=UINT16 (default: 0)
>  
> qemu command line example:
> qemu-system-x86_64 -cpu host --enable-kvm -smp cpus=4 -no-reboot -m 
> 8192M -drive file=./disk.img,if=ide \
> -boot c -device e1000,netdev=net0,mac=DE:CC:CC:EF:99:88 -netdev 
> tap,id=net0 \
> -device 
> nvme,id=nvme-ctrl-0,serial=nvme-1,atomic.dn=off,atomic.awun=63,atomic.awupf=63,atomic.acwu=0
>  \
> -drive file=./nvme.img,if=none,id=nvm-1 -device 
> nvme-ns,drive=nvm-1,bus=nvme-ctrl-0 nvme-ns,drive=nvm-1,bus=nvme-ctrl-0
>  
> Making Writes Atomic:
> -
> - Prior to a command being pulled off the SQ and executed, a check is made to 
> see if it
>   conflicts "atomically" with a currently executing command.
> - All currently executing commands on the same namespace, across all SQs need 
> to be checked.
> - If an atomic conflict is detected, the command is not started and remains 
> on the queue.
>  
> Testing
> ---
> NVMe QEMU Parameters used: 
> atomic.dn=off,atomic.awun=63,atomic.awupf=63,atomic.acwu=0
>  
> # nvme id-ctrl /dev/nvme0 | grep awun
> awun  : 63
> # nvme id-ctrl /dev/nvme0 | grep awupf
> awupf : 63
> # nvme id-ctrl /dev/nvme0 | grep acwu
> acwu  : 0< Since qemu-nvme doesn't support Compare and Write, this is 
> always zero
> # nvme get-feature /dev/nvme0  -f 0xa
> get-feature:0x0a (Write Atomicity Normal), Current value:
> #
>  
> # fio --filename=/dev/nvme0n1 --direct=1 --rw=randwrite --bs=32k 
> --iodepth=256 --name=iops --numjobs=50 --verify=crc64 --verify_fatal=1 
> --ioengine=libaio
>  
> When executed without atomic write support, eventually the following error 
> will be
> observed:
>  
> crc64: verify failed at file /dev/nvme0n1 offset 857669632, length 
> 32768
> (requested block: offset=857669632, length=32768, flags=88)
> Expected CRC: 9c87d3539dafdca0
> Received CRC: d521f7ea3b69d2ee
>  
> When executed with atomic write support, this error no longer happens.
>  
> Questions
> -
> AWUN vs AWUPF - Does the nvme emulation need to do treat these differently? 
> Currently the
> larger of the two will be used as the max atomic write size.
>  
> Future Work
> ---
> - Namespace support (NAWUN, NAWUPF and NACWU)
> - Namespace Boundary support (NABSN, NABO, and NABSPF)
> - Atomic Compare and Write Unit (ACWU)
> 
> Alan Adamson (1):
>   nvme: add atomic write support
> 
>  hw/nvme/ctrl.c | 147 -
>  hw/nvme/nvme.h |  17 ++
>  2 files changed, 163 insertions(+), 1 deletion(-)
> 
> -- 
> 2.39.3
> 

Hi Alan,

I have no obvious qualms about this. It is clearly useful for driver
testing and verification and does not negatively impact the performance
when this "faked" feature is not enabled.

Acked-by: Klaus Jensen 


signature.asc
Description: PGP signature

[PATCH] hw/nvme: fix mo field in io mgnt send

2024-05-06 Thread Klaus Jensen

From: Klaus Jensen 

The Management Operation field of I/O Management Send is only 8 bits,
not 16.

Fixes: 73064edfb864 ("hw/nvme: flexible data placement emulation")
Signed-off-by: Klaus Jensen 
---
 hw/nvme/ctrl.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c
index 9e7bbebc8bb0..ede5f281dd7c 100644
--- a/hw/nvme/ctrl.c
+++ b/hw/nvme/ctrl.c
@@ -4387,7 +4387,7 @@ static uint16_t nvme_io_mgmt_send(NvmeCtrl *n, 
NvmeRequest *req)
 {
 NvmeCmd *cmd = >cmd;
 uint32_t cdw10 = le32_to_cpu(cmd->cdw10);
-uint8_t mo = (cdw10 & 0xff);
+uint8_t mo = cdw10 & 0xf;
 
 switch (mo) {
 case NVME_IOMS_MO_NOP:

---
base-commit: 84b0eb1826f690aa8d51984644318ee6c810f5bf
change-id: 20240506-fix-ioms-mo-97098c6c5396

Best regards,
-- 
Klaus Jensen

Re: [PATCH v2 3/4] hw/nvme: Support SR-IOV VFs more than 127

2024-05-01 Thread Klaus Jensen

On Apr  1 04:30, Minwoo Im wrote:
> From: Minwoo Im 
> 
> The number of virtual functions(VFs) supported in SR-IOV is 64k as per
> spec.  To test a large number of MSI-X vectors mapping to CPU matrix in
> the QEMU system, we need much more than 127 VFs.  This patch made
> support for 256 VFs per a physical function(PF).
> 

With patch 2 in place, shouldn't it be relatively straight forward to
convert the static array to be dynamic and just use numvfs to size it?
Then we won't have to add another patch when someone comes around and
wants to bump this again ;)

signature.asc
Description: PGP signature

Re: [PATCH] hw/nvme: Add support for setting the MQES for the NVMe emulation

2024-05-01 Thread Klaus Jensen

On May  1 12:27, Berg, John wrote:
> On Thu, 2024-04-04 at 15:01 +0200, Klaus Jensen wrote:
> > On Apr  4 13:04, John Berg wrote:
> > > From: John Berg 
> > > 
> > > The MQES field in the CAP register describes the Maximum Queue
> > > Entries
> > > Supported for the IO queues of an NVMe controller. Adding a +1 to
> > > the
> > > value in this field results in the total queue size. A full queue
> > > is
> > > when a queue of size N contains N - 1 entries, and the minimum
> > > queue
> > > size is 2. Thus the lowest MQES value is 1.
> > > 
> > > This patch adds the new mqes property to the NVMe emulation which
> > > allows
> > > a user to specify the maximum queue size by setting this property.
> > > This
> > > is useful as it enables testing of NVMe controller where the MQES
> > > is
> > > relatively small. The smallest NVMe queue size supported in NVMe is
> > > 2
> > > submission and completion entries, which means that the smallest
> > > legal
> > > mqes value is 1.
> > > 
> > > The following example shows how the mqes can be set for a the NVMe
> > > emulation:
> > > 
> > > -drive id=nvme0,if=none,file=nvme.img,format=raw
> > > -device nvme,drive=nvme0,serial=foo,mqes=1
> > > 
> > > If the mqes property is not provided then the default mqes will
> > > still be
> > > 0x7ff (the queue size is 2048 entries).
> > > 
> > > Signed-off-by: John Berg 
> > > ---
> > >  hw/nvme/ctrl.c | 9 -
> > >  hw/nvme/nvme.h | 1 +
> > >  2 files changed, 9 insertions(+), 1 deletion(-)
> > > 
> > > diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c
> > > index 127c3d2383..86cda9bc73 100644
> > > --- a/hw/nvme/ctrl.c
> > > +++ b/hw/nvme/ctrl.c
> > > @@ -7805,6 +7805,12 @@ static bool nvme_check_params(NvmeCtrl *n,
> > > Error **errp)
> > >  return false;
> > >  }
> > >  
> > > +    if (params->mqes < 1)
> > > +    {
> > 
> > Please keep the `{` on the same line as the `if`. I think
> > checkpatch.pl
> > should catch this.
> > 
> > No need to send a v2, I'll fix it up when I apply it to nvme-next :)
> > 
> > Thanks!
> 
> 
> Hello
> 
> Sorry for chasing. I was just wondering when this patch will be
> applied. I can send a second revision if that helps.
> 

No need for the sorry. My apologies. It feel off my radar, so thanks for
the bump.

I've queued this on nvme-next, will send a pull for master ASAP.


signature.asc
Description: PGP signature

Re: [PATCH] hw/nvme: Add support for setting the MQES for the NVMe emulation

2024-04-04 Thread Klaus Jensen

On Apr  4 13:04, John Berg wrote:
> From: John Berg 
> 
> The MQES field in the CAP register describes the Maximum Queue Entries
> Supported for the IO queues of an NVMe controller. Adding a +1 to the
> value in this field results in the total queue size. A full queue is
> when a queue of size N contains N - 1 entries, and the minimum queue
> size is 2. Thus the lowest MQES value is 1.
> 
> This patch adds the new mqes property to the NVMe emulation which allows
> a user to specify the maximum queue size by setting this property. This
> is useful as it enables testing of NVMe controller where the MQES is
> relatively small. The smallest NVMe queue size supported in NVMe is 2
> submission and completion entries, which means that the smallest legal
> mqes value is 1.
> 
> The following example shows how the mqes can be set for a the NVMe
> emulation:
> 
> -drive id=nvme0,if=none,file=nvme.img,format=raw
> -device nvme,drive=nvme0,serial=foo,mqes=1
> 
> If the mqes property is not provided then the default mqes will still be
> 0x7ff (the queue size is 2048 entries).
> 
> Signed-off-by: John Berg 
> ---
>  hw/nvme/ctrl.c | 9 -
>  hw/nvme/nvme.h | 1 +
>  2 files changed, 9 insertions(+), 1 deletion(-)
> 
> diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c
> index 127c3d2383..86cda9bc73 100644
> --- a/hw/nvme/ctrl.c
> +++ b/hw/nvme/ctrl.c
> @@ -7805,6 +7805,12 @@ static bool nvme_check_params(NvmeCtrl *n, Error 
> **errp)
>  return false;
>  }
>  
> +if (params->mqes < 1)
> +{

Please keep the `{` on the same line as the `if`. I think checkpatch.pl
should catch this.

No need to send a v2, I'll fix it up when I apply it to nvme-next :)

Thanks!


signature.asc
Description: PGP signature

Re: [PATCH] hw/nvme: Add support for setting the MQES for the NVMe emulation

2024-04-04 Thread Klaus Jensen

On Apr  4 13:04, John Berg wrote:
> From: John Berg 
> 
> The MQES field in the CAP register describes the Maximum Queue Entries
> Supported for the IO queues of an NVMe controller. Adding a +1 to the
> value in this field results in the total queue size. A full queue is
> when a queue of size N contains N - 1 entries, and the minimum queue
> size is 2. Thus the lowest MQES value is 1.
> 
> This patch adds the new mqes property to the NVMe emulation which allows
> a user to specify the maximum queue size by setting this property. This
> is useful as it enables testing of NVMe controller where the MQES is
> relatively small. The smallest NVMe queue size supported in NVMe is 2
> submission and completion entries, which means that the smallest legal
> mqes value is 1.
> 
> The following example shows how the mqes can be set for a the NVMe
> emulation:
> 
> -drive id=nvme0,if=none,file=nvme.img,format=raw
> -device nvme,drive=nvme0,serial=foo,mqes=1
> 
> If the mqes property is not provided then the default mqes will still be
> 0x7ff (the queue size is 2048 entries).
> 
> Signed-off-by: John Berg 

LGTM,

Reviewed-by: Klaus Jensen 


signature.asc
Description: PGP signature

Re: [PATCH 17/19] hw/nvme: fix -Werror=maybe-uninitialized

2024-04-02 Thread Klaus Jensen

On Mar 28 14:20, marcandre.lur...@redhat.com wrote:
> From: Marc-André Lureau 
> 
> ../hw/nvme/ctrl.c:6081:21: error: ‘result’ may be used uninitialized 
> [-Werror=maybe-uninitialized]
> 
> It's not obvious that 'result' is set in all code paths. When  is
> a returned argument, it's even less clear.
> 
> Looking at various assignments, 0 seems to be a suitable default value.
> 
> Signed-off-by: Marc-André Lureau 
> ---
>  hw/nvme/ctrl.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c
> index c2b17de987..127c3d2383 100644
> --- a/hw/nvme/ctrl.c
> +++ b/hw/nvme/ctrl.c
> @@ -5894,7 +5894,7 @@ static uint16_t nvme_get_feature(NvmeCtrl *n, 
> NvmeRequest *req)
>  uint32_t dw10 = le32_to_cpu(cmd->cdw10);
>  uint32_t dw11 = le32_to_cpu(cmd->cdw11);
>  uint32_t nsid = le32_to_cpu(cmd->nsid);
> -uint32_t result;
> +uint32_t result = 0;
>  uint8_t fid = NVME_GETSETFEAT_FID(dw10);
>  NvmeGetFeatureSelect sel = NVME_GETFEAT_SELECT(dw10);
>  uint16_t iv;
> -- 
> 2.44.0
> 

Thanks!

Reviewed-by: Klaus Jensen 


signature.asc
Description: PGP signature

[PULL v2 3/6] MAINTAINERS: add Jesper as reviewer on hw/nvme

2024-03-12 Thread Klaus Jensen

From: Klaus Jensen 

My colleague, Jesper, will be assiting with hw/nvme related reviews. Add
him with R: so he gets automatically bugged going forward.

Cc: Jesper Devantier 
Acked-by: Jesper Devantier 
Signed-off-by: Klaus Jensen 
---
 MAINTAINERS | 1 +
 1 file changed, 1 insertion(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index 12f5e47a11f4..7f96ce857486 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -2407,6 +2407,7 @@ F: docs/system/devices/virtio-snd.rst
 nvme
 M: Keith Busch 
 M: Klaus Jensen 
+R: Jesper Devantier 
 L: qemu-block@nongnu.org
 S: Supported
 F: hw/nvme/*
-- 
2.44.0

[PULL v2 4/6] hw/nvme: Add NVMe NGUID property

2024-03-12 Thread Klaus Jensen

From: Roque Arcudia Hernandez 

This patch adds a way to specify an NGUID for a given NVMe Namespace using a
string of hexadecimal digits with an optional '-' separator to group bytes. For
instance:

-device nvme-ns,nguid="e9accd3b83904e13167cf0593437f57d"

If provided, the NGUID will be part of the Namespace Identification Descriptor
list and the Identify Namespace data.

Signed-off-by: Roque Arcudia Hernandez 
Signed-off-by: Nabih Estefan 
Reviewed-by: Klaus Jensen 
Signed-off-by: Klaus Jensen 
---
 docs/system/devices/nvme.rst |   7 ++
 hw/nvme/ctrl.c   |  12 +++
 hw/nvme/meson.build  |   2 +-
 hw/nvme/nguid.c  | 187 +++
 hw/nvme/ns.c |   2 +
 hw/nvme/nvme.h   |  26 +++--
 6 files changed, 229 insertions(+), 7 deletions(-)
 create mode 100644 hw/nvme/nguid.c

diff --git a/docs/system/devices/nvme.rst b/docs/system/devices/nvme.rst
index 4ea957baed10..d2b1ca96455f 100644
--- a/docs/system/devices/nvme.rst
+++ b/docs/system/devices/nvme.rst
@@ -81,6 +81,13 @@ There are a number of parameters available:
   Set the UUID of the namespace. This will be reported as a "Namespace UUID"
   descriptor in the Namespace Identification Descriptor List.
 
+``nguid``
+  Set the NGUID of the namespace. This will be reported as a "Namespace 
Globally
+  Unique Identifier" descriptor in the Namespace Identification Descriptor 
List.
+  It is specified as a string of hexadecimal digits containing exactly 16 bytes
+  or "auto" for a random value. An optional '-' separator could be used to 
group
+  bytes. If not specified the NGUID will remain all zeros.
+
 ``eui64``
   Set the EUI-64 of the namespace. This will be reported as a "IEEE Extended
   Unique Identifier" descriptor in the Namespace Identification Descriptor 
List.
diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c
index abc0387f2ca8..6c5a2b875da8 100644
--- a/hw/nvme/ctrl.c
+++ b/hw/nvme/ctrl.c
@@ -5640,6 +5640,10 @@ static uint16_t nvme_identify_ns_descr_list(NvmeCtrl *n, 
NvmeRequest *req)
 NvmeIdNsDescr hdr;
 uint8_t v[NVME_NIDL_UUID];
 } QEMU_PACKED uuid = {};
+struct {
+NvmeIdNsDescr hdr;
+uint8_t v[NVME_NIDL_NGUID];
+} QEMU_PACKED nguid = {};
 struct {
 NvmeIdNsDescr hdr;
 uint64_t v;
@@ -5668,6 +5672,14 @@ static uint16_t nvme_identify_ns_descr_list(NvmeCtrl *n, 
NvmeRequest *req)
 pos += sizeof(uuid);
 }
 
+if (!nvme_nguid_is_null(>params.nguid)) {
+nguid.hdr.nidt = NVME_NIDT_NGUID;
+nguid.hdr.nidl = NVME_NIDL_NGUID;
+memcpy(nguid.v, ns->params.nguid.data, NVME_NIDL_NGUID);
+memcpy(pos, , sizeof(nguid));
+pos += sizeof(nguid);
+}
+
 if (ns->params.eui64) {
 eui64.hdr.nidt = NVME_NIDT_EUI64;
 eui64.hdr.nidl = NVME_NIDL_EUI64;
diff --git a/hw/nvme/meson.build b/hw/nvme/meson.build
index 1a6a2ca2f307..7d5caa53c280 100644
--- a/hw/nvme/meson.build
+++ b/hw/nvme/meson.build
@@ -1 +1 @@
-system_ss.add(when: 'CONFIG_NVME_PCI', if_true: files('ctrl.c', 'dif.c', 
'ns.c', 'subsys.c'))
+system_ss.add(when: 'CONFIG_NVME_PCI', if_true: files('ctrl.c', 'dif.c', 
'ns.c', 'subsys.c', 'nguid.c'))
\ No newline at end of file
diff --git a/hw/nvme/nguid.c b/hw/nvme/nguid.c
new file mode 100644
index ..829832bd9f41
--- /dev/null
+++ b/hw/nvme/nguid.c
@@ -0,0 +1,187 @@
+/*
+ *  QEMU NVMe NGUID functions
+ *
+ * Copyright 2024 Google LLC
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ */
+
+#include "qemu/osdep.h"
+#include "qapi/visitor.h"
+#include "qemu/ctype.h"
+#include "nvme.h"
+
+#define NGUID_SEPARATOR '-'
+
+#define NGUID_VALUE_AUTO "auto"
+
+#define NGUID_FMT  \
+"%02hhx%02hhx%02hhx%02hhx" \
+"%02hhx%02hhx%02hhx%02hhx" \
+"%02hhx%02hhx%02hhx%02hhx" \
+"%02hhx%02hhx%02hhx%02hhx"
+
+#define NGUID_STR_LEN (2 * NGUID_LEN + 1)
+
+bool nvme_nguid_is_null(const NvmeNGUID *nguid)
+{
+static NvmeNGUID null_nguid;
+return memcmp(nguid, _nguid, sizeof(NvmeNGUID)) == 0;
+}
+
+static void nvme_nguid_generate(NvmeNGUID *out)
+{
+int i;
+uint32_t x;
+
+QEMU_BUILD_BUG_ON((NGUID_LEN % sizeof(x)) != 0);
+
+for (i = 0; i < NGUID_LEN; i += sizeof(x)) {
+x = g_random_int();
+memcpy(>data[i], , sizeof(x));
+}
+}
+
+/*
+ * The Linux Kernel typically prints the NGUID of a

[PULL v2 5/6] hw/nvme: generalize the mbar size helper

2024-03-12 Thread Klaus Jensen

From: Klaus Jensen 

Generalize the mbar size helper such that it can handle cases where the
MSI-X table and PBA are expected to be in an exclusive bar.

Cc: qemu-sta...@nongnu.org
Reviewed-by: Jesper Wendel Devantier 
Signed-off-by: Klaus Jensen 
---
 hw/nvme/ctrl.c | 28 
 1 file changed, 16 insertions(+), 12 deletions(-)

diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c
index 6c5a2b875da8..efcfd7171066 100644
--- a/hw/nvme/ctrl.c
+++ b/hw/nvme/ctrl.c
@@ -8015,13 +8015,18 @@ static void nvme_init_pmr(NvmeCtrl *n, PCIDevice 
*pci_dev)
 memory_region_set_enabled(>pmr.dev->mr, false);
 }
 
-static uint64_t nvme_bar_size(unsigned total_queues, unsigned total_irqs,
-  unsigned *msix_table_offset,
-  unsigned *msix_pba_offset)
+static uint64_t nvme_mbar_size(unsigned total_queues, unsigned total_irqs,
+   unsigned *msix_table_offset,
+   unsigned *msix_pba_offset)
 {
-uint64_t bar_size, msix_table_size, msix_pba_size;
+uint64_t bar_size, msix_table_size;
 
 bar_size = sizeof(NvmeBar) + 2 * total_queues * NVME_DB_SIZE;
+
+if (total_irqs == 0) {
+goto out;
+}
+
 bar_size = QEMU_ALIGN_UP(bar_size, 4 * KiB);
 
 if (msix_table_offset) {
@@ -8036,11 +8041,10 @@ static uint64_t nvme_bar_size(unsigned total_queues, 
unsigned total_irqs,
 *msix_pba_offset = bar_size;
 }
 
-msix_pba_size = QEMU_ALIGN_UP(total_irqs, 64) / 8;
-bar_size += msix_pba_size;
+bar_size += QEMU_ALIGN_UP(total_irqs, 64) / 8;
 
-bar_size = pow2ceil(bar_size);
-return bar_size;
+out:
+return pow2ceil(bar_size);
 }
 
 static void nvme_init_sriov(NvmeCtrl *n, PCIDevice *pci_dev, uint16_t offset)
@@ -8048,7 +8052,7 @@ static void nvme_init_sriov(NvmeCtrl *n, PCIDevice 
*pci_dev, uint16_t offset)
 uint16_t vf_dev_id = n->params.use_intel_id ?
  PCI_DEVICE_ID_INTEL_NVME : PCI_DEVICE_ID_REDHAT_NVME;
 NvmePriCtrlCap *cap = >pri_ctrl_cap;
-uint64_t bar_size = nvme_bar_size(le16_to_cpu(cap->vqfrsm),
+uint64_t bar_size = nvme_mbar_size(le16_to_cpu(cap->vqfrsm),
   le16_to_cpu(cap->vifrsm),
   NULL, NULL);
 
@@ -8087,7 +8091,7 @@ static bool nvme_init_pci(NvmeCtrl *n, PCIDevice 
*pci_dev, Error **errp)
 ERRP_GUARD();
 uint8_t *pci_conf = pci_dev->config;
 uint64_t bar_size;
-unsigned msix_table_offset, msix_pba_offset;
+unsigned msix_table_offset = 0, msix_pba_offset = 0;
 int ret;
 
 pci_conf[PCI_INTERRUPT_PIN] = 1;
@@ -8110,8 +8114,8 @@ static bool nvme_init_pci(NvmeCtrl *n, PCIDevice 
*pci_dev, Error **errp)
 }
 
 /* add one to max_ioqpairs to account for the admin queue pair */
-bar_size = nvme_bar_size(n->params.max_ioqpairs + 1, n->params.msix_qsize,
- _table_offset, _pba_offset);
+bar_size = nvme_mbar_size(n->params.max_ioqpairs + 1, n->params.msix_qsize,
+  _table_offset, _pba_offset);
 
 memory_region_init(>bar0, OBJECT(n), "nvme-bar0", bar_size);
 memory_region_init_io(>iomem, OBJECT(n), _mmio_ops, n, "nvme",
-- 
2.44.0

[PULL v2 0/6] hw/nvme updates

2024-03-12 Thread Klaus Jensen

From: Klaus Jensen 

Hi,

Sorry about the compilation error in v1. Did a full CI run for v2.

  https://gitlab.com/birkelund/qemu/-/pipelines/1210559370



The following changes since commit 8f3f329f5e0117bd1a23a79ab751f8a7d3471e4b:

  Merge tag 'migration-20240311-pull-request' of https://gitlab.com/peterx/qemu 
into staging (2024-03-12 11:35:41 +)

are available in the Git repository at:

  https://gitlab.com/birkelund/qemu.git tags/nvme-next-pull-request

for you to fetch changes up to fa905f65c5549703279f68c253914799b10ada47:

  hw/nvme: add machine compatibility parameter to enable msix exclusive bar 
(2024-03-12 16:05:53 +0100)


hw/nvme updates
-BEGIN PGP SIGNATURE-

iQEzBAABCgAdFiEEUigzqnXi3OaiR2bATeGvMW1PDekFAmXwj+wACgkQTeGvMW1P
DelOsAf+Jg51zf3vtWpe4MS/WtULjSr5GtnXMJ5hkHS0WdKOiLW3P+pUZXbsohmh
faVlYeCWptF1CFGfxBf4Trc7XzJy8J6W1YJEofs/07hIAnazo9pwk5shoVu4oiex
HVsBg7/9y7DuiEEg1MRvVvW895cP60WmG1AqU63SYwrVgxZ51ZH0XNuyRhQeYC/6
OSXJ3FDYu2iJQ58uEzGEwv8vhskIpEFTdz0J6gQVxIdzFBbuk87VgZo6pqwgfMBm
/65K85TgFBT4SASc7a2iSUv+iAqSCA6Jdy0VWxCYCikiv5nuPCMCrlbvqcVp+i2B
GKtgfFXhtgepxx6jmYd03EkRjCrxUA==
=W3gg
-END PGP SIGNATURE-



Klaus Jensen (4):
  hw/nvme: fix invalid check on mcl
  MAINTAINERS: add Jesper as reviewer on hw/nvme
  hw/nvme: generalize the mbar size helper
  hw/nvme: add machine compatibility parameter to enable msix exclusive
bar

Minwoo Im (1):
  hw/nvme: separate 'serial' property for VFs

Roque Arcudia Hernandez (1):
  hw/nvme: Add NVMe NGUID property

 MAINTAINERS  |   1 +
 docs/system/devices/nvme.rst |   7 ++
 hw/core/machine.c|   1 +
 hw/nvme/ctrl.c   |  99 +--
 hw/nvme/meson.build  |   2 +-
 hw/nvme/nguid.c  | 187 +++
 hw/nvme/ns.c |   2 +
 hw/nvme/nvme.h   |  27 +++--
 8 files changed, 291 insertions(+), 35 deletions(-)
 create mode 100644 hw/nvme/nguid.c

-- 
2.44.0

[PULL v2 6/6] hw/nvme: add machine compatibility parameter to enable msix exclusive bar

2024-03-12 Thread Klaus Jensen

From: Klaus Jensen 

Commit 1901b4967c3f ("hw/block/nvme: move msix table and pba to BAR 0")
moved the MSI-X table and PBA to BAR 0 to make room for enabling CMR and
PMR at the same time. As reported by Julien Grall in #2184, this breaks
migration through system hibernation.

Add a machine compatibility parameter and set it on machines pre 6.0 to
enable the old behavior automatically, restoring the hibernation
migration support.

Cc: qemu-sta...@nongnu.org
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/2184
Fixes: 1901b4967c3f ("hw/block/nvme: move msix table and pba to BAR 0")
Reported-by: Julien Grall jul...@xen.org
Tested-by: Julien Grall jul...@xen.org
Reviewed-by: Jesper Wendel Devantier 
Signed-off-by: Klaus Jensen 
---
 hw/core/machine.c |  1 +
 hw/nvme/ctrl.c| 53 +--
 hw/nvme/nvme.h|  1 +
 3 files changed, 39 insertions(+), 16 deletions(-)

diff --git a/hw/core/machine.c b/hw/core/machine.c
index 0e9d646b61e1..37e5d4c8dfd5 100644
--- a/hw/core/machine.c
+++ b/hw/core/machine.c
@@ -102,6 +102,7 @@ GlobalProperty hw_compat_5_2[] = {
 { "PIIX4_PM", "smm-compat", "on"},
 { "virtio-blk-device", "report-discard-granularity", "off" },
 { "virtio-net-pci-base", "vectors", "3"},
+{ "nvme", "msix-exclusive-bar", "on"},
 };
 const size_t hw_compat_5_2_len = G_N_ELEMENTS(hw_compat_5_2);
 
diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c
index efcfd7171066..036b15403a02 100644
--- a/hw/nvme/ctrl.c
+++ b/hw/nvme/ctrl.c
@@ -7810,6 +7810,11 @@ static bool nvme_check_params(NvmeCtrl *n, Error **errp)
 }
 
 if (n->pmr.dev) {
+if (params->msix_exclusive_bar) {
+error_setg(errp, "not enough BARs available to enable PMR");
+return false;
+}
+
 if (host_memory_backend_is_mapped(n->pmr.dev)) {
 error_setg(errp, "can't use already busy memdev: %s",

object_get_canonical_path_component(OBJECT(n->pmr.dev)));
@@ -8113,24 +8118,38 @@ static bool nvme_init_pci(NvmeCtrl *n, PCIDevice 
*pci_dev, Error **errp)
 pcie_ari_init(pci_dev, 0x100);
 }
 
-/* add one to max_ioqpairs to account for the admin queue pair */
-bar_size = nvme_mbar_size(n->params.max_ioqpairs + 1, n->params.msix_qsize,
-  _table_offset, _pba_offset);
-
-memory_region_init(>bar0, OBJECT(n), "nvme-bar0", bar_size);
-memory_region_init_io(>iomem, OBJECT(n), _mmio_ops, n, "nvme",
-  msix_table_offset);
-memory_region_add_subregion(>bar0, 0, >iomem);
-
-if (pci_is_vf(pci_dev)) {
-pcie_sriov_vf_register_bar(pci_dev, 0, >bar0);
-} else {
+if (n->params.msix_exclusive_bar && !pci_is_vf(pci_dev)) {
+bar_size = nvme_mbar_size(n->params.max_ioqpairs + 1, 0, NULL, NULL);
+memory_region_init_io(>iomem, OBJECT(n), _mmio_ops, n, "nvme",
+  bar_size);
 pci_register_bar(pci_dev, 0, PCI_BASE_ADDRESS_SPACE_MEMORY |
- PCI_BASE_ADDRESS_MEM_TYPE_64, >bar0);
+ PCI_BASE_ADDRESS_MEM_TYPE_64, >iomem);
+ret = msix_init_exclusive_bar(pci_dev, n->params.msix_qsize, 4, errp);
+} else {
+assert(n->params.msix_qsize >= 1);
+
+/* add one to max_ioqpairs to account for the admin queue pair */
+bar_size = nvme_mbar_size(n->params.max_ioqpairs + 1,
+  n->params.msix_qsize, _table_offset,
+  _pba_offset);
+
+memory_region_init(>bar0, OBJECT(n), "nvme-bar0", bar_size);
+memory_region_init_io(>iomem, OBJECT(n), _mmio_ops, n, "nvme",
+  msix_table_offset);
+memory_region_add_subregion(>bar0, 0, >iomem);
+
+if (pci_is_vf(pci_dev)) {
+pcie_sriov_vf_register_bar(pci_dev, 0, >bar0);
+} else {
+pci_register_bar(pci_dev, 0, PCI_BASE_ADDRESS_SPACE_MEMORY |
+ PCI_BASE_ADDRESS_MEM_TYPE_64, >bar0);
+}
+
+ret = msix_init(pci_dev, n->params.msix_qsize,
+>bar0, 0, msix_table_offset,
+>bar0, 0, msix_pba_offset, 0, errp);
 }
-ret = msix_init(pci_dev, n->params.msix_qsize,
->bar0, 0, msix_table_offset,
->bar0, 0, msix_pba_offset, 0, errp);
+
 if (ret == -ENOTSUP) {
 /* report that msix is not supported, but do not error out */
 warn_report_err(*errp);
@@ -8434,6 +8453,8 @@ static Property nvme_props[] = {
   params.sriov_max_vi_per_vf, 0),
 DEFINE_PROP_UINT8("sriov_m

[PULL v2 1/6] hw/nvme: separate 'serial' property for VFs

2024-03-12 Thread Klaus Jensen

From: Minwoo Im 

Currently, when a VF is created, it uses the 'params' object of the PF
as it is. In other words, the 'params.serial' string memory area is also
shared. In this situation, if the VF is removed from the system, the
PF's 'params.serial' object is released with object_finalize() followed
by object_property_del_all() which release the memory for 'serial'
property. If that happens, the next VF created will inherit a serial
from a corrupted memory area.

If this happens, an error will occur when comparing subsys->serial and
n->params.serial in the nvme_subsys_register_ctrl() function.

Cc: qemu-sta...@nongnu.org
Fixes: 44c2c09488db ("hw/nvme: Add support for SR-IOV")
Signed-off-by: Minwoo Im 
Reviewed-by: Klaus Jensen 
Signed-off-by: Klaus Jensen 
---
 hw/nvme/ctrl.c | 8 +++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c
index 76fe0397045b..94ef63945725 100644
--- a/hw/nvme/ctrl.c
+++ b/hw/nvme/ctrl.c
@@ -8309,9 +8309,15 @@ static void nvme_realize(PCIDevice *pci_dev, Error 
**errp)
 if (pci_is_vf(pci_dev)) {
 /*
  * VFs derive settings from the parent. PF's lifespan exceeds
- * that of VF's, so it's safe to share params.serial.
+ * that of VF's.
  */
 memcpy(>params, >params, sizeof(NvmeParams));
+
+/*
+ * Set PF's serial value to a new string memory to prevent 'serial'
+ * property object release of PF when a VF is removed from the system.
+ */
+n->params.serial = g_strdup(pn->params.serial);
 n->subsys = pn->subsys;
 }
 
-- 
2.44.0

[PULL v2 2/6] hw/nvme: fix invalid check on mcl

2024-03-12 Thread Klaus Jensen

From: Klaus Jensen 

The number of logical blocks within a source range is converted into a
1s based number at the time of parsing. However, when verifying the copy
length we add one again, causing the check against MCL to fail in error.

Cc: qemu-sta...@nongnu.org
Fixes: 381ab99d8587 ("hw/nvme: check maximum copy length (MCL) for COPY")
Reviewed-by: Minwoo Im 
Signed-off-by: Klaus Jensen 
---
 hw/nvme/ctrl.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c
index 94ef63945725..abc0387f2ca8 100644
--- a/hw/nvme/ctrl.c
+++ b/hw/nvme/ctrl.c
@@ -2855,7 +2855,7 @@ static inline uint16_t nvme_check_copy_mcl(NvmeNamespace 
*ns,
 uint32_t nlb;
 nvme_copy_source_range_parse(iocb->ranges, idx, iocb->format, NULL,
  , NULL, NULL, NULL);
-copy_len += nlb + 1;
+copy_len += nlb;
 }
 
 if (copy_len > ns->id_ns.mcl) {
-- 
2.44.0

Re: [PULL 0/6] hw/nvme updates

2024-03-12 Thread Klaus Jensen

On Mar 12 11:34, Peter Maydell wrote:
> On Mon, 11 Mar 2024 at 19:11, Klaus Jensen  wrote:
> >
> > From: Klaus Jensen 
> >
> > Hi,
> >
> > The following changes since commit 7489f7f3f81dcb776df8c1b9a9db281fc21bf05f:
> >
> >   Merge tag 'hw-misc-20240309' of https://github.com/philmd/qemu into 
> > staging (2024-03-09 20:12:21 +)
> >
> > are available in the Git repository at:
> >
> >   https://gitlab.com/birkelund/qemu.git tags/nvme-next-pull-request
> >
> > for you to fetch changes up to a1505d799232939bf90c1b3e1fc20e81cd398404:
> >
> >   hw/nvme: add machine compatibility parameter to enable msix exclusive bar 
> > (2024-03-11 20:07:41 +0100)
> >
> > 
> > hw/nvme updates
> > -BEGIN PGP SIGNATURE-
> >
> > iQEzBAABCgAdFiEEUigzqnXi3OaiR2bATeGvMW1PDekFAmXvVsYACgkQTeGvMW1P
> > DemWtwf9HU3cjtvCp8AeHGoPFTwp8/Vx3cQlQ6ilADKSDm44up2+M504xE/Mdviv
> > 6y3PTPe1yiEpg/MbjWTX/df5lo+VdNoCuCyjph9mea0s1QAjCfVpl+KLMUVF/Oj5
> > y1Iz9PQqOVDJ3O4xlgmPTfd8NXE/frNJaiXAjFuBxF2+4lilD5kMxpyu7DXbLiy2
> > Szd1I3DhFAEOLEbrSSRDI3Fpy0KBdRzdKuUfmRdrHzbmhzHJefW7wnZ3aAiDboaD
> > Ny7y/aovmjGymMp9GrBKWhUFPfSUtJ8l8j4Z7acQs+VDxg8lcAHCJKOyqCBTspUL
> > PSnDe6E/CRyjrG2fUVXTLb6YW1eibQ==
> > =Ld7a
> > -END PGP SIGNATURE-
> 
> Hi; I'm afraid this fails to build for some jobs, eg
> https://gitlab.com/qemu-project/qemu/-/jobs/6373091994
> https://gitlab.com/qemu-project/qemu/-/jobs/6373091978
> https://gitlab.com/qemu-project/qemu/-/jobs/6373091975
> 
> ../hw/nvme/ctrl.c: In function ‘nvme_realize’:
> ../hw/nvme/ctrl.c:8146:15: error: ‘msix_pba_offset’ may be used
> uninitialized in this function [-Werror=maybe-uninitialized]
> 8146 | ret = msix_init(pci_dev, n->params.msix_qsize,
>  | ^~~~
> 8147 | >bar0, 0, msix_table_offset,
>  | ~~~
> 8148 | >bar0, 0, msix_pba_offset, 0, errp);
>  | ~~
> ../hw/nvme/ctrl.c:8099:33: note: ‘msix_pba_offset’ was declared here
> 8099 | unsigned msix_table_offset, msix_pba_offset;
>  | ^~~
> ../hw/nvme/ctrl.c:8135:9: error: ‘msix_table_offset’ may be used
> uninitialized in this function [-Werror=maybe-uninitialized]
> 8135 | memory_region_init_io(>iomem, OBJECT(n), _mmio_ops, n, "nvme",
>  | ^~
> 8136 | msix_table_offset);
>  | ~~
> ../hw/nvme/ctrl.c:8099:14: note: ‘msix_table_offset’ was declared here
> 8099 | unsigned msix_table_offset, msix_pba_offset;
>  | ^
> cc1: all warnings being treated as errors
> 
> 
> I think this is because the compiler notices that nvme_mbar_size() has
> an early-exit code path which never initializes *msix_table_offset
> and *msix-pba_offset.
> 

Crap, yeah. I'll fix it up. Thanks!


signature.asc
Description: PGP signature

[PULL 2/6] hw/nvme: fix invalid check on mcl

2024-03-11 Thread Klaus Jensen

From: Klaus Jensen 

The number of logical blocks within a source range is converted into a
1s based number at the time of parsing. However, when verifying the copy
length we add one again, causing the check against MCL to fail in error.

Cc: qemu-sta...@nongnu.org
Fixes: 381ab99d8587 ("hw/nvme: check maximum copy length (MCL) for COPY")
Reviewed-by: Minwoo Im 
Signed-off-by: Klaus Jensen 
---
 hw/nvme/ctrl.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c
index 94ef63945725..abc0387f2ca8 100644
--- a/hw/nvme/ctrl.c
+++ b/hw/nvme/ctrl.c
@@ -2855,7 +2855,7 @@ static inline uint16_t nvme_check_copy_mcl(NvmeNamespace 
*ns,
 uint32_t nlb;
 nvme_copy_source_range_parse(iocb->ranges, idx, iocb->format, NULL,
  , NULL, NULL, NULL);
-copy_len += nlb + 1;
+copy_len += nlb;
 }
 
 if (copy_len > ns->id_ns.mcl) {
-- 
2.44.0

[PULL 6/6] hw/nvme: add machine compatibility parameter to enable msix exclusive bar

2024-03-11 Thread Klaus Jensen

From: Klaus Jensen 

Commit 1901b4967c3f ("hw/block/nvme: move msix table and pba to BAR 0")
moved the MSI-X table and PBA to BAR 0 to make room for enabling CMR and
PMR at the same time. As reported by Julien Grall in #2184, this breaks
migration through system hibernation.

Add a machine compatibility parameter and set it on machines pre 6.0 to
enable the old behavior automatically, restoring the hibernation
migration support.

Cc: qemu-sta...@nongnu.org
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/2184
Fixes: 1901b4967c3f ("hw/block/nvme: move msix table and pba to BAR 0")
Reported-by: Julien Grall jul...@xen.org
Tested-by: Julien Grall jul...@xen.org
Reviewed-by: Jesper Wendel Devantier 
Signed-off-by: Klaus Jensen 
---
 hw/core/machine.c |  1 +
 hw/nvme/ctrl.c| 51 ---
 hw/nvme/nvme.h|  1 +
 3 files changed, 37 insertions(+), 16 deletions(-)

diff --git a/hw/core/machine.c b/hw/core/machine.c
index 9ac5d5389a6c..f3012bca1370 100644
--- a/hw/core/machine.c
+++ b/hw/core/machine.c
@@ -100,6 +100,7 @@ GlobalProperty hw_compat_5_2[] = {
 { "PIIX4_PM", "smm-compat", "on"},
 { "virtio-blk-device", "report-discard-granularity", "off" },
 { "virtio-net-pci-base", "vectors", "3"},
+{ "nvme", "msix-exclusive-bar", "on"},
 };
 const size_t hw_compat_5_2_len = G_N_ELEMENTS(hw_compat_5_2);
 
diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c
index 5ee8deda22a4..6210b7098845 100644
--- a/hw/nvme/ctrl.c
+++ b/hw/nvme/ctrl.c
@@ -7810,6 +7810,11 @@ static bool nvme_check_params(NvmeCtrl *n, Error **errp)
 }
 
 if (n->pmr.dev) {
+if (params->msix_exclusive_bar) {
+error_setg(errp, "not enough BARs available to enable PMR");
+return false;
+}
+
 if (host_memory_backend_is_mapped(n->pmr.dev)) {
 error_setg(errp, "can't use already busy memdev: %s",

object_get_canonical_path_component(OBJECT(n->pmr.dev)));
@@ -8113,24 +8118,36 @@ static bool nvme_init_pci(NvmeCtrl *n, PCIDevice 
*pci_dev, Error **errp)
 pcie_ari_init(pci_dev, 0x100);
 }
 
-/* add one to max_ioqpairs to account for the admin queue pair */
-bar_size = nvme_mbar_size(n->params.max_ioqpairs + 1, n->params.msix_qsize,
-  _table_offset, _pba_offset);
-
-memory_region_init(>bar0, OBJECT(n), "nvme-bar0", bar_size);
-memory_region_init_io(>iomem, OBJECT(n), _mmio_ops, n, "nvme",
-  msix_table_offset);
-memory_region_add_subregion(>bar0, 0, >iomem);
-
-if (pci_is_vf(pci_dev)) {
-pcie_sriov_vf_register_bar(pci_dev, 0, >bar0);
-} else {
+if (n->params.msix_exclusive_bar && !pci_is_vf(pci_dev)) {
+bar_size = nvme_mbar_size(n->params.max_ioqpairs + 1, 0, NULL, NULL);
+memory_region_init_io(>iomem, OBJECT(n), _mmio_ops, n, "nvme",
+  bar_size);
 pci_register_bar(pci_dev, 0, PCI_BASE_ADDRESS_SPACE_MEMORY |
- PCI_BASE_ADDRESS_MEM_TYPE_64, >bar0);
+ PCI_BASE_ADDRESS_MEM_TYPE_64, >iomem);
+ret = msix_init_exclusive_bar(pci_dev, n->params.msix_qsize, 4, errp);
+} else {
+/* add one to max_ioqpairs to account for the admin queue pair */
+bar_size = nvme_mbar_size(n->params.max_ioqpairs + 1,
+  n->params.msix_qsize, _table_offset,
+  _pba_offset);
+
+memory_region_init(>bar0, OBJECT(n), "nvme-bar0", bar_size);
+memory_region_init_io(>iomem, OBJECT(n), _mmio_ops, n, "nvme",
+  msix_table_offset);
+memory_region_add_subregion(>bar0, 0, >iomem);
+
+if (pci_is_vf(pci_dev)) {
+pcie_sriov_vf_register_bar(pci_dev, 0, >bar0);
+} else {
+pci_register_bar(pci_dev, 0, PCI_BASE_ADDRESS_SPACE_MEMORY |
+ PCI_BASE_ADDRESS_MEM_TYPE_64, >bar0);
+}
+
+ret = msix_init(pci_dev, n->params.msix_qsize,
+>bar0, 0, msix_table_offset,
+>bar0, 0, msix_pba_offset, 0, errp);
 }
-ret = msix_init(pci_dev, n->params.msix_qsize,
->bar0, 0, msix_table_offset,
->bar0, 0, msix_pba_offset, 0, errp);
+
 if (ret == -ENOTSUP) {
 /* report that msix is not supported, but do not error out */
 warn_report_err(*errp);
@@ -8434,6 +8451,8 @@ static Property nvme_props[] = {
   params.sriov_max_vi_per_vf, 0),
 DEFINE_PROP_UINT8("sriov_max_vq_per_vf", NvmeCtrl,

[PULL 5/6] hw/nvme: generalize the mbar size helper

2024-03-11 Thread Klaus Jensen

From: Klaus Jensen 

Generalize the mbar size helper such that it can handle cases where the
MSI-X table and PBA are expected to be in an exclusive bar.

Cc: qemu-sta...@nongnu.org
Reviewed-by: Jesper Wendel Devantier 
Signed-off-by: Klaus Jensen 
---
 hw/nvme/ctrl.c | 26 +++---
 1 file changed, 15 insertions(+), 11 deletions(-)

diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c
index 6c5a2b875da8..5ee8deda22a4 100644
--- a/hw/nvme/ctrl.c
+++ b/hw/nvme/ctrl.c
@@ -8015,13 +8015,18 @@ static void nvme_init_pmr(NvmeCtrl *n, PCIDevice 
*pci_dev)
 memory_region_set_enabled(>pmr.dev->mr, false);
 }
 
-static uint64_t nvme_bar_size(unsigned total_queues, unsigned total_irqs,
-  unsigned *msix_table_offset,
-  unsigned *msix_pba_offset)
+static uint64_t nvme_mbar_size(unsigned total_queues, unsigned total_irqs,
+   unsigned *msix_table_offset,
+   unsigned *msix_pba_offset)
 {
-uint64_t bar_size, msix_table_size, msix_pba_size;
+uint64_t bar_size, msix_table_size;
 
 bar_size = sizeof(NvmeBar) + 2 * total_queues * NVME_DB_SIZE;
+
+if (total_irqs == 0) {
+goto out;
+}
+
 bar_size = QEMU_ALIGN_UP(bar_size, 4 * KiB);
 
 if (msix_table_offset) {
@@ -8036,11 +8041,10 @@ static uint64_t nvme_bar_size(unsigned total_queues, 
unsigned total_irqs,
 *msix_pba_offset = bar_size;
 }
 
-msix_pba_size = QEMU_ALIGN_UP(total_irqs, 64) / 8;
-bar_size += msix_pba_size;
+bar_size += QEMU_ALIGN_UP(total_irqs, 64) / 8;
 
-bar_size = pow2ceil(bar_size);
-return bar_size;
+out:
+return pow2ceil(bar_size);
 }
 
 static void nvme_init_sriov(NvmeCtrl *n, PCIDevice *pci_dev, uint16_t offset)
@@ -8048,7 +8052,7 @@ static void nvme_init_sriov(NvmeCtrl *n, PCIDevice 
*pci_dev, uint16_t offset)
 uint16_t vf_dev_id = n->params.use_intel_id ?
  PCI_DEVICE_ID_INTEL_NVME : PCI_DEVICE_ID_REDHAT_NVME;
 NvmePriCtrlCap *cap = >pri_ctrl_cap;
-uint64_t bar_size = nvme_bar_size(le16_to_cpu(cap->vqfrsm),
+uint64_t bar_size = nvme_mbar_size(le16_to_cpu(cap->vqfrsm),
   le16_to_cpu(cap->vifrsm),
   NULL, NULL);
 
@@ -8110,8 +8114,8 @@ static bool nvme_init_pci(NvmeCtrl *n, PCIDevice 
*pci_dev, Error **errp)
 }
 
 /* add one to max_ioqpairs to account for the admin queue pair */
-bar_size = nvme_bar_size(n->params.max_ioqpairs + 1, n->params.msix_qsize,
- _table_offset, _pba_offset);
+bar_size = nvme_mbar_size(n->params.max_ioqpairs + 1, n->params.msix_qsize,
+  _table_offset, _pba_offset);
 
 memory_region_init(>bar0, OBJECT(n), "nvme-bar0", bar_size);
 memory_region_init_io(>iomem, OBJECT(n), _mmio_ops, n, "nvme",
-- 
2.44.0

[PULL 3/6] MAINTAINERS: add Jesper as reviewer on hw/nvme

2024-03-11 Thread Klaus Jensen

From: Klaus Jensen 

My colleague, Jesper, will be assiting with hw/nvme related reviews. Add
him with R: so he gets automatically bugged going forward.

Cc: Jesper Devantier 
Acked-by: Jesper Devantier 
Signed-off-by: Klaus Jensen 
---
 MAINTAINERS | 1 +
 1 file changed, 1 insertion(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index 4d96f855de5c..e21e18e93c63 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -2407,6 +2407,7 @@ F: docs/system/devices/virtio-snd.rst
 nvme
 M: Keith Busch 
 M: Klaus Jensen 
+R: Jesper Devantier 
 L: qemu-block@nongnu.org
 S: Supported
 F: hw/nvme/*
-- 
2.44.0

[PULL 4/6] hw/nvme: Add NVMe NGUID property

2024-03-11 Thread Klaus Jensen

From: Roque Arcudia Hernandez 

This patch adds a way to specify an NGUID for a given NVMe Namespace using a
string of hexadecimal digits with an optional '-' separator to group bytes. For
instance:

-device nvme-ns,nguid="e9accd3b83904e13167cf0593437f57d"

If provided, the NGUID will be part of the Namespace Identification Descriptor
list and the Identify Namespace data.

Signed-off-by: Roque Arcudia Hernandez 
Signed-off-by: Nabih Estefan 
Reviewed-by: Klaus Jensen 
Signed-off-by: Klaus Jensen 
---
 docs/system/devices/nvme.rst |   7 ++
 hw/nvme/ctrl.c   |  12 +++
 hw/nvme/meson.build  |   2 +-
 hw/nvme/nguid.c  | 187 +++
 hw/nvme/ns.c |   2 +
 hw/nvme/nvme.h   |  26 +++--
 6 files changed, 229 insertions(+), 7 deletions(-)
 create mode 100644 hw/nvme/nguid.c

diff --git a/docs/system/devices/nvme.rst b/docs/system/devices/nvme.rst
index 4ea957baed10..d2b1ca96455f 100644
--- a/docs/system/devices/nvme.rst
+++ b/docs/system/devices/nvme.rst
@@ -81,6 +81,13 @@ There are a number of parameters available:
   Set the UUID of the namespace. This will be reported as a "Namespace UUID"
   descriptor in the Namespace Identification Descriptor List.
 
+``nguid``
+  Set the NGUID of the namespace. This will be reported as a "Namespace 
Globally
+  Unique Identifier" descriptor in the Namespace Identification Descriptor 
List.
+  It is specified as a string of hexadecimal digits containing exactly 16 bytes
+  or "auto" for a random value. An optional '-' separator could be used to 
group
+  bytes. If not specified the NGUID will remain all zeros.
+
 ``eui64``
   Set the EUI-64 of the namespace. This will be reported as a "IEEE Extended
   Unique Identifier" descriptor in the Namespace Identification Descriptor 
List.
diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c
index abc0387f2ca8..6c5a2b875da8 100644
--- a/hw/nvme/ctrl.c
+++ b/hw/nvme/ctrl.c
@@ -5640,6 +5640,10 @@ static uint16_t nvme_identify_ns_descr_list(NvmeCtrl *n, 
NvmeRequest *req)
 NvmeIdNsDescr hdr;
 uint8_t v[NVME_NIDL_UUID];
 } QEMU_PACKED uuid = {};
+struct {
+NvmeIdNsDescr hdr;
+uint8_t v[NVME_NIDL_NGUID];
+} QEMU_PACKED nguid = {};
 struct {
 NvmeIdNsDescr hdr;
 uint64_t v;
@@ -5668,6 +5672,14 @@ static uint16_t nvme_identify_ns_descr_list(NvmeCtrl *n, 
NvmeRequest *req)
 pos += sizeof(uuid);
 }
 
+if (!nvme_nguid_is_null(>params.nguid)) {
+nguid.hdr.nidt = NVME_NIDT_NGUID;
+nguid.hdr.nidl = NVME_NIDL_NGUID;
+memcpy(nguid.v, ns->params.nguid.data, NVME_NIDL_NGUID);
+memcpy(pos, , sizeof(nguid));
+pos += sizeof(nguid);
+}
+
 if (ns->params.eui64) {
 eui64.hdr.nidt = NVME_NIDT_EUI64;
 eui64.hdr.nidl = NVME_NIDL_EUI64;
diff --git a/hw/nvme/meson.build b/hw/nvme/meson.build
index 1a6a2ca2f307..7d5caa53c280 100644
--- a/hw/nvme/meson.build
+++ b/hw/nvme/meson.build
@@ -1 +1 @@
-system_ss.add(when: 'CONFIG_NVME_PCI', if_true: files('ctrl.c', 'dif.c', 
'ns.c', 'subsys.c'))
+system_ss.add(when: 'CONFIG_NVME_PCI', if_true: files('ctrl.c', 'dif.c', 
'ns.c', 'subsys.c', 'nguid.c'))
\ No newline at end of file
diff --git a/hw/nvme/nguid.c b/hw/nvme/nguid.c
new file mode 100644
index ..829832bd9f41
--- /dev/null
+++ b/hw/nvme/nguid.c
@@ -0,0 +1,187 @@
+/*
+ *  QEMU NVMe NGUID functions
+ *
+ * Copyright 2024 Google LLC
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ */
+
+#include "qemu/osdep.h"
+#include "qapi/visitor.h"
+#include "qemu/ctype.h"
+#include "nvme.h"
+
+#define NGUID_SEPARATOR '-'
+
+#define NGUID_VALUE_AUTO "auto"
+
+#define NGUID_FMT  \
+"%02hhx%02hhx%02hhx%02hhx" \
+"%02hhx%02hhx%02hhx%02hhx" \
+"%02hhx%02hhx%02hhx%02hhx" \
+"%02hhx%02hhx%02hhx%02hhx"
+
+#define NGUID_STR_LEN (2 * NGUID_LEN + 1)
+
+bool nvme_nguid_is_null(const NvmeNGUID *nguid)
+{
+static NvmeNGUID null_nguid;
+return memcmp(nguid, _nguid, sizeof(NvmeNGUID)) == 0;
+}
+
+static void nvme_nguid_generate(NvmeNGUID *out)
+{
+int i;
+uint32_t x;
+
+QEMU_BUILD_BUG_ON((NGUID_LEN % sizeof(x)) != 0);
+
+for (i = 0; i < NGUID_LEN; i += sizeof(x)) {
+x = g_random_int();
+memcpy(>data[i], , sizeof(x));
+}
+}
+
+/*
+ * The Linux Kernel typically prints the NGUID of a

[PULL 0/6] hw/nvme updates

2024-03-11 Thread Klaus Jensen

From: Klaus Jensen 

Hi,

The following changes since commit 7489f7f3f81dcb776df8c1b9a9db281fc21bf05f:

  Merge tag 'hw-misc-20240309' of https://github.com/philmd/qemu into staging 
(2024-03-09 20:12:21 +)

are available in the Git repository at:

  https://gitlab.com/birkelund/qemu.git tags/nvme-next-pull-request

for you to fetch changes up to a1505d799232939bf90c1b3e1fc20e81cd398404:

  hw/nvme: add machine compatibility parameter to enable msix exclusive bar 
(2024-03-11 20:07:41 +0100)


hw/nvme updates
-BEGIN PGP SIGNATURE-

iQEzBAABCgAdFiEEUigzqnXi3OaiR2bATeGvMW1PDekFAmXvVsYACgkQTeGvMW1P
DemWtwf9HU3cjtvCp8AeHGoPFTwp8/Vx3cQlQ6ilADKSDm44up2+M504xE/Mdviv
6y3PTPe1yiEpg/MbjWTX/df5lo+VdNoCuCyjph9mea0s1QAjCfVpl+KLMUVF/Oj5
y1Iz9PQqOVDJ3O4xlgmPTfd8NXE/frNJaiXAjFuBxF2+4lilD5kMxpyu7DXbLiy2
Szd1I3DhFAEOLEbrSSRDI3Fpy0KBdRzdKuUfmRdrHzbmhzHJefW7wnZ3aAiDboaD
Ny7y/aovmjGymMp9GrBKWhUFPfSUtJ8l8j4Z7acQs+VDxg8lcAHCJKOyqCBTspUL
PSnDe6E/CRyjrG2fUVXTLb6YW1eibQ==
=Ld7a
-END PGP SIGNATURE-



Klaus Jensen (4):
  hw/nvme: fix invalid check on mcl
  MAINTAINERS: add Jesper as reviewer on hw/nvme
  hw/nvme: generalize the mbar size helper
  hw/nvme: add machine compatibility parameter to enable msix exclusive
bar

Minwoo Im (1):
  hw/nvme: separate 'serial' property for VFs

Roque Arcudia Hernandez (1):
  hw/nvme: Add NVMe NGUID property

 MAINTAINERS  |   1 +
 docs/system/devices/nvme.rst |   7 ++
 hw/core/machine.c|   1 +
 hw/nvme/ctrl.c   |  95 +-
 hw/nvme/meson.build  |   2 +-
 hw/nvme/nguid.c  | 187 +++
 hw/nvme/ns.c |   2 +
 hw/nvme/nvme.h   |  27 +++--
 8 files changed, 288 insertions(+), 34 deletions(-)
 create mode 100644 hw/nvme/nguid.c

-- 
2.44.0

[PULL 1/6] hw/nvme: separate 'serial' property for VFs

2024-03-11 Thread Klaus Jensen

From: Minwoo Im 

Currently, when a VF is created, it uses the 'params' object of the PF
as it is. In other words, the 'params.serial' string memory area is also
shared. In this situation, if the VF is removed from the system, the
PF's 'params.serial' object is released with object_finalize() followed
by object_property_del_all() which release the memory for 'serial'
property. If that happens, the next VF created will inherit a serial
from a corrupted memory area.

If this happens, an error will occur when comparing subsys->serial and
n->params.serial in the nvme_subsys_register_ctrl() function.

Cc: qemu-sta...@nongnu.org
Fixes: 44c2c09488db ("hw/nvme: Add support for SR-IOV")
Signed-off-by: Minwoo Im 
Reviewed-by: Klaus Jensen 
Signed-off-by: Klaus Jensen 
---
 hw/nvme/ctrl.c | 8 +++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c
index 76fe0397045b..94ef63945725 100644
--- a/hw/nvme/ctrl.c
+++ b/hw/nvme/ctrl.c
@@ -8309,9 +8309,15 @@ static void nvme_realize(PCIDevice *pci_dev, Error 
**errp)
 if (pci_is_vf(pci_dev)) {
 /*
  * VFs derive settings from the parent. PF's lifespan exceeds
- * that of VF's, so it's safe to share params.serial.
+ * that of VF's.
  */
 memcpy(>params, >params, sizeof(NvmeParams));
+
+/*
+ * Set PF's serial value to a new string memory to prevent 'serial'
+ * property object release of PF when a VF is removed from the system.
+ */
+n->params.serial = g_strdup(pn->params.serial);
 n->subsys = pn->subsys;
 }
 
-- 
2.44.0

Re: [PATCH 0/2] hw/nvme: fix hibernation-based migration

2024-03-10 Thread Klaus Jensen

On Mar 10 12:07, Klaus Jensen wrote:
> Julien Grall, in #2184, reported that hibernation-based migration with
> hw/nvme is broken when suspending on a pre 6.0 QEMU and resuming on a
> more recent one. This is because the BAR layout was changed in 6.0.
> 
> Fix this by adding a machine compatibility parameter that restores the
> old behavior.
> 
> Signed-off-by: Klaus Jensen 
> ---
> Klaus Jensen (2):
>   hw/nvme: generalize the mbar size helper
>   hw/nvme: add machine compatibility parameter to enable msix exclusive 
> bar
> 
>  hw/core/machine.c |  1 +
>  hw/nvme/ctrl.c| 73 
> ---
>  hw/nvme/nvme.h|  1 +
>  3 files changed, 50 insertions(+), 25 deletions(-)
> ---
> base-commit: f901bf11b3ddf852e591593b09b8aa7a177f9a0b
> change-id: 20240310-fix-msix-exclusive-bar-d65564414a2c
> 
> Best regards,
> -- 
> Klaus Jensen 
> 

Whoops, forgot Fixes: and Resolves: tags.

Will add that on the pull.


signature.asc
Description: PGP signature

[PATCH 0/2] hw/nvme: fix hibernation-based migration

2024-03-10 Thread Klaus Jensen

Julien Grall, in #2184, reported that hibernation-based migration with
hw/nvme is broken when suspending on a pre 6.0 QEMU and resuming on a
more recent one. This is because the BAR layout was changed in 6.0.

Fix this by adding a machine compatibility parameter that restores the
old behavior.

Signed-off-by: Klaus Jensen 
---
Klaus Jensen (2):
  hw/nvme: generalize the mbar size helper
  hw/nvme: add machine compatibility parameter to enable msix exclusive bar

 hw/core/machine.c |  1 +
 hw/nvme/ctrl.c| 73 ---
 hw/nvme/nvme.h|  1 +
 3 files changed, 50 insertions(+), 25 deletions(-)
---
base-commit: f901bf11b3ddf852e591593b09b8aa7a177f9a0b
change-id: 20240310-fix-msix-exclusive-bar-d65564414a2c

Best regards,
-- 
Klaus Jensen

[PATCH 1/2] hw/nvme: generalize the mbar size helper

2024-03-10 Thread Klaus Jensen

From: Klaus Jensen 

Generalize the mbar size helper such that it can handle cases where the
MSI-X table and PBA are expected to be in an exclusive bar.

Signed-off-by: Klaus Jensen 
---
 hw/nvme/ctrl.c | 26 +++---
 1 file changed, 15 insertions(+), 11 deletions(-)

diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c
index 76fe0397045b..8cca8a762124 100644
--- a/hw/nvme/ctrl.c
+++ b/hw/nvme/ctrl.c
@@ -8003,13 +8003,18 @@ static void nvme_init_pmr(NvmeCtrl *n, PCIDevice 
*pci_dev)
 memory_region_set_enabled(>pmr.dev->mr, false);
 }
 
-static uint64_t nvme_bar_size(unsigned total_queues, unsigned total_irqs,
-  unsigned *msix_table_offset,
-  unsigned *msix_pba_offset)
+static uint64_t nvme_mbar_size(unsigned total_queues, unsigned total_irqs,
+   unsigned *msix_table_offset,
+   unsigned *msix_pba_offset)
 {
-uint64_t bar_size, msix_table_size, msix_pba_size;
+uint64_t bar_size, msix_table_size;
 
 bar_size = sizeof(NvmeBar) + 2 * total_queues * NVME_DB_SIZE;
+
+if (total_irqs == 0) {
+goto out;
+}
+
 bar_size = QEMU_ALIGN_UP(bar_size, 4 * KiB);
 
 if (msix_table_offset) {
@@ -8024,11 +8029,10 @@ static uint64_t nvme_bar_size(unsigned total_queues, 
unsigned total_irqs,
 *msix_pba_offset = bar_size;
 }
 
-msix_pba_size = QEMU_ALIGN_UP(total_irqs, 64) / 8;
-bar_size += msix_pba_size;
+bar_size += QEMU_ALIGN_UP(total_irqs, 64) / 8;
 
-bar_size = pow2ceil(bar_size);
-return bar_size;
+out:
+return pow2ceil(bar_size);
 }
 
 static void nvme_init_sriov(NvmeCtrl *n, PCIDevice *pci_dev, uint16_t offset)
@@ -8036,7 +8040,7 @@ static void nvme_init_sriov(NvmeCtrl *n, PCIDevice 
*pci_dev, uint16_t offset)
 uint16_t vf_dev_id = n->params.use_intel_id ?
  PCI_DEVICE_ID_INTEL_NVME : PCI_DEVICE_ID_REDHAT_NVME;
 NvmePriCtrlCap *cap = >pri_ctrl_cap;
-uint64_t bar_size = nvme_bar_size(le16_to_cpu(cap->vqfrsm),
+uint64_t bar_size = nvme_mbar_size(le16_to_cpu(cap->vqfrsm),
   le16_to_cpu(cap->vifrsm),
   NULL, NULL);
 
@@ -8098,8 +8102,8 @@ static bool nvme_init_pci(NvmeCtrl *n, PCIDevice 
*pci_dev, Error **errp)
 }
 
 /* add one to max_ioqpairs to account for the admin queue pair */
-bar_size = nvme_bar_size(n->params.max_ioqpairs + 1, n->params.msix_qsize,
- _table_offset, _pba_offset);
+bar_size = nvme_mbar_size(n->params.max_ioqpairs + 1, n->params.msix_qsize,
+  _table_offset, _pba_offset);
 
 memory_region_init(>bar0, OBJECT(n), "nvme-bar0", bar_size);
 memory_region_init_io(>iomem, OBJECT(n), _mmio_ops, n, "nvme",

-- 
2.43.0

[PATCH 2/2] hw/nvme: add machine compatibility parameter to enable msix exclusive bar

2024-03-10 Thread Klaus Jensen

From: Klaus Jensen 

Commit 1901b4967c3f ("hw/block/nvme: move msix table and pba to BAR 0")
moved the MSI-X table and PBA to BAR 0 to make room for enabling CMR and
PMR at the same time. As reported by Julien Grall in #2184, this breaks
migration through system hibernation.

Add a machine compatibility parameter and set it on machines pre 6.0 to
enable the old behavior automatically, restoring the hibernation
migration support.

Reported-by: Julien Grall 
Tested-by: Julien Grall 
Signed-off-by: Klaus Jensen 
---
 hw/core/machine.c |  1 +
 hw/nvme/ctrl.c| 51 +++
 hw/nvme/nvme.h|  1 +
 3 files changed, 37 insertions(+), 16 deletions(-)

diff --git a/hw/core/machine.c b/hw/core/machine.c
index 9ac5d5389a6c..f3012bca1370 100644
--- a/hw/core/machine.c
+++ b/hw/core/machine.c
@@ -100,6 +100,7 @@ GlobalProperty hw_compat_5_2[] = {
 { "PIIX4_PM", "smm-compat", "on"},
 { "virtio-blk-device", "report-discard-granularity", "off" },
 { "virtio-net-pci-base", "vectors", "3"},
+{ "nvme", "msix-exclusive-bar", "on"},
 };
 const size_t hw_compat_5_2_len = G_N_ELEMENTS(hw_compat_5_2);
 
diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c
index 8cca8a762124..649467723e7e 100644
--- a/hw/nvme/ctrl.c
+++ b/hw/nvme/ctrl.c
@@ -7798,6 +7798,11 @@ static bool nvme_check_params(NvmeCtrl *n, Error **errp)
 }
 
 if (n->pmr.dev) {
+if (params->msix_exclusive_bar) {
+error_setg(errp, "not enough BARs available to enable PMR");
+return false;
+}
+
 if (host_memory_backend_is_mapped(n->pmr.dev)) {
 error_setg(errp, "can't use already busy memdev: %s",

object_get_canonical_path_component(OBJECT(n->pmr.dev)));
@@ -8101,24 +8106,36 @@ static bool nvme_init_pci(NvmeCtrl *n, PCIDevice 
*pci_dev, Error **errp)
 pcie_ari_init(pci_dev, 0x100);
 }
 
-/* add one to max_ioqpairs to account for the admin queue pair */
-bar_size = nvme_mbar_size(n->params.max_ioqpairs + 1, n->params.msix_qsize,
-  _table_offset, _pba_offset);
-
-memory_region_init(>bar0, OBJECT(n), "nvme-bar0", bar_size);
-memory_region_init_io(>iomem, OBJECT(n), _mmio_ops, n, "nvme",
-  msix_table_offset);
-memory_region_add_subregion(>bar0, 0, >iomem);
-
-if (pci_is_vf(pci_dev)) {
-pcie_sriov_vf_register_bar(pci_dev, 0, >bar0);
-} else {
+if (n->params.msix_exclusive_bar && !pci_is_vf(pci_dev)) {
+bar_size = nvme_mbar_size(n->params.max_ioqpairs + 1, 0, NULL, NULL);
+memory_region_init_io(>iomem, OBJECT(n), _mmio_ops, n, "nvme",
+  bar_size);
 pci_register_bar(pci_dev, 0, PCI_BASE_ADDRESS_SPACE_MEMORY |
- PCI_BASE_ADDRESS_MEM_TYPE_64, >bar0);
+ PCI_BASE_ADDRESS_MEM_TYPE_64, >iomem);
+ret = msix_init_exclusive_bar(pci_dev, n->params.msix_qsize, 4, errp);
+} else {
+/* add one to max_ioqpairs to account for the admin queue pair */
+bar_size = nvme_mbar_size(n->params.max_ioqpairs + 1,
+  n->params.msix_qsize, _table_offset,
+  _pba_offset);
+
+memory_region_init(>bar0, OBJECT(n), "nvme-bar0", bar_size);
+memory_region_init_io(>iomem, OBJECT(n), _mmio_ops, n, "nvme",
+  msix_table_offset);
+memory_region_add_subregion(>bar0, 0, >iomem);
+
+if (pci_is_vf(pci_dev)) {
+pcie_sriov_vf_register_bar(pci_dev, 0, >bar0);
+} else {
+pci_register_bar(pci_dev, 0, PCI_BASE_ADDRESS_SPACE_MEMORY |
+ PCI_BASE_ADDRESS_MEM_TYPE_64, >bar0);
+}
+
+ret = msix_init(pci_dev, n->params.msix_qsize,
+>bar0, 0, msix_table_offset,
+>bar0, 0, msix_pba_offset, 0, errp);
 }
-ret = msix_init(pci_dev, n->params.msix_qsize,
->bar0, 0, msix_table_offset,
->bar0, 0, msix_pba_offset, 0, errp);
+
 if (ret == -ENOTSUP) {
 /* report that msix is not supported, but do not error out */
 warn_report_err(*errp);
@@ -8416,6 +8433,8 @@ static Property nvme_props[] = {
   params.sriov_max_vi_per_vf, 0),
 DEFINE_PROP_UINT8("sriov_max_vq_per_vf", NvmeCtrl,
   params.sriov_max_vq_per_vf, 0),
+DEFINE_PROP_BOOL("msix-exclusive-bar", NvmeCtrl, params.msix_exclusive_bar,
+ false),
 DEFINE_PROP_END_OF_LIST(),
 };
 
diff --git a/hw/nvme/nvme.h b/hw

Re: [PATCH v2] hw/nvme/ns: Add NVMe NGUID property

2024-03-01 Thread Klaus Jensen

On Feb 22 17:50, Nabih Estefan wrote:
> From: Roque Arcudia Hernandez 
> 
> This patch adds a way to specify an NGUID for a given NVMe Namespace using a
> string of hexadecimal digits with an optional '-' separator to group bytes. 
> For
> instance:
> 
> -device nvme-ns,nguid="e9accd3b83904e13167cf0593437f57d"
> 
> If provided, the NGUID will be part of the Namespace Identification Descriptor
> list and the Identify Namespace data.
> 
> Signed-off-by: Roque Arcudia Hernandez 
> Signed-off-by: Nabih Estefan 
> Reviewed-by: Klaus Jensen 

Thanks, applied to nvme-next!


signature.asc
Description: PGP signature

Re: [PATCH] hw/nvme: fix invalid endian conversion

2024-02-26 Thread Klaus Jensen

On Feb 26 09:18, Philippe Mathieu-Daudé wrote:
> On 22/2/24 10:29, Klaus Jensen wrote:
> > From: Klaus Jensen 
> > 
> > numcntl is one byte and so is max_vfs. Using cpu_to_le16 on big endian
> > hosts results in numcntl being set to 0.
> > 
> > Fix by dropping the endian conversion.
> > 
> > Fixes: 746d42b13368 ("hw/nvme: Initialize capability structures for 
> > primary/secondary controllers")
> > Reported-by: Kevin Wolf 
> > Signed-off-by: Klaus Jensen 
> > ---
> >   hw/nvme/ctrl.c | 2 +-
> >   1 file changed, 1 insertion(+), 1 deletion(-)
> 
> Hi Klaus, I'm not seeing other NVMe patches on the list,
> so I'll queue this on my hw-misc tree, but feel free to
> object and I'll unqueue :)
> 
> Thanks,
> 

No, thats perfect! Thanks! :)


signature.asc
Description: PGP signature

Re: [PATCH] hw/nvme: fix invalid endian conversion

2024-02-22 Thread Klaus Jensen

On Feb 22 11:08, Philippe Mathieu-Daudé wrote:
> Hi Klaus,
> 
> On 22/2/24 10:29, Klaus Jensen wrote:
> > From: Klaus Jensen 
> > 
> > numcntl is one byte and so is max_vfs. Using cpu_to_le16 on big endian
> > hosts results in numcntl being set to 0.
> > 
> > Fix by dropping the endian conversion.
> > 
> > Fixes: 746d42b13368 ("hw/nvme: Initialize capability structures for 
> > primary/secondary controllers")
> 
> Isn't it commit 99f48ae7ae ("Add support for Secondary Controller
> List") instead?
> 

Thanks Philippe,

Yes, that is the commit that had the bug originally. I'll fix it up.


signature.asc
Description: PGP signature

[PATCH] hw/nvme: fix invalid endian conversion

2024-02-22 Thread Klaus Jensen

From: Klaus Jensen 

numcntl is one byte and so is max_vfs. Using cpu_to_le16 on big endian
hosts results in numcntl being set to 0.

Fix by dropping the endian conversion.

Fixes: 746d42b13368 ("hw/nvme: Initialize capability structures for 
primary/secondary controllers")
Reported-by: Kevin Wolf 
Signed-off-by: Klaus Jensen 
---
 hw/nvme/ctrl.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c
index f026245d1e9e..76fe0397045b 100644
--- a/hw/nvme/ctrl.c
+++ b/hw/nvme/ctrl.c
@@ -7924,7 +7924,7 @@ static void nvme_init_state(NvmeCtrl *n)
 n->aer_reqs = g_new0(NvmeRequest *, n->params.aerl + 1);
 QTAILQ_INIT(>aer_queue);
 
-list->numcntl = cpu_to_le16(max_vfs);
+list->numcntl = max_vfs;
 for (i = 0; i < max_vfs; i++) {
 sctrl = >sec[i];
 sctrl->pcid = cpu_to_le16(n->cntlid);

---
base-commit: 760b4dcdddba4a40b9fa0eb78fdfc7eda7cb83d0
change-id: 20240222-fix-sriov-numcntl-5ebe46a42176

Best regards,
-- 
Klaus Jensen

Re: [PATCH] hw/nvme/ns: Add NVMe NGUID property

2024-02-22 Thread Klaus Jensen

On Feb 21 17:38, Nabih Estefan wrote:
> From: Roque Arcudia Hernandez 
> 
> This patch adds a way to specify an NGUID for a given NVMe Namespace using a
> string of hexadecimal digits with an optional '-' separator to group bytes. 
> For
> instance:
> 
> -device nvme-ns,nguid="e9accd3b83904e13167cf0593437f57d"
> 
> If provided, the NGUID will be part of the Namespace Identification Descriptor
> list and the Identify Namespace data.
> 
> Signed-off-by: Roque Arcudia Hernandez 
> Signed-off-by: Nabih Estefan 

Hi Thanks! Looks good,

Reviewed-by: Klaus Jensen 

Only a minor nit below.

> diff --git a/hw/nvme/nguid.c b/hw/nvme/nguid.c
> new file mode 100644
> index 00..3e3e0567c5
> --- /dev/null
> +++ b/hw/nvme/nguid.c
> @@ -0,0 +1,192 @@
> +/*
> + *  QEMU NVMe NGUID functions
> + *
> + * Copyright 2024 Google LLC
> + *
> + * This program is free software; you can redistribute it and/or modify it
> + * under the terms of the GNU General Public License as published by the
> + * Free Software Foundation; either version 2 of the License, or
> + * (at your option) any later version.
> + *
> + * This program is distributed in the hope that it will be useful, but 
> WITHOUT
> + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
> + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
> + * for more details.
> + */
> +
> +#include "qemu/osdep.h"
> +#include "qapi/visitor.h"
> +#include "qemu/ctype.h"
> +#include "nvme.h"
> +
> +#define NGUID_SEPARATOR '-'
> +
> +#define NGUID_VALUE_AUTO "auto"
> +
> +#define NGUID_FMT  \
> +"%02hhx%02hhx%02hhx%02hhx" \
> +"%02hhx%02hhx%02hhx%02hhx" \
> +"%02hhx%02hhx%02hhx%02hhx" \
> +"%02hhx%02hhx%02hhx%02hhx"
> +
> +#define NGUID_STR_LEN (2 * NGUID_LEN + 1)
> +
> +bool nvme_nguid_is_null(const NvmeNGUID *nguid)
> +{
> +int i;
> +for (i = 0; i < NGUID_LEN; i++) {
> +if (nguid->data[i] != 0) {
> +return false;
> +}
> +}
> +return true;
> +}

Maybe just

bool nvme_nguid_is_null(const NvmeNGUID *nguid)
{
static NvmeNGUID null_nguid;
return memcmp(nguid, _nguid, sizeof(NvmeNGUID)) == 0;
}



signature.asc
Description: PGP signature

Re: [PATCH v6 01/15] hw/nvme: Use pcie_sriov_num_vfs()

2024-02-20 Thread Klaus Jensen

On Feb 21 00:33, Akihiko Odaki wrote:
> On 2024/02/20 23:53, Kevin Wolf wrote:
> > Am 20.02.2024 um 15:29 hat Kevin Wolf geschrieben:
> > > Am 20.02.2024 um 13:24 hat Akihiko Odaki geschrieben:
> > > > nvme_sriov_pre_write_ctrl() used to directly inspect SR-IOV
> > > > configurations to know the number of VFs being disabled due to SR-IOV
> > > > configuration writes, but the logic was flawed and resulted in
> > > > out-of-bound memory access.
> > > > 
> > > > It assumed PCI_SRIOV_NUM_VF always has the number of currently enabled
> > > > VFs, but it actually doesn't in the following cases:
> > > > - PCI_SRIOV_NUM_VF has been set but PCI_SRIOV_CTRL_VFE has never been.
> > > > - PCI_SRIOV_NUM_VF was written after PCI_SRIOV_CTRL_VFE was set.
> > > > - VFs were only partially enabled because of realization failure.
> > > > 
> > > > It is a responsibility of pcie_sriov to interpret SR-IOV configurations
> > > > and pcie_sriov does it correctly, so use pcie_sriov_num_vfs(), which it
> > > > provides, to get the number of enabled VFs before and after SR-IOV
> > > > configuration writes.
> > > > 
> > > > Cc: qemu-sta...@nongnu.org
> > > > Fixes: CVE-2024-26328
> > > > Fixes: 11871f53ef8e ("hw/nvme: Add support for the Virtualization 
> > > > Management command")
> > > > Suggested-by: Michael S. Tsirkin 
> > > > Signed-off-by: Akihiko Odaki 
> > > > ---
> > > >   hw/nvme/ctrl.c | 26 --
> > > >   1 file changed, 8 insertions(+), 18 deletions(-)
> > > > 
> > > > diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c
> > > > index f026245d1e9e..7a56e7b79b4d 100644
> > > > --- a/hw/nvme/ctrl.c
> > > > +++ b/hw/nvme/ctrl.c
> > > > @@ -8466,36 +8466,26 @@ static void nvme_pci_reset(DeviceState *qdev)
> > > >   nvme_ctrl_reset(n, NVME_RESET_FUNCTION);
> > > >   }
> > > > -static void nvme_sriov_pre_write_ctrl(PCIDevice *dev, uint32_t address,
> > > > -  uint32_t val, int len)
> > > > +static void nvme_sriov_post_write_config(PCIDevice *dev, uint16_t 
> > > > old_num_vfs)
> > > >   {
> > > >   NvmeCtrl *n = NVME(dev);
> > > >   NvmeSecCtrlEntry *sctrl;
> > > > -uint16_t sriov_cap = dev->exp.sriov_cap;
> > > > -uint32_t off = address - sriov_cap;
> > > > -int i, num_vfs;
> > > > +int i;
> > > > -if (!sriov_cap) {
> > > > -return;
> > > > -}
> > > > -
> > > > -if (range_covers_byte(off, len, PCI_SRIOV_CTRL)) {
> > > > -if (!(val & PCI_SRIOV_CTRL_VFE)) {
> > > > -num_vfs = pci_get_word(dev->config + sriov_cap + 
> > > > PCI_SRIOV_NUM_VF);
> > > > -for (i = 0; i < num_vfs; i++) {
> > > > -sctrl = >sec_ctrl_list.sec[i];
> > > > -nvme_virt_set_state(n, le16_to_cpu(sctrl->scid), 
> > > > false);
> > > > -}
> > > > -}
> > > > +for (i = pcie_sriov_num_vfs(dev); i < old_num_vfs; i++) {
> > > > +sctrl = >sec_ctrl_list.sec[i];
> > > > +nvme_virt_set_state(n, le16_to_cpu(sctrl->scid), false);
> > > >   }
> > > >   }
> > > 
> > > Maybe I'm missing something, but if the concern is that 'i' could run
> > > beyond the end of the array, I don't see anything that limits
> > > pcie_sriov_num_vfs() to the static size of 127 that n->sec_ctrl_list.sec
> > > has. register_vfs() seems to just take whatever 16 bit value the guest
> > > wrote without imposing additional restrictions.
> > > 
> > > If there is some mechanism that makes register_vf() fail if we exceed
> > > the limit, maybe an assertion with a comment would be in order because
> > > it doesn't seem obvious. I couldn't find any code that enforces it,
> > > sriov_max_vfs only ever seems to be used as a hint for the guest.
> > > 
> > > If not, do we need another check that fails gracefully in the error
> > > case?
> > 
> > Ok, I see now that patch 2 fixes this. But then the commit message is
> > wrong because it implies that this patch is the only thing you need to
> > fix the problem with nvme. You can't say "Fixes: CVE-2024-26328" if half
> > of the fix is still missing.
> 
> I didn't assign CVE-2024-26328 for the case that the value of
> PCI_SRIOV_NUM_VF is greater than PCI_SRIOV_TOTAL_VF; it's what
> CVE-2024-26327 deals with.
> 
> The problem I dealt here is that the value of PCI_SRIOV_NUM_VF may not
> represent the actual number of enabled VFs because another register
> (PCI_SRIOV_CTRL_VFE) is not set, for example.
> 
> If an assertion to be added, I think it should be in pcie_sriov_num_vfs()
> and ensure the returning value is less than the value of PCI_SRIOV_TOTAL_VF
> (aka sriov_max_vfs in hw/nvme/ctrl.c), but I think it's fine without it.
> 
> > 
> > Maybe asserting old_num_vfs < n->sec_ctrl_list.numcntl would still be a
> > good idea. But looking at this one, it seems to me that numcntl isn't
> > completely correct either:
> > 
> >  list->numcntl = cpu_to_le16(max_vfs);
> > 
> > Both list->numcntl and max_vfs are uint8_t, so I think this will always
> > be 0 on big endian

Re: [PATCH v6 01/15] hw/nvme: Use pcie_sriov_num_vfs()

2024-02-20 Thread Klaus Jensen

On Feb 20 15:29, Kevin Wolf wrote:
> Am 20.02.2024 um 13:24 hat Akihiko Odaki geschrieben:
> > nvme_sriov_pre_write_ctrl() used to directly inspect SR-IOV
> > configurations to know the number of VFs being disabled due to SR-IOV
> > configuration writes, but the logic was flawed and resulted in
> > out-of-bound memory access.
> > 
> > It assumed PCI_SRIOV_NUM_VF always has the number of currently enabled
> > VFs, but it actually doesn't in the following cases:
> > - PCI_SRIOV_NUM_VF has been set but PCI_SRIOV_CTRL_VFE has never been.
> > - PCI_SRIOV_NUM_VF was written after PCI_SRIOV_CTRL_VFE was set.
> > - VFs were only partially enabled because of realization failure.
> > 
> > It is a responsibility of pcie_sriov to interpret SR-IOV configurations
> > and pcie_sriov does it correctly, so use pcie_sriov_num_vfs(), which it
> > provides, to get the number of enabled VFs before and after SR-IOV
> > configuration writes.
> > 
> > Cc: qemu-sta...@nongnu.org
> > Fixes: CVE-2024-26328
> > Fixes: 11871f53ef8e ("hw/nvme: Add support for the Virtualization 
> > Management command")
> > Suggested-by: Michael S. Tsirkin 
> > Signed-off-by: Akihiko Odaki 
> > ---
> >  hw/nvme/ctrl.c | 26 --
> >  1 file changed, 8 insertions(+), 18 deletions(-)
> > 
> > diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c
> > index f026245d1e9e..7a56e7b79b4d 100644
> > --- a/hw/nvme/ctrl.c
> > +++ b/hw/nvme/ctrl.c
> > @@ -8466,36 +8466,26 @@ static void nvme_pci_reset(DeviceState *qdev)
> >  nvme_ctrl_reset(n, NVME_RESET_FUNCTION);
> >  }
> >  
> > -static void nvme_sriov_pre_write_ctrl(PCIDevice *dev, uint32_t address,
> > -  uint32_t val, int len)
> > +static void nvme_sriov_post_write_config(PCIDevice *dev, uint16_t 
> > old_num_vfs)
> >  {
> >  NvmeCtrl *n = NVME(dev);
> >  NvmeSecCtrlEntry *sctrl;
> > -uint16_t sriov_cap = dev->exp.sriov_cap;
> > -uint32_t off = address - sriov_cap;
> > -int i, num_vfs;
> > +int i;
> >  
> > -if (!sriov_cap) {
> > -return;
> > -}
> > -
> > -if (range_covers_byte(off, len, PCI_SRIOV_CTRL)) {
> > -if (!(val & PCI_SRIOV_CTRL_VFE)) {
> > -num_vfs = pci_get_word(dev->config + sriov_cap + 
> > PCI_SRIOV_NUM_VF);
> > -for (i = 0; i < num_vfs; i++) {
> > -sctrl = >sec_ctrl_list.sec[i];
> > -nvme_virt_set_state(n, le16_to_cpu(sctrl->scid), false);
> > -}
> > -}
> > +for (i = pcie_sriov_num_vfs(dev); i < old_num_vfs; i++) {
> > +sctrl = >sec_ctrl_list.sec[i];
> > +nvme_virt_set_state(n, le16_to_cpu(sctrl->scid), false);
> >  }
> >  }
> 
> Maybe I'm missing something, but if the concern is that 'i' could run
> beyond the end of the array, I don't see anything that limits
> pcie_sriov_num_vfs() to the static size of 127 that n->sec_ctrl_list.sec
> has. register_vfs() seems to just take whatever 16 bit value the guest
> wrote without imposing additional restrictions.
> 

Hi Kevin,

Thanks for reviewing, I believe patch 2 in this series fixes that
missing validation of NumVFs.


signature.asc
Description: PGP signature

Re: [PATCH v5 01/11] hw/nvme: Use pcie_sriov_num_vfs()

2024-02-19 Thread Klaus Jensen

On Feb 18 13:56, Akihiko Odaki wrote:
> nvme_sriov_pre_write_ctrl() used to directly inspect SR-IOV
> configurations to know the number of VFs being disabled due to SR-IOV
> configuration writes, but the logic was flawed and resulted in
> out-of-bound memory access.
> 
> It assumed PCI_SRIOV_NUM_VF always has the number of currently enabled
> VFs, but it actually doesn't in the following cases:
> - PCI_SRIOV_NUM_VF has been set but PCI_SRIOV_CTRL_VFE has never been.
> - PCI_SRIOV_NUM_VF was written after PCI_SRIOV_CTRL_VFE was set.
> - VFs were only partially enabled because of realization failure.
> 
> It is a responsibility of pcie_sriov to interpret SR-IOV configurations
> and pcie_sriov does it correctly, so use pcie_sriov_num_vfs(), which it
> provides, to get the number of enabled VFs before and after SR-IOV
> configuration writes.
> 
> Cc: qemu-sta...@nongnu.org
> Fixes: 11871f53ef8e ("hw/nvme: Add support for the Virtualization Management 
> command")
> Suggested-by: Michael S. Tsirkin 
> Signed-off-by: Akihiko Odaki 

Thanks Akihiko,

I'll pick this up for hw/nvme nvme-next as-is.

Reviewed-by: Klaus Jensen 

> ---
>  hw/nvme/ctrl.c | 26 --
>  1 file changed, 8 insertions(+), 18 deletions(-)
> 
> diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c
> index f026245d1e9e..7a56e7b79b4d 100644
> --- a/hw/nvme/ctrl.c
> +++ b/hw/nvme/ctrl.c
> @@ -8466,36 +8466,26 @@ static void nvme_pci_reset(DeviceState *qdev)
>  nvme_ctrl_reset(n, NVME_RESET_FUNCTION);
>  }
>  
> -static void nvme_sriov_pre_write_ctrl(PCIDevice *dev, uint32_t address,
> -  uint32_t val, int len)
> +static void nvme_sriov_post_write_config(PCIDevice *dev, uint16_t 
> old_num_vfs)
>  {
>  NvmeCtrl *n = NVME(dev);
>  NvmeSecCtrlEntry *sctrl;
> -uint16_t sriov_cap = dev->exp.sriov_cap;
> -uint32_t off = address - sriov_cap;
> -int i, num_vfs;
> +int i;
>  
> -if (!sriov_cap) {
> -return;
> -}
> -
> -if (range_covers_byte(off, len, PCI_SRIOV_CTRL)) {
> -if (!(val & PCI_SRIOV_CTRL_VFE)) {
> -num_vfs = pci_get_word(dev->config + sriov_cap + 
> PCI_SRIOV_NUM_VF);
> -for (i = 0; i < num_vfs; i++) {
> -sctrl = >sec_ctrl_list.sec[i];
> -nvme_virt_set_state(n, le16_to_cpu(sctrl->scid), false);
> -}
> -}
> +for (i = pcie_sriov_num_vfs(dev); i < old_num_vfs; i++) {
> +sctrl = >sec_ctrl_list.sec[i];
> +nvme_virt_set_state(n, le16_to_cpu(sctrl->scid), false);
>  }
>  }
>  
>  static void nvme_pci_write_config(PCIDevice *dev, uint32_t address,
>uint32_t val, int len)
>  {
> -nvme_sriov_pre_write_ctrl(dev, address, val, len);
> +uint16_t old_num_vfs = pcie_sriov_num_vfs(dev);
> +
>  pci_default_write_config(dev, address, val, len);
>  pcie_cap_flr_write_config(dev, address, val, len);
> +nvme_sriov_post_write_config(dev, old_num_vfs);
>  }
>  
>  static const VMStateDescription nvme_vmstate = {
> 
> -- 
> 2.43.1
> 

-- 
One of us - No more doubt, silence or taboo about mental illness.


signature.asc
Description: PGP signature

Re: [PATCH v4 0/3] Initial support for SPDM Responders

2024-02-16 Thread Klaus Jensen

On Feb 15 14:44, Jonathan Cameron wrote:
> On Tue, 13 Feb 2024 12:44:00 +1000
> Alistair Francis  wrote:
> 
> Hi All,
> 
> Just wanted to add that back in v2 Klaus Jensen stated:
> 
> "I have no problem with picking this up for nvme, but I'd rather not take
>  the full series through my tree without reviews/acks from the pci
>  maintainers."
> 
> So I'd like to add my request that Michael and/or Marcell takes a look
> when they have time.
> 
> I've been carrying more or less the first 2 patches in my CXL staging
> tree for a couple of years (the initial Linux Kernel support that Lukas
> Wunner is now handling was developed against this) and I would love
> to see this upstream. Along with PCI and CXL and NVME usecases this
> is a major part of the Confidential Compute device assignment story
> via PCI/TDISP and CXL equivalent.
> 
> It's not changed in significant ways since v2 back in October last year.
> 

Would someone be willing to sign up to maintain the spdm_socket backend?


signature.asc
Description: PGP signature

Re: [PATCH] hw/nvme: fix invalid check on mcl

2024-02-08 Thread Klaus Jensen

On Feb  8 21:33, Minwoo Im wrote:
> On 24-02-08 13:22:48, Klaus Jensen wrote:
> > From: Klaus Jensen 
> > 
> > The number of logical blocks within a source range is converted into a
> > 1s based number at the time of parsing. However, when verifying the copy
> > length we add one again, causing the check against MCL to fail in error.
> > 
> > Fixes: 381ab99d8587 ("hw/nvme: check maximum copy length (MCL) for COPY")
> > Signed-off-by: Klaus Jensen 
> 
> Hi Klaus,
> 
> Reviewed-by: Minwoo Im 
> 
> Thanks!
> 

Thanks Minwoo, applied to nvme-next!


signature.asc
Description: PGP signature

[PATCH] hw/nvme: fix invalid check on mcl

2024-02-08 Thread Klaus Jensen

From: Klaus Jensen 

The number of logical blocks within a source range is converted into a
1s based number at the time of parsing. However, when verifying the copy
length we add one again, causing the check against MCL to fail in error.

Fixes: 381ab99d8587 ("hw/nvme: check maximum copy length (MCL) for COPY")
Signed-off-by: Klaus Jensen 
---
 hw/nvme/ctrl.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c
index f026245d1e9e..05c667158a3a 100644
--- a/hw/nvme/ctrl.c
+++ b/hw/nvme/ctrl.c
@@ -2855,7 +2855,7 @@ static inline uint16_t nvme_check_copy_mcl(NvmeNamespace 
*ns,
 uint32_t nlb;
 nvme_copy_source_range_parse(iocb->ranges, idx, iocb->format, NULL,
  , NULL, NULL, NULL);
-copy_len += nlb + 1;
+copy_len += nlb;
 }
 
 if (copy_len > ns->id_ns.mcl) {

---
base-commit: 39a6e4f87e7b75a45b08d6dc8b8b7c2954c87440
change-id: 20240208-fix-copy-mcl-check-3a6d95327154

Best regards,
-- 
Klaus Jensen

Re: NVME hotplug support ?

2024-01-29 Thread Klaus Jensen

On Jan 29 14:13, Damien Hedde wrote:
> 
> 
> On 1/24/24 08:47, Hannes Reinecke wrote:
> > On 1/24/24 07:52, Philippe Mathieu-Daudé wrote:
> > > Hi Hannes,
> > > 
> > > [+Markus as QOM/QDev rubber duck]
> > > 
> > > On 23/1/24 13:40, Hannes Reinecke wrote:
> > > > On 1/23/24 11:59, Damien Hedde wrote:
> > > > > Hi all,
> > > > > 
> > > > > We are currently looking into hotplugging nvme devices and
> > > > > it is currently not possible:
> > > > > When nvme was introduced 2 years ago, the feature was disabled.
> > > > > > commit cc6fb6bc506e6c47ed604fcb7b7413dff0b7d845
> > > > > > Author: Klaus Jensen
> > > > > > Date:   Tue Jul 6 10:48:40 2021 +0200
> > > > > > 
> > > > > >     hw/nvme: mark nvme-subsys non-hotpluggable
> > > > > >     We currently lack the infrastructure to handle
> > > > > > subsystem hotplugging, so
> > > > > >     disable it.
> > > > > 
> > > > > Do someone know what's lacking or anyone have some tips/idea
> > > > > of what we should develop to add the support ?
> > > > > 
> > > > Problem is that the object model is messed up. In qemu
> > > > namespaces are attached to controllers, which in turn are
> > > > children of the PCI device.
> > > > There are subsystems, but these just reference the controller.
> > > > 
> > > > So if you hotunplug the PCI device you detach/destroy the
> > > > controller and detach the namespaces from the controller.
> > > > But if you hotplug the PCI device again the NVMe controller will
> > > > be attached to the PCI device, but the namespace are still be
> > > > detached.
> > > > 
> > > > Klaus said he was going to fix that, and I dimly remember some patches
> > > > floating around. But apparently it never went anywhere.
> > > > 
> > > > Fundamental problem is that the NVMe hierarchy as per spec is
> > > > incompatible with the qemu object model; qemu requires a strict
> > > > tree model where every object has exactly _one_ parent.
> > > 
> > > The modelling problem is not clear to me.
> > > Do you have an example of how the NVMe hierarchy should be?
> > > 
> > Sure.
> > 
> > As per NVMe spec we have this hierarchy:
> > 
> >   --->  subsys ---
> >      |    |
> >      |    V
> > controller  namespaces
> > 
> > There can be several controllers, and several
> > namespaces.
> > The initiator (ie the linux 'nvme' driver) connects
> > to a controller, queries the subsystem for the attached
> > namespaces, and presents each namespace as a block device.
> > 
> > For Qemu we have the problem that every device _must_ be
> > a direct descendant of the parent (expressed by the fact
> > that each 'parent' object is embedded in the device object).
> > 
> > So if we were to present a NVMe PCI device, the controller
> > must be derived from the PCI device:
> > 
> > pci -> controller
> > 
> > but now we have to express the NVMe hierarchy, too:
> > 
> > pci -> ctrl1 -> subsys1 -> namespace1
> > 
> > which actually works.
> > We can easily attach several namespaces:
> > 
> > pci -> ctrl1 ->subsys1 -> namespace2
> > 
> > For a single controller and a single subsystem.
> > However, as mentioned above, there can be _several_
> > controllers attached to the same subsystem.
> > So we can express the second controller:
> > 
> > pci -> ctrl2
> > 
> > but we cannot attach the controller to 'subsys1'
> > as then 'subsys1' would need to be derived from
> > 'ctrl2', and not (as it is now) from 'ctrl1'.
> > 
> > The most logical step would be to have 'subsystems'
> > their own entity, independent of any controllers.
> > But then the block devices (which are derived from
> > the namespaces) could not be traced back
> > to the PCI device, and a PCI hotplug would not
> > 'automatically' disconnect the nvme block devices.
> > 
> > Plus the subsystem would be independent from the NVMe
> > PCI devices, so you could have a subsystem with
> > no controllers attached. And one would wonder who
> > should be responsible for cleaning up that.
> > 
> 
> Thanks for the details !
> 
> My use case is the simple one with no nvme subsystem/namespaces:
> - hotplug a pci nvme device (nvme controller) as in the following CLI (which
> automatically put the drive into a default namespace)
> 
> ./qemu-system-aarch64 -nographic -M virt \
>-drive file=nvme0.disk,if=none,id=nvme-drive0 \
>-device nvme,serial=nvme0,id=nvmedev0,drive=nvme-drive0
> 

AFAIK, you just need a pci root port to plug the device into.

  -drive file=nvme0.disk,if=none,id=nvme-drive0 \
  -device "pcie-root-port,id=pcie_root_port0,chassis=1,slot=0" \
  -device nvme,serial=nvme0,id=nvmedev0,drive=nvme-drive0

Then, you can use the qemu monitor to `device_del nvmedev0` and add it
with `device_add nvme,serial=nvme0,id=nvmedev0,drive=nvme-drive0`. The
"drive" (blockdev) will stick around after the device_del.


signature.asc
Description: PGP signature

Re: NVME hotplug support ?

2024-01-23 Thread Klaus Jensen

On Jan 23 13:40, Hannes Reinecke wrote:
> On 1/23/24 11:59, Damien Hedde wrote:
> > Hi all,
> > 
> > We are currently looking into hotplugging nvme devices and it is currently 
> > not possible:
> > When nvme was introduced 2 years ago, the feature was disabled.
> > > commit cc6fb6bc506e6c47ed604fcb7b7413dff0b7d845
> > > Author: Klaus Jensen
> > > Date:   Tue Jul 6 10:48:40 2021 +0200
> > > 
> > > hw/nvme: mark nvme-subsys non-hotpluggable
> > > We currently lack the infrastructure to handle subsystem hotplugging, 
> > > so
> > > disable it.
> > 
> > Do someone know what's lacking or anyone have some tips/idea of what we 
> > should develop to add the support ?
> > 
> Problem is that the object model is messed up. In qemu namespaces are
> attached to controllers, which in turn are children of the PCI device.
> There are subsystems, but these just reference the controller.
> 
> So if you hotunplug the PCI device you detach/destroy the controller and
> detach the namespaces from the controller.
> But if you hotplug the PCI device again the NVMe controller will be attached
> to the PCI device, but the namespace are still be detached.
> 
> Klaus said he was going to fix that, and I dimly remember some patches
> floating around. But apparently it never went anywhere.
> 
> Fundamental problem is that the NVMe hierarchy as per spec is incompatible
> with the qemu object model; qemu requires a strict
> tree model where every object has exactly _one_ parent.
> 

A little history might help to nuance this just a bit. And to defend the
current model ;)

When we added support for multiple namespaces we did not consider
subsystem support, so the namespaces would just be associated directly
with a parent controller (in QDev terms, the parent has a bus that the
namespace devices are attached to).

When we added subsystems, where namespaces may be attached to several
controllers, it became necessary to break the controller/namespace
parent/child relationship. The problem was that removing the controller
would take all the bus children with it, causing namespaces to be
removed from other controllers in the subsystem. We fixed this by
reparenting the namespaces to the subsystem device instead.

I think this model fits the NVMe hierarchy as good as possible.
Controllers and namespaces are considered children of the subsystem (as
they are in NVMe).

Now, the problem with namespaces not being re-attached is partly false.
If the namespaces are 'shared=on', they will be automatically attached
to any new controller attached to the subsystem. However, if they are
private, that is is not the case. In NVMe, a private namespace just
means a namespace that can only be attached to a single controller at a
time. It is not entirely unlikely that you have a private namespace that
you then reassign to controller B when controller A is removed. Now,
what we could do is track the last controller identifier that a private
namespace was attached to, and if the same controller identifier is
added to the subsystem, we could reattach the private namespace.

However, broadly, I think the current model does a pretty good job in
supporting experimentation with hotplug, multipath and failover
configurations.

signature.asc
Description: PGP signature

Re: NVME hotplug support ?

2024-01-23 Thread Klaus Jensen

On Jan 23 10:59, Damien Hedde wrote:
> Hi all,
> 
> We are currently looking into hotplugging nvme devices and it is currently 
> not possible:
> When nvme was introduced 2 years ago, the feature was disabled.
> > commit cc6fb6bc506e6c47ed604fcb7b7413dff0b7d845
> > Author: Klaus Jensen 
> > Date:   Tue Jul 6 10:48:40 2021 +0200
> >
> >hw/nvme: mark nvme-subsys non-hotpluggable
> >
> >We currently lack the infrastructure to handle subsystem hotplugging, so
> >disable it.
> 
> Do someone know what's lacking or anyone have some tips/idea of what we 
> should develop to add the support ?
> 
> Regards,
> --
> Damien 
> 

That's not entirely true.

The *subsystem* is non-hotpluggable, but individual controllers can be
hotplugged. Even into an existing subsystem.

However, you cannot hotplug pci devices unless you set up a pcie root
port. Say,

  -device "pcie-root-port,id=pcie_root_port0,chassis=1,slot=0"
  -device "nvme,id=nvme0,serial=nvme0,bus=pcie_root_port0"

nvme0 can then be removed with device_del and added back as a new device
with device_add.


signature.asc
Description: PGP signature

Re: [RFC v2 0/7] Add persistence to NVMe ZNS emulation

2024-01-09 Thread Klaus Jensen

On Nov 27 16:56, Sam Li wrote:
> ZNS emulation follows NVMe ZNS spec but the state of namespace
> zones does not persist accross restarts of QEMU. This patch makes the
> metadata of ZNS emulation persistent by using new block layer APIs and
> the qcow2 img as backing file. It is the second part after the patches
> - adding full zoned storage emulation to qcow2 driver.
> https://patchwork.kernel.org/project/qemu-devel/cover/20231127043703.49489-1-faithilike...@gmail.com/
> 
> The metadata of ZNS emulation divides into two parts, zone metadata and
> zone descriptor extension data. The zone metadata is composed of zone
> states, zone type, wp and zone attributes. The zone information can be
> stored at an uint64_t wp to save space and easy access. The structure of
> wp of each zone is as follows:
> |(4)| zone type (1)| zone attr (8)| wp (51) ||
> 
> The zone descriptor extension data is relatively small comparing to the
> overall size therefore we adopt the option that store zded of all zones
> in an array regardless of the valid bit set.
> 
> Creating a zns format qcow2 image file adds one more option zd_extension_size
> to zoned device configurations.
> 
> To attach this file as emulated zns drive in the command line of QEMU, use:
>   -drive file=${znsimg},id=nvmezns0,format=qcow2,if=none \
>   -device nvme-ns,drive=nvmezns0,bus=nvme0,nsid=1,uuid=xxx \
> 
> Sorry, send this one more time due to network problems.
> 
> v1->v2:
> - split [v1 2/5] patch to three (doc, config, block layer API)
> - adapt qcow2 v6
> 
> Sam Li (7):
>   docs/qcow2: add zd_extension_size option to the zoned format feature
>   qcow2: add zd_extension configurations to zoned metadata
>   hw/nvme: use blk_get_*() to access zone info in the block layer
>   hw/nvme: add blk_get_zone_extension to access zd_extensions
>   hw/nvme: make the metadata of ZNS emulation persistent
>   hw/nvme: refactor zone append write using block layer APIs
>   hw/nvme: make ZDED persistent
> 
>  block/block-backend.c |   88 ++
>  block/qcow2.c |  119 ++-
>  block/qcow2.h |2 +
>  docs/interop/qcow2.txt|3 +
>  hw/nvme/ctrl.c| 1247 -
>  hw/nvme/ns.c  |  162 +---
>  hw/nvme/nvme.h|   95 +--
>  include/block/block-common.h  |9 +
>  include/block/block_int-common.h  |8 +
>  include/sysemu/block-backend-io.h |   11 +
>  include/sysemu/dma.h  |3 +
>  qapi/block-core.json  |4 +
>  system/dma-helpers.c      |   17 +
>  13 files changed, 647 insertions(+), 1121 deletions(-)
> 
> -- 
> 2.40.1
> 

Hi Sam,

This is awesome. For the hw/nvme parts,

Acked-by: Klaus Jensen 

I'll give it a proper R-b when you drop the RFC status.


signature.asc
Description: PGP signature

Re: [PATCH v2 3/3] hw/nvme: Add SPDM over DOE support

2023-11-15 Thread Klaus Jensen

On Oct 17 15:21, Alistair Francis wrote:
> From: Wilfred Mallawa 
> 
> Setup Data Object Exchance (DOE) as an extended capability for the NVME
> controller and connect SPDM to it (CMA) to it.
> 
> Signed-off-by: Wilfred Mallawa 
> Signed-off-by: Alistair Francis 

Acked-by: Klaus Jensen 

I have no problem with picking this up for nvme, but I'd rather not take
the full series through my tree without reviews/acks from the pci
maintainers.

signature.asc
Description: PGP signature

Re: [RFC PATCH v3 65/78] hw/nvme: add fallthrough pseudo-keyword

2023-11-15 Thread Klaus Jensen

le *dif,
>  break;
>  }
>  
> -/* fallthrough */
> +fallthrough;
>  case NVME_ID_NS_DPS_TYPE_1:
>  case NVME_ID_NS_DPS_TYPE_2:
>  if (be16_to_cpu(dif->g16.apptag) != 0x) {
> @@ -229,7 +229,7 @@ static uint16_t nvme_dif_prchk_crc64(NvmeNamespace *ns, 
> NvmeDifTuple *dif,
>  break;
>  }
>  
> -/* fallthrough */
> +fallthrough;
>  case NVME_ID_NS_DPS_TYPE_1:
>  case NVME_ID_NS_DPS_TYPE_2:
>  if (be16_to_cpu(dif->g64.apptag) != 0x) {
> -- 
> 2.39.2
> 
> 

Reviewed-by: Klaus Jensen 


signature.asc
Description: PGP signature

Re: [PATCH 3/3] hw/nvme: Add SPDM over DOE support

2023-10-02 Thread Klaus Jensen

On Sep 15 21:27, Alistair Francis wrote:
> From: Wilfred Mallawa 
> 
> Setup Data Object Exchance (DOE) as an extended capability for the NVME
> controller and connect SPDM to it (CMA) to it.
> 
> Signed-off-by: Wilfred Mallawa 
> Signed-off-by: Alistair Francis 
> ---
>  docs/specs/index.rst|  1 +
>  docs/specs/spdm.rst | 56 +
>  include/hw/pci/pci_device.h |  5 
>  include/hw/pci/pcie_doe.h   |  3 ++
>  hw/nvme/ctrl.c  | 52 ++
>  hw/nvme/trace-events|  1 +
>  6 files changed, 118 insertions(+)
>  create mode 100644 docs/specs/spdm.rst
> 

This looks reasonable enough, but could this not be implemented at the
PCI layer? I do not see anything that is tied specifically to the nvme
device, why can the spdm parameter not be a PCIDevice parameter such
that all PCIDevice-derived devices gains this functionality?

signature.asc
Description: PGP signature

[PATCH] hw/nvme: Clean up local variable shadowing in nvme_ns_init()

2023-09-25 Thread Klaus Jensen

From: Klaus Jensen 

Fix local variable shadowing in nvme_ns_init().

Reported-by: Markus Armbruster 
Signed-off-by: Klaus Jensen 
---
 hw/nvme/ns.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/hw/nvme/ns.c b/hw/nvme/ns.c
index 44aba8f4d9cf..0eabcf5cf500 100644
--- a/hw/nvme/ns.c
+++ b/hw/nvme/ns.c
@@ -107,7 +107,7 @@ static int nvme_ns_init(NvmeNamespace *ns, Error **errp)
 
 ns->pif = ns->params.pif;
 
-static const NvmeLBAF lbaf[16] = {
+static const NvmeLBAF defaults[16] = {
 [0] = { .ds =  9   },
 [1] = { .ds =  9, .ms =  8 },
 [2] = { .ds =  9, .ms = 16 },
@@ -120,7 +120,7 @@ static int nvme_ns_init(NvmeNamespace *ns, Error **errp)
 
 ns->nlbaf = 8;
 
-memcpy(_ns->lbaf, , sizeof(lbaf));
+memcpy(_ns->lbaf, , sizeof(defaults));
 
 for (i = 0; i < ns->nlbaf; i++) {
 NvmeLBAF *lbaf = _ns->lbaf[i];

---
base-commit: b55e4b9c0525560577384adfc6d30eb0daa8d7be
change-id: 20230925-fix-local-shadowing-9606793e8ae9

Best regards,
-- 
Klaus Jensen

Re: [PATCH v6 0/3] hw/{i2c,nvme}: mctp endpoint, nvme management interface model

2023-09-20 Thread Klaus Jensen

On Sep 20 07:54, Corey Minyard wrote:
> On Wed, Sep 20, 2023 at 12:48:03PM +0100, Jonathan Cameron via wrote:
> > On Thu, 14 Sep 2023 11:53:40 +0200
> > Klaus Jensen  wrote:
> > 
> > > This adds a generic MCTP endpoint model that other devices may derive
> > > from.
> > > 
> > > Also included is a very basic implementation of an NVMe-MI device,
> > > supporting only a small subset of the required commands.
> > > 
> > > Since this all relies on i2c target mode, this can currently only be
> > > used with an SoC that includes the Aspeed I2C controller.
> > > 
> > > The easiest way to get up and running with this, is to grab my buildroot
> > > overlay[1] (aspeed_ast2600evb_nmi_defconfig). It includes modified a
> > > modified dts as well as a couple of required packages.
> > > 
> > > QEMU can then be launched along these lines:
> > > 
> > >   qemu-system-arm \
> > > -nographic \
> > > -M ast2600-evb \
> > > -kernel output/images/zImage \
> > > -initrd output/images/rootfs.cpio \
> > > -dtb output/images/aspeed-ast2600-evb-nmi.dtb \
> > > -nic user,hostfwd=tcp::-:22 \
> > > -device nmi-i2c,address=0x3a \
> > > -serial mon:stdio
> > > 
> > > From within the booted system,
> > > 
> > >   mctp addr add 8 dev mctpi2c15
> > >   mctp link set mctpi2c15 up
> > >   mctp route add 9 via mctpi2c15
> > >   mctp neigh add 9 dev mctpi2c15 lladdr 0x3a
> > >   mi-mctp 1 9 info
> > > 
> > > Comments are very welcome!
> > > 
> > >   [1]: https://github.com/birkelund/hwtests/tree/main/br2-external
> > > 
> > > Signed-off-by: Klaus Jensen 
> > 
> > Hi Klaus,
> > 
> > Silly question, but who is likely to pick this up? + likely to be soon?
> > 
> > I'm going to post the CXL stuff that makes use of the core support shortly
> > and whilst I can point at this patch set on list, I'd keen to see it 
> > upstream
> > to reduce the dependencies (it's got 2 sets ahead of it of CXL stuff
> > anyway but that will all hopefully go through Michael Tsirkin's tree
> > for PCI stuff in one go).
> 
> I can pick it up, but he can just request a merge, too.
> 
> I did have a question I asked earlier about tests.  It would be unusual
> at this point to add something like this without having some tests,
> especially injecting invalid data.
> 

Hi all,

Sorry for the late reply. I'm currently at SDC, but I will write up some
tests when I get back to in the office on Monday.

Corey, what kinds of tests would be best here? Avocado "acceptance"
tests or would you like to see something lower level?


Cheers,
Klaus


signature.asc
Description: PGP signature

[PATCH v6 3/3] hw/nvme: add nvme management interface model

2023-09-14 Thread Klaus Jensen

From: Klaus Jensen 

Add the 'nmi-i2c' device that emulates an NVMe Management Interface
controller.

Initial support is very basic (Read NMI DS, Configuration Get).

This is based on previously posted code by Padmakar Kalghatgi, Arun
Kumar Agasar and Saurav Kumar.

Reviewed-by: Jonathan Cameron 
Signed-off-by: Klaus Jensen 
---
 hw/nvme/Kconfig  |   4 +
 hw/nvme/meson.build  |   1 +
 hw/nvme/nmi-i2c.c| 407 +++
 hw/nvme/trace-events |   6 +
 4 files changed, 418 insertions(+)

diff --git a/hw/nvme/Kconfig b/hw/nvme/Kconfig
index cfa2ab0f9d5a..e1f6360c0f4b 100644
--- a/hw/nvme/Kconfig
+++ b/hw/nvme/Kconfig
@@ -2,3 +2,7 @@ config NVME_PCI
 bool
 default y if PCI_DEVICES || PCIE_DEVICES
 depends on PCI
+
+config NVME_NMI_I2C
+bool
+default y if I2C_MCTP
diff --git a/hw/nvme/meson.build b/hw/nvme/meson.build
index 1a6a2ca2f307..7bc85f31c149 100644
--- a/hw/nvme/meson.build
+++ b/hw/nvme/meson.build
@@ -1 +1,2 @@
 system_ss.add(when: 'CONFIG_NVME_PCI', if_true: files('ctrl.c', 'dif.c', 
'ns.c', 'subsys.c'))
+system_ss.add(when: 'CONFIG_NVME_NMI_I2C', if_true: files('nmi-i2c.c'))
diff --git a/hw/nvme/nmi-i2c.c b/hw/nvme/nmi-i2c.c
new file mode 100644
index ..bf4648db0457
--- /dev/null
+++ b/hw/nvme/nmi-i2c.c
@@ -0,0 +1,407 @@
+/*
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ *
+ * SPDX-FileCopyrightText: Copyright (c) 2023 Samsung Electronics Co., Ltd.
+ *
+ * SPDX-FileContributor: Padmakar Kalghatgi 
+ * SPDX-FileContributor: Arun Kumar Agasar 
+ * SPDX-FileContributor: Saurav Kumar 
+ * SPDX-FileContributor: Klaus Jensen 
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/crc32c.h"
+#include "hw/registerfields.h"
+#include "hw/i2c/i2c.h"
+#include "hw/i2c/mctp.h"
+#include "net/mctp.h"
+#include "trace.h"
+
+/* NVM Express Management Interface 1.2c, Section 3.1 */
+#define NMI_MAX_MESSAGE_LENGTH 4224
+
+#define TYPE_NMI_I2C_DEVICE "nmi-i2c"
+OBJECT_DECLARE_SIMPLE_TYPE(NMIDevice, NMI_I2C_DEVICE)
+
+typedef struct NMIDevice {
+MCTPI2CEndpoint mctp;
+
+uint8_t buffer[NMI_MAX_MESSAGE_LENGTH];
+uint8_t scratch[NMI_MAX_MESSAGE_LENGTH];
+
+size_t  len;
+int64_t pos;
+} NMIDevice;
+
+FIELD(NMI_MCTPD, MT, 0, 7)
+FIELD(NMI_MCTPD, IC, 7, 1)
+
+#define NMI_MCTPD_MT_NMI 0x4
+#define NMI_MCTPD_IC_ENABLED 0x1
+
+FIELD(NMI_NMP, ROR, 7, 1)
+FIELD(NMI_NMP, NMIMT, 3, 4)
+
+#define NMI_NMP_NMIMT_NVME_MI 0x1
+#define NMI_NMP_NMIMT_NVME_ADMIN 0x2
+
+typedef struct NMIMessage {
+uint8_t mctpd;
+uint8_t nmp;
+uint8_t rsvd2[2];
+uint8_t payload[]; /* includes the Message Integrity Check */
+} NMIMessage;
+
+typedef struct NMIRequest {
+   uint8_t opc;
+   uint8_t rsvd1[3];
+   uint32_t dw0;
+   uint32_t dw1;
+   uint32_t mic;
+} NMIRequest;
+
+FIELD(NMI_CMD_READ_NMI_DS_DW0, DTYP, 24, 8)
+
+typedef enum NMIReadDSType {
+NMI_CMD_READ_NMI_DS_SUBSYSTEM   = 0x0,
+NMI_CMD_READ_NMI_DS_PORTS   = 0x1,
+NMI_CMD_READ_NMI_DS_CTRL_LIST   = 0x2,
+NMI_CMD_READ_NMI_DS_CTRL_INFO   = 0x3,
+NMI_CMD_READ_NMI_DS_OPT_CMD_SUPPORT = 0x4,
+NMI_CMD_READ_NMI_DS_MEB_CMD_SUPPORT = 0x5,
+} NMIReadDSType;
+
+#define NMI_STATUS_INVALID_PARAMETER 0x4
+
+static void nmi_scratch_append(NMIDevice *nmi, const void *buf, size_t count)
+{
+assert(nmi->pos + count <= NMI_MAX_MESSAGE_LENGTH);
+
+memcpy(nmi->scratch + nmi->pos, buf, count);
+nmi->pos += count;
+}
+
+static void nmi_set_parameter_error(NMIDevice *nmi, uint8_t bit, uint16_t byte)
+{
+/* NVM Express Management Interface 1.2c, Figure 30 */
+struct resp {
+uint8_t  status;
+uint8_t  bit;
+uint16_t byte;
+};
+
+struct resp buf = {
+.status = NMI_STATUS_INVALID_PARAMETER,
+.bit = bit & 0x3,
+.byte = byte,
+};
+
+nmi_scratch_append(nmi, , sizeof(buf));
+}
+
+static void nmi_set_error(NMIDevice *nmi, uint8_t status)
+{
+const uint8_t buf[4] = {status,};
+
+nmi_scratch_append(nmi, buf, sizeof(buf));
+}
+
+static void nmi_handle_mi_read_nmi_ds(NMIDevice *nmi, NMIRequest *request)
+{
+I2CSlave *i2c = I2C_SLAVE(nmi);
+
+uint32_t dw0 = le32_to_cpu(request->dw0);
+uint8_t dtyp = FIELD_EX32(dw0, NMI_CMD_READ_NMI_DS_DW0, DTYP);
+
+trace_nmi_handle_mi_read_nmi_ds(dtyp);
+
+static const uint8_t nmi_ds_subsystem[36] = {
+0x00,   /* success */
+0x20, 0x00, /* response data length */
+0x00,   /* reserved */
+0x00,   /* number of ports */
+0x01,   /* major version */
+0x01,   /* minor version */
+};
+
+/*
+ * Cannot be static (or const) since we need to patch in the i2c
+ * address.
+ */
+const uint8_t nmi_ds_ports[36] = {
+0x00,   /* success */
+0x20, 0x00, /* response data length */
+0x00,   /* reserved */
+0x02,   /* por

[PATCH v6 2/3] hw/i2c: add mctp core

2023-09-14 Thread Klaus Jensen

From: Klaus Jensen 

Add an abstract MCTP over I2C endpoint model. This implements MCTP
control message handling as well as handling the actual I2C transport
(packetization).

Devices are intended to derive from this and implement the class
methods.

Parts of this implementation is inspired by code[1] previously posted by
Jonathan Cameron.

Squashed a fix[2] from Matt Johnston.

  [1]: 
https://lore.kernel.org/qemu-devel/20220520170128.4436-1-jonathan.came...@huawei.com/
  [2]: 
https://lore.kernel.org/qemu-devel/20221121080445.ga29...@codeconstruct.com.au/

Tested-by: Jonathan Cameron 
Reviewed-by: Jonathan Cameron 
Signed-off-by: Klaus Jensen 
---
 MAINTAINERS   |   7 +
 hw/arm/Kconfig|   1 +
 hw/i2c/Kconfig|   4 +
 hw/i2c/mctp.c | 432 ++
 hw/i2c/meson.build|   1 +
 hw/i2c/trace-events   |  13 ++
 include/hw/i2c/mctp.h | 125 +++
 include/net/mctp.h|  35 
 8 files changed, 618 insertions(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index 00562f924f7a..3208ebb1bcde 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -3404,6 +3404,13 @@ F: tests/qtest/adm1272-test.c
 F: tests/qtest/max34451-test.c
 F: tests/qtest/isl_pmbus_vr-test.c
 
+MCTP I2C Transport
+M: Klaus Jensen 
+S: Maintained
+F: hw/i2c/mctp.c
+F: include/hw/i2c/mctp.h
+F: include/net/mctp.h
+
 Firmware schema specifications
 M: Philippe Mathieu-Daudé 
 R: Daniel P. Berrange 
diff --git a/hw/arm/Kconfig b/hw/arm/Kconfig
index 7e6834844051..5bcb1e0e8a6f 100644
--- a/hw/arm/Kconfig
+++ b/hw/arm/Kconfig
@@ -541,6 +541,7 @@ config ASPEED_SOC
 select DS1338
 select FTGMAC100
 select I2C
+select I2C_MCTP
 select DPS310
 select PCA9552
 select SERIAL
diff --git a/hw/i2c/Kconfig b/hw/i2c/Kconfig
index 14886b35dac2..2b2a50b83d1e 100644
--- a/hw/i2c/Kconfig
+++ b/hw/i2c/Kconfig
@@ -6,6 +6,10 @@ config I2C_DEVICES
 # to any board's i2c bus
 bool
 
+config I2C_MCTP
+bool
+select I2C
+
 config SMBUS
 bool
 select I2C
diff --git a/hw/i2c/mctp.c b/hw/i2c/mctp.c
new file mode 100644
index ..8d8e74567745
--- /dev/null
+++ b/hw/i2c/mctp.c
@@ -0,0 +1,432 @@
+/*
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ *
+ * SPDX-FileCopyrightText: Copyright (c) 2023 Samsung Electronics Co., Ltd.
+ * SPDX-FileContributor: Klaus Jensen 
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/main-loop.h"
+
+#include "hw/qdev-properties.h"
+#include "hw/i2c/i2c.h"
+#include "hw/i2c/smbus_master.h"
+#include "hw/i2c/mctp.h"
+#include "net/mctp.h"
+
+#include "trace.h"
+
+/* DSP0237 1.2.0, Figure 1 */
+typedef struct MCTPI2CPacketHeader {
+uint8_t dest;
+#define MCTP_I2C_COMMAND_CODE 0xf
+uint8_t command_code;
+uint8_t byte_count;
+uint8_t source;
+} MCTPI2CPacketHeader;
+
+typedef struct MCTPI2CPacket {
+MCTPI2CPacketHeader i2c;
+MCTPPacket  mctp;
+} MCTPI2CPacket;
+
+#define i2c_mctp_payload_offset offsetof(MCTPI2CPacket, mctp.payload)
+#define i2c_mctp_payload(buf) (buf + i2c_mctp_payload_offset)
+
+/* DSP0236 1.3.0, Figure 20 */
+typedef struct MCTPControlMessage {
+#define MCTP_MESSAGE_TYPE_CONTROL 0x0
+uint8_t type;
+#define MCTP_CONTROL_FLAGS_RQ   (1 << 7)
+#define MCTP_CONTROL_FLAGS_D(1 << 6)
+uint8_t flags;
+uint8_t command_code;
+uint8_t data[];
+} MCTPControlMessage;
+
+enum MCTPControlCommandCodes {
+MCTP_CONTROL_SET_EID= 0x01,
+MCTP_CONTROL_GET_EID= 0x02,
+MCTP_CONTROL_GET_VERSION= 0x04,
+MCTP_CONTROL_GET_MESSAGE_TYPE_SUPPORT   = 0x05,
+};
+
+#define MCTP_CONTROL_ERROR_UNSUPPORTED_CMD 0x5
+
+#define i2c_mctp_control_data_offset \
+(i2c_mctp_payload_offset + offsetof(MCTPControlMessage, data))
+#define i2c_mctp_control_data(buf) (buf + i2c_mctp_control_data_offset)
+
+/**
+ * The byte count field in the SMBUS Block Write containers the number of bytes
+ * *following* the field itself.
+ *
+ * This is at least 5.
+ *
+ * 1 byte for the MCTP/I2C piggy-backed I2C source address in addition to the
+ * size of the MCTP transport/packet header.
+ */
+#define MCTP_I2C_BYTE_COUNT_OFFSET (sizeof(MCTPPacketHeader) + 1)
+
+void i2c_mctp_schedule_send(MCTPI2CEndpoint *mctp)
+{
+I2CBus *i2c = I2C_BUS(qdev_get_parent_bus(DEVICE(mctp)));
+
+mctp->tx.state = I2C_MCTP_STATE_TX_START_SEND;
+
+i2c_bus_master(i2c, mctp->tx.bh);
+}
+
+static void i2c_mctp_tx(void *opaque)
+{
+DeviceState *dev = DEVICE(opaque);
+I2CBus *i2c = I2C_BUS(qdev_get_parent_bus(dev));
+I2CSlave *slave = I2C_SLAVE(dev);
+MCTPI2CEndpoint *mctp = MCTP_I2C_ENDPOINT(dev);
+MCTPI2CEndpointClass *mc = MCTP_I2C_ENDPOINT_GET_CLASS(mctp);
+MCTPI2CPacket *pkt = (MCTPI2CPacket *)mctp->buffer;
+uint8_t flags = 0;
+
+switch (mctp->tx.state) {
+case I2C_MCTP_STAT

[PATCH v6 0/3] hw/{i2c,nvme}: mctp endpoint, nvme management interface model

2023-09-14 Thread Klaus Jensen

This adds a generic MCTP endpoint model that other devices may derive
from.

Also included is a very basic implementation of an NVMe-MI device,
supporting only a small subset of the required commands.

Since this all relies on i2c target mode, this can currently only be
used with an SoC that includes the Aspeed I2C controller.

The easiest way to get up and running with this, is to grab my buildroot
overlay[1] (aspeed_ast2600evb_nmi_defconfig). It includes modified a
modified dts as well as a couple of required packages.

QEMU can then be launched along these lines:

  qemu-system-arm \
-nographic \
-M ast2600-evb \
-kernel output/images/zImage \
-initrd output/images/rootfs.cpio \
-dtb output/images/aspeed-ast2600-evb-nmi.dtb \
-nic user,hostfwd=tcp::-:22 \
-device nmi-i2c,address=0x3a \
-serial mon:stdio

>From within the booted system,

  mctp addr add 8 dev mctpi2c15
  mctp link set mctpi2c15 up
  mctp route add 9 via mctpi2c15
  mctp neigh add 9 dev mctpi2c15 lladdr 0x3a
  mi-mctp 1 9 info

Comments are very welcome!

  [1]: https://github.com/birkelund/hwtests/tree/main/br2-external

Signed-off-by: Klaus Jensen 
---
Changes in v6:
- Use nmi_scratch_append() directly where it makes sense. Fixes bug
  observed by Andrew.
- Link to v5: 
https://lore.kernel.org/r/20230905-nmi-i2c-v5-0-0001d372a...@samsung.com

Changes in v5:
- Added a nmi_scratch_append() that asserts available space in the
  scratch buffer. This is a similar defensive strategy as used in
  hw/i2c/mctp.c
- Various small fixups in response to review (Jonathan)
- Link to v4: 
https://lore.kernel.org/r/20230823-nmi-i2c-v4-0-2b0f86e5b...@samsung.com

---
Klaus Jensen (3):
  hw/i2c: add smbus pec utility function
  hw/i2c: add mctp core
  hw/nvme: add nvme management interface model

 MAINTAINERS   |   7 +
 hw/arm/Kconfig|   1 +
 hw/i2c/Kconfig|   4 +
 hw/i2c/mctp.c | 432 ++
 hw/i2c/meson.build|   1 +
 hw/i2c/smbus_master.c |  26 +++
 hw/i2c/trace-events   |  13 ++
 hw/nvme/Kconfig   |   4 +
 hw/nvme/meson.build   |   1 +
 hw/nvme/nmi-i2c.c | 407 +++
 hw/nvme/trace-events  |   6 +
 include/hw/i2c/mctp.h | 125 
 include/hw/i2c/smbus_master.h |   2 +
 include/net/mctp.h|  35 
 14 files changed, 1064 insertions(+)
---
base-commit: 005ad32358f12fe9313a4a01918a55e60d4f39e5
change-id: 20230822-nmi-i2c-d804ed5be7e6

Best regards,
-- 
Klaus Jensen

[PATCH v6 1/3] hw/i2c: add smbus pec utility function

2023-09-14 Thread Klaus Jensen

From: Klaus Jensen 

Add i2c_smbus_pec() to calculate the SMBus Packet Error Code for a
message.

Reviewed-by: Jonathan Cameron 
Signed-off-by: Klaus Jensen 
---
 hw/i2c/smbus_master.c | 26 ++
 include/hw/i2c/smbus_master.h |  2 ++
 2 files changed, 28 insertions(+)

diff --git a/hw/i2c/smbus_master.c b/hw/i2c/smbus_master.c
index 6a53c34e70b7..01a8e4700222 100644
--- a/hw/i2c/smbus_master.c
+++ b/hw/i2c/smbus_master.c
@@ -15,6 +15,32 @@
 #include "hw/i2c/i2c.h"
 #include "hw/i2c/smbus_master.h"
 
+static uint8_t crc8(uint16_t data)
+{
+int i;
+
+for (i = 0; i < 8; i++) {
+if (data & 0x8000) {
+data ^= 0x1070U << 3;
+}
+
+data <<= 1;
+}
+
+return (uint8_t)(data >> 8);
+}
+
+uint8_t i2c_smbus_pec(uint8_t crc, uint8_t *buf, size_t len)
+{
+int i;
+
+for (i = 0; i < len; i++) {
+crc = crc8((crc ^ buf[i]) << 8);
+}
+
+return crc;
+}
+
 /* Master device commands.  */
 int smbus_quick_command(I2CBus *bus, uint8_t addr, int read)
 {
diff --git a/include/hw/i2c/smbus_master.h b/include/hw/i2c/smbus_master.h
index bb13bc423c22..d90f81767d86 100644
--- a/include/hw/i2c/smbus_master.h
+++ b/include/hw/i2c/smbus_master.h
@@ -27,6 +27,8 @@
 
 #include "hw/i2c/i2c.h"
 
+uint8_t i2c_smbus_pec(uint8_t crc, uint8_t *buf, size_t len);
+
 /* Master device commands.  */
 int smbus_quick_command(I2CBus *bus, uint8_t addr, int read);
 int smbus_receive_byte(I2CBus *bus, uint8_t addr);

-- 
2.42.0

Re: [PATCH v5 3/3] hw/nvme: add nvme management interface model

2023-09-14 Thread Klaus Jensen

On Sep 12 13:50, Andrew Jeffery wrote:
> Hi Klaus,
> 
> On Tue, 2023-09-05 at 10:38 +0200, Klaus Jensen wrote:
> > > 
> > > +static void nmi_handle_mi_config_get(NMIDevice *nmi, NMIRequest
> > > *request)
> > > +{
> > > +    uint32_t dw0 = le32_to_cpu(request->dw0);
> > > +    uint8_t identifier = FIELD_EX32(dw0,
> > > NMI_CMD_CONFIGURATION_GET_DW0,
> > > +    IDENTIFIER);
> > > +    const uint8_t *buf;
> > > +
> > > +    static const uint8_t smbus_freq[4] = {
> > > +    0x00,   /* success */
> > > +    0x01, 0x00, 0x00,   /* 100 kHz */
> > > +    };
> > > +
> > > +    static const uint8_t mtu[4] = {
> > > +    0x00,   /* success */
> > > +    0x40, 0x00, /* 64 */
> > > +    0x00,   /* reserved */
> > > +    };
> > > +
> > > +    trace_nmi_handle_mi_config_get(identifier);
> > > +
> > > +    switch (identifier) {
> > > +    case NMI_CMD_CONFIGURATION_GET_SMBUS_FREQ:
> > > +    buf = smbus_freq;
> > > +    break;
> > > +
> > > +    case NMI_CMD_CONFIGURATION_GET_MCTP_TRANSMISSION_UNIT:
> > > +    buf = mtu;
> > > +    break;
> > > +
> > > +    default:
> > > +    nmi_set_parameter_error(nmi, 0x0, offsetof(NMIRequest,
> > > dw0));
> > > +    return;
> > > +    }
> > > +
> > > +    nmi_scratch_append(nmi, buf, sizeof(buf));
> > > +}
> 
> When I tried to build this patch I got:
> 
> ```
> In file included from /usr/include/string.h:535,
>  from /home/andrew/src/qemu.org/qemu/include/qemu/osdep.h:112,
>  from ../hw/nvme/nmi-i2c.c:12:
> In function ‘memcpy’,
> inlined from ‘nmi_scratch_append’ at ../hw/nvme/nmi-i2c.c:80:5,
> inlined from ‘nmi_handle_mi_config_get’ at ../hw/nvme/nmi-i2c.c:246:5,
> inlined from ‘nmi_handle_mi’ at ../hw/nvme/nmi-i2c.c:266:9,
> inlined from ‘nmi_handle’ at ../hw/nvme/nmi-i2c.c:313:9:
> /usr/include/x86_64-linux-gnu/bits/string_fortified.h:29:10: error: 
> ‘__builtin_memcpy’ forming offset [4, 7] is out of the bounds [0, 4] 
> [-Werror=array-bounds=]
>29 |   return __builtin___memcpy_chk (__dest, __src, __len,
>   |  ^
>30 |  __glibc_objsize0 (__dest));
>   |  ~~
> ```
> 
> It wasn't clear initially from the error that the source of the problem
> was the size associated with the source buffer, especially as there is
> some pointer arithmetic being done to derive `__dest`.
> 
> Anyway, what we're trying to express is that the size to copy from buf
> is the size of the array pointed to by buf. However, buf is declared as
> a pointer to uint8_t, which loses the length information. To fix that I
> think we need:
> 
> - const uint8_t *buf;
> + const uint8_t (*buf)[4];
> 
> and then:
> 
> - nmi_scratch_append(nmi, buf, sizeof(buf));
> + nmi_scratch_append(nmi, buf, sizeof(*buf));
> 
> Andrew
> 

Hi Andrew,

Nice (and important) catch! Just curious, are you massaging QEMU's build
system into adding additional checks or how did your compiler catch
this?

Thanks,
Klaus


signature.asc
Description: PGP signature

Re: [PATCH v3 1/5] block: remove AIOCBInfo->get_aio_context()

2023-09-14 Thread Klaus Jensen

On Sep 12 19:10, Stefan Hajnoczi wrote:
> The synchronous bdrv_aio_cancel() function needs the acb's AioContext so
> it can call aio_poll() to wait for cancellation.
> 
> It turns out that all users run under the BQL in the main AioContext, so
> this callback is not needed.
> 
> Remove the callback, mark bdrv_aio_cancel() GLOBAL_STATE_CODE just like
> its blk_aio_cancel() caller, and poll the main loop AioContext.
> 
> The purpose of this cleanup is to identify bdrv_aio_cancel() as an API
> that does not work with the multi-queue block layer.
> 
> Signed-off-by: Stefan Hajnoczi 
> ---
>  include/block/aio.h|  1 -
>  include/block/block-global-state.h |  2 ++
>  include/block/block-io.h   |  1 -
>  block/block-backend.c  | 17 -
>  block/io.c | 23 ---
>  hw/nvme/ctrl.c |  7 ---
>  softmmu/dma-helpers.c  |  8 
>  util/thread-pool.c |  8 
>  8 files changed, 10 insertions(+), 57 deletions(-)
> 
> diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c
> index 539d273553..ee7273daa1 100644
> --- a/hw/nvme/ctrl.c
> +++ b/hw/nvme/ctrl.c
> @@ -2130,11 +2130,6 @@ static inline bool nvme_is_write(NvmeRequest *req)
> rw->opcode == NVME_CMD_WRITE_ZEROES;
>  }
>  
> -static AioContext *nvme_get_aio_context(BlockAIOCB *acb)
> -{
> -return qemu_get_aio_context();
> -}
> -
>  static void nvme_misc_cb(void *opaque, int ret)
>  {
>  NvmeRequest *req = opaque;
> @@ -3302,7 +3297,6 @@ static void nvme_flush_cancel(BlockAIOCB *acb)
>  static const AIOCBInfo nvme_flush_aiocb_info = {
>  .aiocb_size = sizeof(NvmeFlushAIOCB),
>  .cancel_async = nvme_flush_cancel,
> -.get_aio_context = nvme_get_aio_context,
>  };
>  
>  static void nvme_do_flush(NvmeFlushAIOCB *iocb);
> @@ -6478,7 +6472,6 @@ static void nvme_format_cancel(BlockAIOCB *aiocb)
>  static const AIOCBInfo nvme_format_aiocb_info = {
>  .aiocb_size = sizeof(NvmeFormatAIOCB),
>  .cancel_async = nvme_format_cancel,
> -.get_aio_context = nvme_get_aio_context,
>  };
>  
>  static void nvme_format_set(NvmeNamespace *ns, uint8_t lbaf, uint8_t mset,

Reviewed-by: Klaus Jensen 


signature.asc
Description: PGP signature

[PULL 0/2] hw/nvme: updates

2023-09-12 Thread Klaus Jensen

From: Klaus Jensen 

Hi,

The following changes since commit 9ef497755afc252fb8e060c9ea6b0987abfd20b6:

  Merge tag 'pull-vfio-20230911' of https://github.com/legoater/qemu into 
staging (2023-09-11 09:13:08 -0400)

are available in the Git repository at:

  https://gitlab.com/birkelund/qemu.git tags/nvme-next-pull-request

for you to fetch changes up to b3c8246750b7077add335559341268f2956f6470:

  hw/nvme: Avoid dynamic stack allocation (2023-09-12 16:17:05 +0200)


hw/nvme updates

Two fixes for dynamic array allocation.
-BEGIN PGP SIGNATURE-

iQEzBAABCgAdFiEEUigzqnXi3OaiR2bATeGvMW1PDekFAmUAc8AACgkQTeGvMW1P
DelwhQgAxD7imw85V89Dz58LgrFoq5XZz2cq6Q5BsudyZd8FW5r7lOn9c1i0Yu2x
iiP93FX0b5LPQ9/8/liz3oHu1HZ7+hX+VeDZSQ1/bugfXM/eDSPA7lf7GG1np312
9lKRs8o+T4Di7v93kdiEi6G3b0jQSmZ722aMa54isk58hy1mcUTnGxvPZpVZutTP
lYhwuElQIsnnKXB0jaRlpcDkpXdHJ1wwziaYLM7pus+tElMiSkFP05j2pX9iigKu
7g+Hs+DaqrOzdoF/6uu72IKygq3/5H8iou1No/7OICWbFti5Qhhra0OKQE6nrlKd
51fnWA6VjpO5g9+diwRRYbjEiOrkqQ==
=wn4B
-END PGP SIGNATURE-



Peter Maydell (1):
  hw/nvme: Avoid dynamic stack allocation

Philippe Mathieu-Daudé (1):
  hw/nvme: Use #define to avoid variable length array

 hw/nvme/ctrl.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

-- 
2.42.0

[PULL 2/2] hw/nvme: Avoid dynamic stack allocation

2023-09-12 Thread Klaus Jensen

From: Peter Maydell 

Instead of using a variable-length array in nvme_map_prp(),
allocate on the stack with a g_autofree pointer.

The codebase has very few VLAs, and if we can get rid of them all we
can make the compiler error on new additions.  This is a defensive
measure against security bugs where an on-stack dynamic allocation
isn't correctly size-checked (e.g.  CVE-2021-3527).

Signed-off-by: Peter Maydell 
Signed-off-by: Klaus Jensen 
---
 hw/nvme/ctrl.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c
index d99a6f5c9a2e..90687b168ae1 100644
--- a/hw/nvme/ctrl.c
+++ b/hw/nvme/ctrl.c
@@ -894,7 +894,7 @@ static uint16_t nvme_map_prp(NvmeCtrl *n, NvmeSg *sg, 
uint64_t prp1,
 len -= trans_len;
 if (len) {
 if (len > n->page_size) {
-uint64_t prp_list[n->max_prp_ents];
+g_autofree uint64_t *prp_list = g_new(uint64_t, n->max_prp_ents);
 uint32_t nents, prp_trans;
 int i = 0;
 
-- 
2.42.0

[PULL 1/2] hw/nvme: Use #define to avoid variable length array

2023-09-12 Thread Klaus Jensen

From: Philippe Mathieu-Daudé 

In nvme_map_sgl() we create an array segment[] whose size is the
'const int SEG_CHUNK_SIZE'.  Since this is C, rather than C++, a
"const int foo" is not a true constant, it's merely a variable with a
constant value, and so semantically segment[] is a variable-length
array.  Switch SEG_CHUNK_SIZE to a #define so that we can make the
segment[] array truly fixed-size, in the sense that it doesn't
trigger the -Wvla warning.

The codebase has very few VLAs, and if we can get rid of them all we
can make the compiler error on new additions.  This is a defensive
measure against security bugs where an on-stack dynamic allocation
isn't correctly size-checked (e.g.  CVE-2021-3527).

[PMM: rebased (function has moved file), expand commit message
 based on discussion from previous version of patch]

Signed-off-by: Philippe Mathieu-Daudé 
Signed-off-by: Peter Maydell 
Signed-off-by: Klaus Jensen 
---
 hw/nvme/ctrl.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c
index 539d27355313..d99a6f5c9a2e 100644
--- a/hw/nvme/ctrl.c
+++ b/hw/nvme/ctrl.c
@@ -1045,7 +1045,7 @@ static uint16_t nvme_map_sgl(NvmeCtrl *n, NvmeSg *sg, 
NvmeSglDescriptor sgl,
  * descriptors and segment chain) than the command transfer size, so it is
  * not bounded by MDTS.
  */
-const int SEG_CHUNK_SIZE = 256;
+#define SEG_CHUNK_SIZE 256
 
 NvmeSglDescriptor segment[SEG_CHUNK_SIZE], *sgld, *last_sgld;
 uint64_t nsgld;
-- 
2.42.0

Re: [PATCH 0/2] nvme: avoid dynamic stack allocations

2023-09-12 Thread Klaus Jensen

On Sep 12 15:15, Peter Maydell wrote:
> On Mon, 14 Aug 2023 at 08:09, Klaus Jensen  wrote:
> >
> > On Aug 11 18:47, Peter Maydell wrote:
> > > The QEMU codebase has very few C variable length arrays, and if we can
> > > get rid of them all we can make the compiler error on new additions.
> > > This is a defensive measure against security bugs where an on-stack
> > > dynamic allocation isn't correctly size-checked (e.g.  CVE-2021-3527).
> > >
> > > We last had a go at this a few years ago, when Philippe wrote
> > > patches for this:
> > > https://patchew.org/QEMU/20210505211047.1496765-1-phi...@redhat.com/
> > > Some of the fixes made it into the tree, but some didn't (either
> > > because of lack of review or because review found some changes
> > > that needed to be made). I'm going through the remainder as a
> > > non-urgent Friday afternoon task...
> > >
> > > This patchset deals with two VLAs in the NVME code.
> > >
> > > thanks
> > > -- PMM
> > >
> > > Peter Maydell (1):
> > >   hw/nvme: Avoid dynamic stack allocation
> > >
> > > Philippe Mathieu-Daudé (1):
> > >   hw/nvme: Use #define to avoid variable length array
> > >
> > >  hw/nvme/ctrl.c | 4 ++--
> > >  1 file changed, 2 insertions(+), 2 deletions(-)
> > >
> > > --
> > > 2.34.1
> > >
> >
> > Thanks Peter,
> >
> > Applied to nvme-next!
> 
> 
> Hi Klaus -- did these patches get lost? They don't seem to
> have appeared in master yet.
> 
> thanks
> -- PMM

Oh. I never sent the pull - I'll do that right away! Thanks for the
reminder!


signature.asc
Description: PGP signature

[PATCH v5 2/3] hw/i2c: add mctp core

2023-09-05 Thread Klaus Jensen

From: Klaus Jensen 

Add an abstract MCTP over I2C endpoint model. This implements MCTP
control message handling as well as handling the actual I2C transport
(packetization).

Devices are intended to derive from this and implement the class
methods.

Parts of this implementation is inspired by code[1] previously posted by
Jonathan Cameron.

Squashed a fix[2] from Matt Johnston.

  [1]: 
https://lore.kernel.org/qemu-devel/20220520170128.4436-1-jonathan.came...@huawei.com/
  [2]: 
https://lore.kernel.org/qemu-devel/20221121080445.ga29...@codeconstruct.com.au/

Tested-by: Jonathan Cameron 
Reviewed-by: Jonathan Cameron 
Signed-off-by: Klaus Jensen 
---
 MAINTAINERS   |   7 +
 hw/arm/Kconfig|   1 +
 hw/i2c/Kconfig|   4 +
 hw/i2c/mctp.c | 432 ++
 hw/i2c/meson.build|   1 +
 hw/i2c/trace-events   |  13 ++
 include/hw/i2c/mctp.h | 125 +++
 include/net/mctp.h|  35 
 8 files changed, 618 insertions(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index 6111b6b4d928..8ca71167607d 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -3395,6 +3395,13 @@ F: tests/qtest/adm1272-test.c
 F: tests/qtest/max34451-test.c
 F: tests/qtest/isl_pmbus_vr-test.c
 
+MCTP I2C Transport
+M: Klaus Jensen 
+S: Maintained
+F: hw/i2c/mctp.c
+F: include/hw/i2c/mctp.h
+F: include/net/mctp.h
+
 Firmware schema specifications
 M: Philippe Mathieu-Daudé 
 R: Daniel P. Berrange 
diff --git a/hw/arm/Kconfig b/hw/arm/Kconfig
index 7e6834844051..5bcb1e0e8a6f 100644
--- a/hw/arm/Kconfig
+++ b/hw/arm/Kconfig
@@ -541,6 +541,7 @@ config ASPEED_SOC
 select DS1338
 select FTGMAC100
 select I2C
+select I2C_MCTP
 select DPS310
 select PCA9552
 select SERIAL
diff --git a/hw/i2c/Kconfig b/hw/i2c/Kconfig
index 14886b35dac2..2b2a50b83d1e 100644
--- a/hw/i2c/Kconfig
+++ b/hw/i2c/Kconfig
@@ -6,6 +6,10 @@ config I2C_DEVICES
 # to any board's i2c bus
 bool
 
+config I2C_MCTP
+bool
+select I2C
+
 config SMBUS
 bool
 select I2C
diff --git a/hw/i2c/mctp.c b/hw/i2c/mctp.c
new file mode 100644
index ..8d8e74567745
--- /dev/null
+++ b/hw/i2c/mctp.c
@@ -0,0 +1,432 @@
+/*
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ *
+ * SPDX-FileCopyrightText: Copyright (c) 2023 Samsung Electronics Co., Ltd.
+ * SPDX-FileContributor: Klaus Jensen 
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/main-loop.h"
+
+#include "hw/qdev-properties.h"
+#include "hw/i2c/i2c.h"
+#include "hw/i2c/smbus_master.h"
+#include "hw/i2c/mctp.h"
+#include "net/mctp.h"
+
+#include "trace.h"
+
+/* DSP0237 1.2.0, Figure 1 */
+typedef struct MCTPI2CPacketHeader {
+uint8_t dest;
+#define MCTP_I2C_COMMAND_CODE 0xf
+uint8_t command_code;
+uint8_t byte_count;
+uint8_t source;
+} MCTPI2CPacketHeader;
+
+typedef struct MCTPI2CPacket {
+MCTPI2CPacketHeader i2c;
+MCTPPacket  mctp;
+} MCTPI2CPacket;
+
+#define i2c_mctp_payload_offset offsetof(MCTPI2CPacket, mctp.payload)
+#define i2c_mctp_payload(buf) (buf + i2c_mctp_payload_offset)
+
+/* DSP0236 1.3.0, Figure 20 */
+typedef struct MCTPControlMessage {
+#define MCTP_MESSAGE_TYPE_CONTROL 0x0
+uint8_t type;
+#define MCTP_CONTROL_FLAGS_RQ   (1 << 7)
+#define MCTP_CONTROL_FLAGS_D(1 << 6)
+uint8_t flags;
+uint8_t command_code;
+uint8_t data[];
+} MCTPControlMessage;
+
+enum MCTPControlCommandCodes {
+MCTP_CONTROL_SET_EID= 0x01,
+MCTP_CONTROL_GET_EID= 0x02,
+MCTP_CONTROL_GET_VERSION= 0x04,
+MCTP_CONTROL_GET_MESSAGE_TYPE_SUPPORT   = 0x05,
+};
+
+#define MCTP_CONTROL_ERROR_UNSUPPORTED_CMD 0x5
+
+#define i2c_mctp_control_data_offset \
+(i2c_mctp_payload_offset + offsetof(MCTPControlMessage, data))
+#define i2c_mctp_control_data(buf) (buf + i2c_mctp_control_data_offset)
+
+/**
+ * The byte count field in the SMBUS Block Write containers the number of bytes
+ * *following* the field itself.
+ *
+ * This is at least 5.
+ *
+ * 1 byte for the MCTP/I2C piggy-backed I2C source address in addition to the
+ * size of the MCTP transport/packet header.
+ */
+#define MCTP_I2C_BYTE_COUNT_OFFSET (sizeof(MCTPPacketHeader) + 1)
+
+void i2c_mctp_schedule_send(MCTPI2CEndpoint *mctp)
+{
+I2CBus *i2c = I2C_BUS(qdev_get_parent_bus(DEVICE(mctp)));
+
+mctp->tx.state = I2C_MCTP_STATE_TX_START_SEND;
+
+i2c_bus_master(i2c, mctp->tx.bh);
+}
+
+static void i2c_mctp_tx(void *opaque)
+{
+DeviceState *dev = DEVICE(opaque);
+I2CBus *i2c = I2C_BUS(qdev_get_parent_bus(dev));
+I2CSlave *slave = I2C_SLAVE(dev);
+MCTPI2CEndpoint *mctp = MCTP_I2C_ENDPOINT(dev);
+MCTPI2CEndpointClass *mc = MCTP_I2C_ENDPOINT_GET_CLASS(mctp);
+MCTPI2CPacket *pkt = (MCTPI2CPacket *)mctp->buffer;
+uint8_t flags = 0;
+
+switch (mctp->tx.state) {
+case I2C_MCTP_STAT

[PATCH v5 0/3] hw/{i2c,nvme}: mctp endpoint, nvme management interface model

2023-09-05 Thread Klaus Jensen

This adds a generic MCTP endpoint model that other devices may derive
from.

Also included is a very basic implementation of an NVMe-MI device,
supporting only a small subset of the required commands.

Since this all relies on i2c target mode, this can currently only be
used with an SoC that includes the Aspeed I2C controller.

The easiest way to get up and running with this, is to grab my buildroot
overlay[1] (aspeed_ast2600evb_nmi_defconfig). It includes modified a
modified dts as well as a couple of required packages.

QEMU can then be launched along these lines:

  qemu-system-arm \
-nographic \
-M ast2600-evb \
-kernel output/images/zImage \
-initrd output/images/rootfs.cpio \
-dtb output/images/aspeed-ast2600-evb-nmi.dtb \
-nic user,hostfwd=tcp::-:22 \
-device nmi-i2c,address=0x3a \
-serial mon:stdio

>From within the booted system,

  mctp addr add 8 dev mctpi2c15
  mctp link set mctpi2c15 up
  mctp route add 9 via mctpi2c15
  mctp neigh add 9 dev mctpi2c15 lladdr 0x3a
  mi-mctp 1 9 info

Comments are very welcome!

  [1]: https://github.com/birkelund/hwtests/tree/main/br2-external

Signed-off-by: Klaus Jensen 
---
Changes in v5:
- Added a nmi_scratch_append() that asserts available space in the
  scratch buffer. This is a similar defensive strategy as used in
  hw/i2c/mctp.c
- Various small fixups in response to review (Jonathan)
- Link to v4: 
https://lore.kernel.org/r/20230823-nmi-i2c-v4-0-2b0f86e5b...@samsung.com

---
Klaus Jensen (3):
  hw/i2c: add smbus pec utility function
  hw/i2c: add mctp core
  hw/nvme: add nvme management interface model

 MAINTAINERS   |   7 +
 hw/arm/Kconfig|   1 +
 hw/i2c/Kconfig|   4 +
 hw/i2c/mctp.c | 432 ++
 hw/i2c/meson.build|   1 +
 hw/i2c/smbus_master.c |  26 +++
 hw/i2c/trace-events   |  13 ++
 hw/nvme/Kconfig   |   4 +
 hw/nvme/meson.build   |   1 +
 hw/nvme/nmi-i2c.c | 424 +
 hw/nvme/trace-events  |   6 +
 include/hw/i2c/mctp.h | 125 
 include/hw/i2c/smbus_master.h |   2 +
 include/net/mctp.h|  35 
 14 files changed, 1081 insertions(+)
---
base-commit: 17780edd81d27fcfdb7a802efc870a99788bd2fc
change-id: 20230822-nmi-i2c-d804ed5be7e6

Best regards,
-- 
Klaus Jensen

[PATCH v5 1/3] hw/i2c: add smbus pec utility function

2023-09-05 Thread Klaus Jensen

From: Klaus Jensen 

Add i2c_smbus_pec() to calculate the SMBus Packet Error Code for a
message.

Reviewed-by: Jonathan Cameron 
Signed-off-by: Klaus Jensen 
---
 hw/i2c/smbus_master.c | 26 ++
 include/hw/i2c/smbus_master.h |  2 ++
 2 files changed, 28 insertions(+)

diff --git a/hw/i2c/smbus_master.c b/hw/i2c/smbus_master.c
index 6a53c34e70b7..01a8e4700222 100644
--- a/hw/i2c/smbus_master.c
+++ b/hw/i2c/smbus_master.c
@@ -15,6 +15,32 @@
 #include "hw/i2c/i2c.h"
 #include "hw/i2c/smbus_master.h"
 
+static uint8_t crc8(uint16_t data)
+{
+int i;
+
+for (i = 0; i < 8; i++) {
+if (data & 0x8000) {
+data ^= 0x1070U << 3;
+}
+
+data <<= 1;
+}
+
+return (uint8_t)(data >> 8);
+}
+
+uint8_t i2c_smbus_pec(uint8_t crc, uint8_t *buf, size_t len)
+{
+int i;
+
+for (i = 0; i < len; i++) {
+crc = crc8((crc ^ buf[i]) << 8);
+}
+
+return crc;
+}
+
 /* Master device commands.  */
 int smbus_quick_command(I2CBus *bus, uint8_t addr, int read)
 {
diff --git a/include/hw/i2c/smbus_master.h b/include/hw/i2c/smbus_master.h
index bb13bc423c22..d90f81767d86 100644
--- a/include/hw/i2c/smbus_master.h
+++ b/include/hw/i2c/smbus_master.h
@@ -27,6 +27,8 @@
 
 #include "hw/i2c/i2c.h"
 
+uint8_t i2c_smbus_pec(uint8_t crc, uint8_t *buf, size_t len);
+
 /* Master device commands.  */
 int smbus_quick_command(I2CBus *bus, uint8_t addr, int read);
 int smbus_receive_byte(I2CBus *bus, uint8_t addr);

-- 
2.42.0

[PATCH v5 3/3] hw/nvme: add nvme management interface model

2023-09-05 Thread Klaus Jensen

From: Klaus Jensen 

Add the 'nmi-i2c' device that emulates an NVMe Management Interface
controller.

Initial support is very basic (Read NMI DS, Configuration Get).

This is based on previously posted code by Padmakar Kalghatgi, Arun
Kumar Agasar and Saurav Kumar.

Signed-off-by: Klaus Jensen 
---
 hw/nvme/Kconfig  |   4 +
 hw/nvme/meson.build  |   1 +
 hw/nvme/nmi-i2c.c| 424 +++
 hw/nvme/trace-events |   6 +
 4 files changed, 435 insertions(+)

diff --git a/hw/nvme/Kconfig b/hw/nvme/Kconfig
index 8ac90942e55e..1d89a4f4ecea 100644
--- a/hw/nvme/Kconfig
+++ b/hw/nvme/Kconfig
@@ -2,3 +2,7 @@ config NVME_PCI
 bool
 default y if PCI_DEVICES
 depends on PCI
+
+config NVME_NMI_I2C
+bool
+default y if I2C_MCTP
diff --git a/hw/nvme/meson.build b/hw/nvme/meson.build
index 1a6a2ca2f307..7bc85f31c149 100644
--- a/hw/nvme/meson.build
+++ b/hw/nvme/meson.build
@@ -1 +1,2 @@
 system_ss.add(when: 'CONFIG_NVME_PCI', if_true: files('ctrl.c', 'dif.c', 
'ns.c', 'subsys.c'))
+system_ss.add(when: 'CONFIG_NVME_NMI_I2C', if_true: files('nmi-i2c.c'))
diff --git a/hw/nvme/nmi-i2c.c b/hw/nvme/nmi-i2c.c
new file mode 100644
index ..6e0ba7bd2a37
--- /dev/null
+++ b/hw/nvme/nmi-i2c.c
@@ -0,0 +1,424 @@
+/*
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ *
+ * SPDX-FileCopyrightText: Copyright (c) 2023 Samsung Electronics Co., Ltd.
+ *
+ * SPDX-FileContributor: Padmakar Kalghatgi 
+ * SPDX-FileContributor: Arun Kumar Agasar 
+ * SPDX-FileContributor: Saurav Kumar 
+ * SPDX-FileContributor: Klaus Jensen 
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/crc32c.h"
+#include "hw/registerfields.h"
+#include "hw/i2c/i2c.h"
+#include "hw/i2c/mctp.h"
+#include "net/mctp.h"
+#include "trace.h"
+
+/* NVM Express Management Interface 1.2c, Section 3.1 */
+#define NMI_MAX_MESSAGE_LENGTH 4224
+
+#define TYPE_NMI_I2C_DEVICE "nmi-i2c"
+OBJECT_DECLARE_SIMPLE_TYPE(NMIDevice, NMI_I2C_DEVICE)
+
+typedef struct NMIDevice {
+MCTPI2CEndpoint mctp;
+
+uint8_t buffer[NMI_MAX_MESSAGE_LENGTH];
+uint8_t scratch[NMI_MAX_MESSAGE_LENGTH];
+
+size_t  len;
+int64_t pos;
+} NMIDevice;
+
+FIELD(NMI_MCTPD, MT, 0, 7)
+FIELD(NMI_MCTPD, IC, 7, 1)
+
+#define NMI_MCTPD_MT_NMI 0x4
+#define NMI_MCTPD_IC_ENABLED 0x1
+
+FIELD(NMI_NMP, ROR, 7, 1)
+FIELD(NMI_NMP, NMIMT, 3, 4)
+
+#define NMI_NMP_NMIMT_NVME_MI 0x1
+#define NMI_NMP_NMIMT_NVME_ADMIN 0x2
+
+typedef struct NMIMessage {
+uint8_t mctpd;
+uint8_t nmp;
+uint8_t rsvd2[2];
+uint8_t payload[]; /* includes the Message Integrity Check */
+} NMIMessage;
+
+typedef struct NMIRequest {
+   uint8_t opc;
+   uint8_t rsvd1[3];
+   uint32_t dw0;
+   uint32_t dw1;
+   uint32_t mic;
+} NMIRequest;
+
+FIELD(NMI_CMD_READ_NMI_DS_DW0, DTYP, 24, 8)
+
+typedef enum NMIReadDSType {
+NMI_CMD_READ_NMI_DS_SUBSYSTEM   = 0x0,
+NMI_CMD_READ_NMI_DS_PORTS   = 0x1,
+NMI_CMD_READ_NMI_DS_CTRL_LIST   = 0x2,
+NMI_CMD_READ_NMI_DS_CTRL_INFO   = 0x3,
+NMI_CMD_READ_NMI_DS_OPT_CMD_SUPPORT = 0x4,
+NMI_CMD_READ_NMI_DS_MEB_CMD_SUPPORT = 0x5,
+} NMIReadDSType;
+
+#define NMI_STATUS_INVALID_PARAMETER 0x4
+
+static void nmi_scratch_append(NMIDevice *nmi, const void *buf, size_t count)
+{
+assert(nmi->pos + count <= NMI_MAX_MESSAGE_LENGTH);
+
+memcpy(nmi->scratch + nmi->pos, buf, count);
+nmi->pos += count;
+}
+
+static void nmi_set_parameter_error(NMIDevice *nmi, uint8_t bit, uint16_t byte)
+{
+/* NVM Express Management Interface 1.2c, Figure 30 */
+struct resp {
+uint8_t  status;
+uint8_t  bit;
+uint16_t byte;
+};
+
+struct resp buf = {
+.status = NMI_STATUS_INVALID_PARAMETER,
+.bit = bit & 0x3,
+.byte = byte,
+};
+
+nmi_scratch_append(nmi, , sizeof(buf));
+}
+
+static void nmi_set_error(NMIDevice *nmi, uint8_t status)
+{
+const uint8_t buf[4] = {status,};
+
+nmi_scratch_append(nmi, buf, sizeof(buf));
+}
+
+static void nmi_handle_mi_read_nmi_ds(NMIDevice *nmi, NMIRequest *request)
+{
+I2CSlave *i2c = I2C_SLAVE(nmi);
+
+uint32_t dw0 = le32_to_cpu(request->dw0);
+uint8_t dtyp = FIELD_EX32(dw0, NMI_CMD_READ_NMI_DS_DW0, DTYP);
+const uint8_t *buf;
+size_t len;
+
+trace_nmi_handle_mi_read_nmi_ds(dtyp);
+
+static const uint8_t nmi_ds_subsystem[36] = {
+0x00,   /* success */
+0x20, 0x00, /* response data length */
+0x00,   /* reserved */
+0x00,   /* number of ports */
+0x01,   /* major version */
+0x01,   /* minor version */
+};
+
+/*
+ * Cannot be static (or const) since we need to patch in the i2c
+ * address.
+ */
+const uint8_t nmi_ds_ports[36] = {
+0x00,   /* success */
+0x20, 0x00, /* response data length */
+0x00,   /* reserved */
+0x02,   /* port typ

Re: [PATCH v4 2/3] hw/i2c: add mctp core

2023-09-04 Thread Klaus Jensen

On Aug 30 15:31, Jonathan Cameron wrote:
> On Wed, 23 Aug 2023 11:21:59 +0200
> Klaus Jensen  wrote:
> 
> > From: Klaus Jensen 
> > 
> > Add an abstract MCTP over I2C endpoint model. This implements MCTP
> > control message handling as well as handling the actual I2C transport
> > (packetization).
> > 
> > Devices are intended to derive from this and implement the class
> > methods.
> > 
> > Parts of this implementation is inspired by code[1] previously posted by
> > Jonathan Cameron.
> > 
> > Squashed a fix[2] from Matt Johnston.
> > 
> >   [1]: 
> > https://lore.kernel.org/qemu-devel/20220520170128.4436-1-jonathan.came...@huawei.com/
> >   [2]: 
> > https://lore.kernel.org/qemu-devel/20221121080445.ga29...@codeconstruct.com.au/
> > 
> > Signed-off-by: Klaus Jensen 
> 
> I made the minor changes to the CXL FM-API PoC to use this and all works as 
> expected so
> 
> Tested-by: Jonathan Cameron 
> 
> 
> Some minor things inline.  With those tidied up or ignored with clear 
> reasoning.
> 
> Reviewed-by: Jonathan Cameron 
> 
> 
> > diff --git a/hw/i2c/mctp.c b/hw/i2c/mctp.c
> > new file mode 100644
> > index ..217073d62435
> > --- /dev/null
> > +++ b/hw/i2c/mctp.c
> 
> 
> ...
> 
> 
> > +static int i2c_mctp_event_cb(I2CSlave *i2c, enum i2c_event event)
> > +{
> > +MCTPI2CEndpoint *mctp = MCTP_I2C_ENDPOINT(i2c);
> > +MCTPI2CEndpointClass *mc = MCTP_I2C_ENDPOINT_GET_CLASS(mctp);
> > +MCTPI2CPacket *pkt = (MCTPI2CPacket *)mctp->buffer;
> > +size_t payload_len;
> > +uint8_t pec, pktseq, msgtype;
> > +
> > +switch (event) {
> > +case I2C_START_SEND:
> > +if (mctp->state == I2C_MCTP_STATE_IDLE) {
> > +mctp->state = I2C_MCTP_STATE_RX_STARTED;
> > +} else if (mctp->state != I2C_MCTP_STATE_RX) {
> > +return -1;
> > +}
> > +
> > +/* the i2c core eats the slave address, so put it back in */
> > +pkt->i2c.dest = i2c->address << 1;
> > +mctp->len = 1;
> > +
> > +return 0;
> > +
> > +case I2C_FINISH:
> > +if (mctp->len < sizeof(MCTPI2CPacket) + 1) {
> > +trace_i2c_mctp_drop_short_packet(mctp->len);
> > +goto drop;
> > +}
> > +
> > +payload_len = mctp->len - (1 + offsetof(MCTPI2CPacket, 
> > mctp.payload));
> > +
> > +if (pkt->i2c.byte_count + 3 != mctp->len - 1) {
> > +trace_i2c_mctp_drop_invalid_length(pkt->i2c.byte_count + 3,
> > +   mctp->len - 1);
> > +goto drop;
> > +}
> > +
> > +pec = i2c_smbus_pec(0, mctp->buffer, mctp->len - 1);
> > +if (mctp->buffer[mctp->len - 1] != pec) {
> > +trace_i2c_mctp_drop_invalid_pec(mctp->buffer[mctp->len - 1], 
> > pec);
> > +goto drop;
> > +}
> > +
> > +if (!(pkt->mctp.hdr.eid.dest == mctp->my_eid ||
> > +  pkt->mctp.hdr.eid.dest == 0)) {
> > +trace_i2c_mctp_drop_invalid_eid(pkt->mctp.hdr.eid.dest,
> > +mctp->my_eid);
> > +goto drop;
> > +}
> > +
> > +pktseq = FIELD_EX8(pkt->mctp.hdr.flags, MCTP_H_FLAGS, PKTSEQ);
> > +
> > +if (FIELD_EX8(pkt->mctp.hdr.flags, MCTP_H_FLAGS, SOM)) {
> > +mctp->tx.is_control = false;
> > +
> > +if (mctp->state == I2C_MCTP_STATE_RX) {
> > +mc->reset(mctp);
> > +}
> > +
> > +mctp->state = I2C_MCTP_STATE_RX;
> > +
> > +mctp->tx.addr = pkt->i2c.source >> 1;
> > +mctp->tx.eid = pkt->mctp.hdr.eid.source;
> > +mctp->tx.tag = FIELD_EX8(pkt->mctp.hdr.flags, MCTP_H_FLAGS, 
> > TAG);
> > +mctp->tx.pktseq = pktseq;
> > +
> > +msgtype = FIELD_EX8(pkt->mctp.payload[0], MCTP_MESSAGE_H, 
> > TYPE);
> > +
> > +if (msgtype == MCTP_MESSAGE_TYPE_CONTROL) {
> > +mctp->tx.is_control = true;
> > +
> > +i2c_mctp_handle_control(mctp);
> > +
> > +return 0;
> > +}
> > +} else if (mctp->state == I2C_MCTP_STATE_RX_STARTED) {
> > +trace_i2c_mctp_dro

Re: [PULL 1/2] hw/nvme: fix null pointer access in directive receive

2023-08-24 Thread Klaus Jensen

On Aug 24 14:44, Philippe Mathieu-Daudé wrote:
> On 9/8/23 15:39, Klaus Jensen wrote:
> > From: Klaus Jensen 
> > 
> > nvme_directive_receive() does not check if an endurance group has been
> > configured (set) prior to testing if flexible data placement is enabled
> > or not.
> > 
> > Fix this.
> > 
> > Cc: qemu-sta...@nongnu.org
> > Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1815
> > Fixes: 73064edfb864 ("hw/nvme: flexible data placement emulation")
> > Reviewed-by: Jesper Wendel Devantier 
> > Signed-off-by: Klaus Jensen 
> > ---
> >   hw/nvme/ctrl.c | 2 +-
> >   1 file changed, 1 insertion(+), 1 deletion(-)
> > 
> > diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c
> > index d217ae91b506..e5b5c7034d2b 100644
> > --- a/hw/nvme/ctrl.c
> > +++ b/hw/nvme/ctrl.c
> > @@ -6900,7 +6900,7 @@ static uint16_t nvme_directive_receive(NvmeCtrl *n, 
> > NvmeRequest *req)
> >   case NVME_DIRECTIVE_IDENTIFY:
> >   switch (doper) {
> >   case NVME_DIRECTIVE_RETURN_PARAMS:
> > -if (ns->endgrp->fdp.enabled) {
> > +if (ns->endgrp && ns->endgrp->fdp.enabled) {
> 
> This patch fixes CVE-2023-40360 ("QEMU: NVMe: NULL pointer
> dereference in nvme_directive_receive"). Were you aware of
> the security implications?
> 

Yes, but I was not aware of the CVE being assigned at the time. I don't
think it was?

But if what you are saying is that it was my responsibility as
maintainer, to get that reported and assigned, then I apologies and will
of course keep that in mind going forward!


signature.asc
Description: PGP signature

[PATCH v4 3/3] hw/nvme: add nvme management interface model

2023-08-23 Thread Klaus Jensen

From: Klaus Jensen 

Add the 'nmi-i2c' device that emulates an NVMe Management Interface
controller.

Initial support is very basic (Read NMI DS, Configuration Get).

This is based on previously posted code by Padmakar Kalghatgi, Arun
Kumar Agasar and Saurav Kumar.

Signed-off-by: Klaus Jensen 
---
 hw/nvme/Kconfig  |   4 +
 hw/nvme/meson.build  |   1 +
 hw/nvme/nmi-i2c.c| 418 +++
 hw/nvme/trace-events |   6 +
 4 files changed, 429 insertions(+)

diff --git a/hw/nvme/Kconfig b/hw/nvme/Kconfig
index 8ac90942e55e..1d89a4f4ecea 100644
--- a/hw/nvme/Kconfig
+++ b/hw/nvme/Kconfig
@@ -2,3 +2,7 @@ config NVME_PCI
 bool
 default y if PCI_DEVICES
 depends on PCI
+
+config NVME_NMI_I2C
+bool
+default y if I2C_MCTP
diff --git a/hw/nvme/meson.build b/hw/nvme/meson.build
index 1a6a2ca2f307..7bc85f31c149 100644
--- a/hw/nvme/meson.build
+++ b/hw/nvme/meson.build
@@ -1 +1,2 @@
 system_ss.add(when: 'CONFIG_NVME_PCI', if_true: files('ctrl.c', 'dif.c', 
'ns.c', 'subsys.c'))
+system_ss.add(when: 'CONFIG_NVME_NMI_I2C', if_true: files('nmi-i2c.c'))
diff --git a/hw/nvme/nmi-i2c.c b/hw/nvme/nmi-i2c.c
new file mode 100644
index ..9040ba28a87c
--- /dev/null
+++ b/hw/nvme/nmi-i2c.c
@@ -0,0 +1,418 @@
+/*
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ *
+ * SPDX-FileCopyrightText: Copyright (c) 2023 Samsung Electronics Co., Ltd.
+ *
+ * SPDX-FileContributor: Padmakar Kalghatgi 
+ * SPDX-FileContributor: Arun Kumar Agasar 
+ * SPDX-FileContributor: Saurav Kumar 
+ * SPDX-FileContributor: Klaus Jensen 
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/crc32c.h"
+#include "hw/registerfields.h"
+#include "hw/i2c/i2c.h"
+#include "hw/i2c/mctp.h"
+#include "net/mctp.h"
+#include "trace.h"
+
+#define NMI_MAX_MESSAGE_LENGTH 4224
+
+#define TYPE_NMI_I2C_DEVICE "nmi-i2c"
+OBJECT_DECLARE_SIMPLE_TYPE(NMIDevice, NMI_I2C_DEVICE)
+
+typedef struct NMIDevice {
+MCTPI2CEndpoint mctp;
+
+uint8_t buffer[NMI_MAX_MESSAGE_LENGTH];
+uint8_t scratch[NMI_MAX_MESSAGE_LENGTH];
+
+size_t  len;
+int64_t pos;
+} NMIDevice;
+
+FIELD(NMI_MCTPD, MT, 0, 7)
+FIELD(NMI_MCTPD, IC, 7, 1)
+
+#define NMI_MCTPD_MT_NMI 0x4
+#define NMI_MCTPD_IC_ENABLED 0x1
+
+FIELD(NMI_NMP, ROR, 7, 1)
+FIELD(NMI_NMP, NMIMT, 3, 4)
+
+#define NMI_NMP_NMIMT_NVME_MI 0x1
+#define NMI_NMP_NMIMT_NVME_ADMIN 0x2
+
+typedef struct NMIMessage {
+uint8_t mctpd;
+uint8_t nmp;
+uint8_t rsvd2[2];
+uint8_t payload[]; /* includes the Message Integrity Check */
+} NMIMessage;
+
+typedef struct NMIRequest {
+   uint8_t opc;
+   uint8_t rsvd1[3];
+   uint32_t dw0;
+   uint32_t dw1;
+   uint32_t mic;
+} NMIRequest;
+
+typedef enum NMIReadDSType {
+NMI_CMD_READ_NMI_DS_SUBSYSTEM   = 0x0,
+NMI_CMD_READ_NMI_DS_PORTS   = 0x1,
+NMI_CMD_READ_NMI_DS_CTRL_LIST   = 0x2,
+NMI_CMD_READ_NMI_DS_CTRL_INFO   = 0x3,
+NMI_CMD_READ_NMI_DS_OPT_CMD_SUPPORT = 0x4,
+NMI_CMD_READ_NMI_DS_MEB_CMD_SUPPORT = 0x5,
+} NMIReadDSType;
+
+#define NMI_STATUS_INVALID_PARAMETER 0x4
+
+static void nmi_set_parameter_error(NMIDevice *nmi, uint8_t bit, uint16_t byte)
+{
+/* NVM Express Management Interface 1.2c, Figure 30 */
+struct resp {
+uint8_t  status;
+uint8_t  bit;
+uint16_t byte;
+};
+
+struct resp *buf = (struct resp *)(nmi->scratch + nmi->pos);
+
+buf->status = NMI_STATUS_INVALID_PARAMETER;
+buf->bit = bit & 0x3;
+buf->byte = byte;
+
+nmi->pos += sizeof(struct resp);
+}
+
+static void nmi_set_error(NMIDevice *nmi, uint8_t status)
+{
+uint8_t buf[4] = {};
+
+buf[0] = status;
+
+memcpy(nmi->scratch + nmi->pos, buf, 4);
+nmi->pos += 4;
+}
+
+static void nmi_handle_mi_read_nmi_ds(NMIDevice *nmi, NMIRequest *request)
+{
+I2CSlave *i2c = I2C_SLAVE(nmi);
+
+uint32_t dw0 = le32_to_cpu(request->dw0);
+uint8_t dtyp = (dw0 >> 24) & 0xf;
+uint8_t *buf;
+size_t len;
+
+trace_nmi_handle_mi_read_nmi_ds(dtyp);
+
+static uint8_t nmi_ds_subsystem[36] = {
+0x00,   /* success */
+0x20, 0x00, /* response data length */
+0x00,   /* reserved */
+0x00,   /* number of ports */
+0x01,   /* major version */
+0x01,   /* minor version */
+};
+
+/* cannot be static since we need to patch in the i2c address */
+uint8_t nmi_ds_ports[36] = {
+0x00,   /* success */
+0x20, 0x00, /* response data length */
+0x00,   /* reserved */
+0x02,   /* port type (smbus) */
+0x00,   /* reserved */
+0x40, 0x00, /* maximum mctp transission unit size (64 bytes) */
+0x00, 0x00, 0x00, 0x00, /* management endpoint buffer size */
+0x00,   /* vpd i2c address */
+0x00,   /* vpd i2c frequency */
+0x00,   /* management endpoi

[PATCH v4 2/3] hw/i2c: add mctp core

2023-08-23 Thread Klaus Jensen

From: Klaus Jensen 

Add an abstract MCTP over I2C endpoint model. This implements MCTP
control message handling as well as handling the actual I2C transport
(packetization).

Devices are intended to derive from this and implement the class
methods.

Parts of this implementation is inspired by code[1] previously posted by
Jonathan Cameron.

Squashed a fix[2] from Matt Johnston.

  [1]: 
https://lore.kernel.org/qemu-devel/20220520170128.4436-1-jonathan.came...@huawei.com/
  [2]: 
https://lore.kernel.org/qemu-devel/20221121080445.ga29...@codeconstruct.com.au/

Signed-off-by: Klaus Jensen 
---
 MAINTAINERS   |   7 +
 hw/arm/Kconfig|   1 +
 hw/i2c/Kconfig|   4 +
 hw/i2c/mctp.c | 428 ++
 hw/i2c/meson.build|   1 +
 hw/i2c/trace-events   |  13 ++
 include/hw/i2c/mctp.h | 127 +++
 include/net/mctp.h|  35 +
 8 files changed, 616 insertions(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index 6111b6b4d928..8ca71167607d 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -3395,6 +3395,13 @@ F: tests/qtest/adm1272-test.c
 F: tests/qtest/max34451-test.c
 F: tests/qtest/isl_pmbus_vr-test.c
 
+MCTP I2C Transport
+M: Klaus Jensen 
+S: Maintained
+F: hw/i2c/mctp.c
+F: include/hw/i2c/mctp.h
+F: include/net/mctp.h
+
 Firmware schema specifications
 M: Philippe Mathieu-Daudé 
 R: Daniel P. Berrange 
diff --git a/hw/arm/Kconfig b/hw/arm/Kconfig
index 7e6834844051..5bcb1e0e8a6f 100644
--- a/hw/arm/Kconfig
+++ b/hw/arm/Kconfig
@@ -541,6 +541,7 @@ config ASPEED_SOC
 select DS1338
 select FTGMAC100
 select I2C
+select I2C_MCTP
 select DPS310
 select PCA9552
 select SERIAL
diff --git a/hw/i2c/Kconfig b/hw/i2c/Kconfig
index 14886b35dac2..2b2a50b83d1e 100644
--- a/hw/i2c/Kconfig
+++ b/hw/i2c/Kconfig
@@ -6,6 +6,10 @@ config I2C_DEVICES
 # to any board's i2c bus
 bool
 
+config I2C_MCTP
+bool
+select I2C
+
 config SMBUS
 bool
 select I2C
diff --git a/hw/i2c/mctp.c b/hw/i2c/mctp.c
new file mode 100644
index ..217073d62435
--- /dev/null
+++ b/hw/i2c/mctp.c
@@ -0,0 +1,428 @@
+/*
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ *
+ * SPDX-FileCopyrightText: Copyright (c) 2023 Samsung Electronics Co., Ltd.
+ * SPDX-FileContributor: Klaus Jensen 
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/main-loop.h"
+
+#include "hw/qdev-properties.h"
+#include "hw/i2c/i2c.h"
+#include "hw/i2c/smbus_master.h"
+#include "hw/i2c/mctp.h"
+#include "net/mctp.h"
+
+#include "trace.h"
+
+/* DSP0237 1.2.0, Figure 1 */
+typedef struct MCTPI2CPacketHeader {
+uint8_t dest;
+#define MCTP_I2C_COMMAND_CODE 0xf
+uint8_t command_code;
+uint8_t byte_count;
+uint8_t source;
+} MCTPI2CPacketHeader;
+
+typedef struct MCTPI2CPacket {
+MCTPI2CPacketHeader i2c;
+MCTPPacket  mctp;
+} MCTPI2CPacket;
+
+#define i2c_mctp_payload_offset offsetof(MCTPI2CPacket, mctp.payload)
+#define i2c_mctp_payload(buf) (buf + i2c_mctp_payload_offset)
+
+/* DSP0236 1.3.0, Figure 20 */
+typedef struct MCTPControlMessage {
+#define MCTP_MESSAGE_TYPE_CONTROL 0x0
+uint8_t type;
+#define MCTP_CONTROL_FLAGS_RQ   (1 << 7)
+#define MCTP_CONTROL_FLAGS_D(1 << 6)
+uint8_t flags;
+uint8_t command_code;
+uint8_t data[];
+} MCTPControlMessage;
+
+enum MCTPControlCommandCodes {
+MCTP_CONTROL_SET_EID= 0x01,
+MCTP_CONTROL_GET_EID= 0x02,
+MCTP_CONTROL_GET_VERSION= 0x04,
+MCTP_CONTROL_GET_MESSAGE_TYPE_SUPPORT   = 0x05,
+};
+
+#define MCTP_CONTROL_ERROR_UNSUPPORTED_CMD 0x5
+
+#define i2c_mctp_control_data_offset \
+(i2c_mctp_payload_offset + offsetof(MCTPControlMessage, data))
+#define i2c_mctp_control_data(buf) (buf + i2c_mctp_control_data_offset)
+
+/**
+ * The byte count field in the SMBUS Block Write containers the number of bytes
+ * *following* the field itself.
+ *
+ * This is at least 5.
+ *
+ * 1 byte for the MCTP/I2C piggy-backed I2C source address in addition to the
+ * size of the MCTP transport/packet header.
+ */
+#define MCTP_I2C_BYTE_COUNT_OFFSET (sizeof(MCTPPacketHeader) + 1)
+
+void i2c_mctp_schedule_send(MCTPI2CEndpoint *mctp)
+{
+I2CBus *i2c = I2C_BUS(qdev_get_parent_bus(DEVICE(mctp)));
+
+mctp->tx.state = I2C_MCTP_STATE_TX_START_SEND;
+
+i2c_bus_master(i2c, mctp->tx.bh);
+}
+
+static void i2c_mctp_tx(void *opaque)
+{
+DeviceState *dev = DEVICE(opaque);
+I2CBus *i2c = I2C_BUS(qdev_get_parent_bus(dev));
+I2CSlave *slave = I2C_SLAVE(dev);
+MCTPI2CEndpoint *mctp = MCTP_I2C_ENDPOINT(dev);
+MCTPI2CEndpointClass *mc = MCTP_I2C_ENDPOINT_GET_CLASS(mctp);
+MCTPI2CPacket *pkt = (MCTPI2CPacket *)mctp->buffer;
+uint8_t flags = 0;
+
+switch (mctp->tx.state) {
+case I2C_MCTP_STATE_TX_SEND_BYTE:
+if (mctp->pos < mctp->len)

[PATCH v4 0/3] hw/{i2c,nvme}: mctp endpoint, nvme management interface model

2023-08-23 Thread Klaus Jensen

This adds a generic MCTP endpoint model that other devices may derive
from.

Also included is a very basic implementation of an NVMe-MI device,
supporting only a small subset of the required commands.

Since this all relies on i2c target mode, this can currently only be
used with an SoC that includes the Aspeed I2C controller.

The easiest way to get up and running with this, is to grab my buildroot
overlay[1] (aspeed_ast2600evb_nmi_defconfig). It includes modified a
modified dts as well as a couple of required packages.

QEMU can then be launched along these lines:

  qemu-system-arm \
-nographic \
-M ast2600-evb \
-kernel output/images/zImage \
-initrd output/images/rootfs.cpio \
-dtb output/images/aspeed-ast2600-evb-nmi.dtb \
-nic user,hostfwd=tcp::-:22 \
-device nmi-i2c,address=0x3a \
-serial mon:stdio

>From within the booted system,

  mctp addr add 8 dev mctpi2c15
  mctp link set mctpi2c15 up
  mctp route add 9 via mctpi2c15
  mctp neigh add 9 dev mctpi2c15 lladdr 0x3a
  mi-mctp 1 9 info

Comments are very welcome!

  [1]: https://github.com/birkelund/hwtests/tree/main/br2-external

Changes since v3


  - Inlined the POLY define in the crc8 function (Philippe)
  - Changed a bunch of fields to use registerfields.h

  - From Jonathan:
+ Added references to specs defining the structures.

  - From Corey:
+ Reworked the buffer handling (again) ;)
  Derived devices can now never write into the mctp core buffers. Instead,
  the mctp core will request a buffer pointer and copy from there.
  Internally, within the mctp core, writes to internal buffers are also
  checked.

Changes since v2


  - Applied a bunch of feedback from Jonathan:
+ Moved a lot of internally used structs out of the include headers
  and into the source files.
+ Added spec references in various places
+ Split the patch for i2c_smbus_pec() into its own
+ Fix a compile error (and bug) in nmi-i2c.c.

  - From Corey:
+ Reworked the buffer handling. The deriving devices now returns a
  pointer to their own buffer that the mctp core copies into.
+ Added a couple of extra debugging trace events.

Changes since v1


  - Fix SPDX-License tag for hw/nvme/nmi-i2c.c (Philippe)
  - Add some asserts to verify buffer indices (by request from Corey).
  - Drop short packets that could result in underflow (Corey)
  - Move i2c_smbus_pec() to smbus common code (Corey)
  - A couple of logic fixes (patch from Jeremy squashed in)
  - Added a patch to handle messages with dest eid 0 (Matt)
Maybe squash this as well.

Signed-off-by: Klaus Jensen 
---
Klaus Jensen (3):
  hw/i2c: add smbus pec utility function
  hw/i2c: add mctp core
  hw/nvme: add nvme management interface model

 MAINTAINERS   |   7 +
 hw/arm/Kconfig|   1 +
 hw/i2c/Kconfig|   4 +
 hw/i2c/mctp.c | 428 ++
 hw/i2c/meson.build|   1 +
 hw/i2c/smbus_master.c |  26 +++
 hw/i2c/trace-events   |  13 ++
 hw/nvme/Kconfig   |   4 +
 hw/nvme/meson.build   |   1 +
 hw/nvme/nmi-i2c.c | 418 +
 hw/nvme/trace-events  |   6 +
 include/hw/i2c/mctp.h | 127 +
 include/hw/i2c/smbus_master.h |   2 +
 include/net/mctp.h|  35 
 14 files changed, 1073 insertions(+)
---
base-commit: b0dd9a7d6dd15a6898e9c585b521e6bec79b25aa
change-id: 20230822-nmi-i2c-d804ed5be7e6

Best regards,
-- 
Klaus Jensen

[PATCH v4 1/3] hw/i2c: add smbus pec utility function

2023-08-23 Thread Klaus Jensen

From: Klaus Jensen 

Add i2c_smbus_pec() to calculate the SMBus Packet Error Code for a
message.

Signed-off-by: Klaus Jensen 
---
 hw/i2c/smbus_master.c | 26 ++
 include/hw/i2c/smbus_master.h |  2 ++
 2 files changed, 28 insertions(+)

diff --git a/hw/i2c/smbus_master.c b/hw/i2c/smbus_master.c
index 6a53c34e70b7..01a8e4700222 100644
--- a/hw/i2c/smbus_master.c
+++ b/hw/i2c/smbus_master.c
@@ -15,6 +15,32 @@
 #include "hw/i2c/i2c.h"
 #include "hw/i2c/smbus_master.h"
 
+static uint8_t crc8(uint16_t data)
+{
+int i;
+
+for (i = 0; i < 8; i++) {
+if (data & 0x8000) {
+data ^= 0x1070U << 3;
+}
+
+data <<= 1;
+}
+
+return (uint8_t)(data >> 8);
+}
+
+uint8_t i2c_smbus_pec(uint8_t crc, uint8_t *buf, size_t len)
+{
+int i;
+
+for (i = 0; i < len; i++) {
+crc = crc8((crc ^ buf[i]) << 8);
+}
+
+return crc;
+}
+
 /* Master device commands.  */
 int smbus_quick_command(I2CBus *bus, uint8_t addr, int read)
 {
diff --git a/include/hw/i2c/smbus_master.h b/include/hw/i2c/smbus_master.h
index bb13bc423c22..d90f81767d86 100644
--- a/include/hw/i2c/smbus_master.h
+++ b/include/hw/i2c/smbus_master.h
@@ -27,6 +27,8 @@
 
 #include "hw/i2c/i2c.h"
 
+uint8_t i2c_smbus_pec(uint8_t crc, uint8_t *buf, size_t len);
+
 /* Master device commands.  */
 int smbus_quick_command(I2CBus *bus, uint8_t addr, int read);
 int smbus_receive_byte(I2CBus *bus, uint8_t addr);

-- 
2.42.0

Re: [PATCH v2 0/4] Add full zoned storage emulation to qcow2 driver

2023-08-16 Thread Klaus Jensen

On Aug 14 16:57, Sam Li wrote:
> This patch series add a new extension - zoned format - to the
> qcow2 driver thereby allowing full zoned storage emulation on
> the qcow2 img file. Users can attach such a qcow2 file to the
> guest as a zoned device.
> 
> To create a qcow2 file with zoned format, use command like this:
> $ qemu-img create -f qcow2 test.qcow2 -o size=768M -o
> zone_size=64M -o zone_capacity=64M -o zone_nr_conv=0 -o
> max_append_sectors=512 -o max_open_zones=0 -o max_active_zones=0
> -o zoned_profile=zbc
> 
> Then add it to the QEMU command line:
> -blockdev 
> node-name=drive1,driver=qcow2,file.driver=file,file.filename=../qemu/test.qcow2
>  \
> -device virtio-blk-pci,drive=drive1 \
> 
> v1->v2:
> - add more tests to qemu-io zoned commands
> - make zone append change state to full when wp reaches end
> - add documentation to qcow2 zoned extension header
> - address review comments (Stefan):
>   * fix zoned_mata allocation size
>   * use bitwise or than addition
>   * fix wp index overflow and locking
>   * cleanups: comments, naming
> 
> Sam Li (4):
>   docs/qcow2: add the zoned format feature
>   qcow2: add configurations for zoned format extension
>   qcow2: add zoned emulation capability
>   iotests: test the zoned format feature for qcow2 file
> 
>  block/qcow2.c| 799 ++-
>  block/qcow2.h|  23 +
>  docs/interop/qcow2.txt   |  26 +
>  docs/system/qemu-block-drivers.rst.inc   |  39 ++
>  include/block/block-common.h |   5 +
>  include/block/block_int-common.h |  16 +
>  qapi/block-core.json |  46 +-
>  tests/qemu-iotests/tests/zoned-qcow2 | 135 
>  tests/qemu-iotests/tests/zoned-qcow2.out | 140 
>  9 files changed, 1214 insertions(+), 15 deletions(-)
>  create mode 100755 tests/qemu-iotests/tests/zoned-qcow2
>  create mode 100644 tests/qemu-iotests/tests/zoned-qcow2.out
> 

Hi Sam,

Thanks for this and for the RFC for hw/nvme - this is an awesome
improvement.

Can you explain the need for the zoned_profile? I understand that only
ZNS requires potentially setting zone_capacity and configuring extended
descriptors. When an image is hooked up to a block emulation device that
doesnt understand cap < size or extended descriptors, it could just
would fail on the cap < size and just ignore the extended descriptor
space. Do we really need to add the complexity of the user explicitly
having to set the profile? I also think it is fair for the QEMU zoned
block api to accomodate both variations - if a particular configuration
is supported or not is up to the emulating device.

Checking the profile from hw/nvme or hw/block/virtio is the same as
checking if cap < size or possibly the presence of extended descriptors.

Thanks,
Klaus

signature.asc
Description: PGP signature

Re: [PATCH 0/2] nvme: avoid dynamic stack allocations

2023-08-14 Thread Klaus Jensen

On Aug 11 18:47, Peter Maydell wrote:
> The QEMU codebase has very few C variable length arrays, and if we can
> get rid of them all we can make the compiler error on new additions.
> This is a defensive measure against security bugs where an on-stack
> dynamic allocation isn't correctly size-checked (e.g.  CVE-2021-3527).
> 
> We last had a go at this a few years ago, when Philippe wrote
> patches for this:
> https://patchew.org/QEMU/20210505211047.1496765-1-phi...@redhat.com/
> Some of the fixes made it into the tree, but some didn't (either
> because of lack of review or because review found some changes
> that needed to be made). I'm going through the remainder as a
> non-urgent Friday afternoon task...
> 
> This patchset deals with two VLAs in the NVME code.
> 
> thanks
> -- PMM
> 
> Peter Maydell (1):
>   hw/nvme: Avoid dynamic stack allocation
> 
> Philippe Mathieu-Daudé (1):
>   hw/nvme: Use #define to avoid variable length array
> 
>  hw/nvme/ctrl.c | 4 ++--
>  1 file changed, 2 insertions(+), 2 deletions(-)
> 
> -- 
> 2.34.1
> 

Thanks Peter,

Applied to nvme-next!


signature.asc
Description: PGP signature

[PULL 2/2] hw/nvme: fix null pointer access in ruh update

2023-08-09 Thread Klaus Jensen

From: Klaus Jensen 

The Reclaim Unit Update operation in I/O Management Receive does not
verify the presence of a configured endurance group prior to accessing
it.

Fix this.

Cc: qemu-sta...@nongnu.org
Fixes: 73064edfb864 ("hw/nvme: flexible data placement emulation")
Reviewed-by: Jesper Wendel Devantier 
Signed-off-by: Klaus Jensen 
---
 hw/nvme/ctrl.c | 8 +++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c
index e5b5c7034d2b..539d27355313 100644
--- a/hw/nvme/ctrl.c
+++ b/hw/nvme/ctrl.c
@@ -4361,7 +4361,13 @@ static uint16_t nvme_io_mgmt_send_ruh_update(NvmeCtrl 
*n, NvmeRequest *req)
 uint32_t npid = (cdw10 >> 1) + 1;
 unsigned int i = 0;
 g_autofree uint16_t *pids = NULL;
-uint32_t maxnpid = n->subsys->endgrp.fdp.nrg * n->subsys->endgrp.fdp.nruh;
+uint32_t maxnpid;
+
+if (!ns->endgrp || !ns->endgrp->fdp.enabled) {
+return NVME_FDP_DISABLED | NVME_DNR;
+}
+
+maxnpid = n->subsys->endgrp.fdp.nrg * n->subsys->endgrp.fdp.nruh;
 
 if (unlikely(npid >= MIN(NVME_FDP_MAXPIDS, maxnpid))) {
 return NVME_INVALID_FIELD | NVME_DNR;
-- 
2.41.0

[PULL 1/2] hw/nvme: fix null pointer access in directive receive

2023-08-09 Thread Klaus Jensen

From: Klaus Jensen 

nvme_directive_receive() does not check if an endurance group has been
configured (set) prior to testing if flexible data placement is enabled
or not.

Fix this.

Cc: qemu-sta...@nongnu.org
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1815
Fixes: 73064edfb864 ("hw/nvme: flexible data placement emulation")
Reviewed-by: Jesper Wendel Devantier 
Signed-off-by: Klaus Jensen 
---
 hw/nvme/ctrl.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c
index d217ae91b506..e5b5c7034d2b 100644
--- a/hw/nvme/ctrl.c
+++ b/hw/nvme/ctrl.c
@@ -6900,7 +6900,7 @@ static uint16_t nvme_directive_receive(NvmeCtrl *n, 
NvmeRequest *req)
 case NVME_DIRECTIVE_IDENTIFY:
 switch (doper) {
 case NVME_DIRECTIVE_RETURN_PARAMS:
-if (ns->endgrp->fdp.enabled) {
+if (ns->endgrp && ns->endgrp->fdp.enabled) {
 id.supported |= 1 << NVME_DIRECTIVE_DATA_PLACEMENT;
 id.enabled |= 1 << NVME_DIRECTIVE_DATA_PLACEMENT;
 id.persistent |= 1 << NVME_DIRECTIVE_DATA_PLACEMENT;
-- 
2.41.0

[PULL 0/2] hw/nvme: more fixes

2023-08-09 Thread Klaus Jensen

From: Klaus Jensen 

Hi,

The following changes since commit a8fc5165aab02f328ccd148aafec1e59fd1426eb:

  Merge tag 'nvme-next-pull-request' of https://gitlab.com/birkelund/qemu into 
staging (2023-08-08 16:39:20 -0700)

are available in the Git repository at:

  https://gitlab.com/birkelund/qemu.git tags/nvme-fixes-pull-request

for you to fetch changes up to 3439ba9c5da943d96f7a3c86e0a7eb2ff48de41c:

  hw/nvme: fix null pointer access in ruh update (2023-08-09 15:32:32 +0200)


hw/nvme: fixes
-BEGIN PGP SIGNATURE-

iQEzBAABCgAdFiEEUigzqnXi3OaiR2bATeGvMW1PDekFAmTTlmcACgkQTeGvMW1P
DemjjggAnhEvaJ4fgS9rsvtxCwtzLNc405xMpNxh6rPaxa+sL3RXPIrW6vWG13+W
VcHw8DI8EV4DzAFP919ZmTUq9/boRbgxx84bStlILUPHWol8+eGYVVfT75wFKszx
d4Vi3nyPSGlrxieSrosARqimcUDtFtDGGAxjvEcKgzhkcU3a8DVYAOmx/hdlWJJQ
KSk4h/E1pKItFbvv+w9yszsbToeZN65oIy7kQtFgx0JOULyWvEYSVygotw/AruF3
FPQ0nrJuZ115w3cJWDszznVJ6+3EcWbD3luQc3zE1FOPp76EkAOkcnPh1XbBJrE2
2BsCX/XnXcZT7BWSJbEzGXLsHjqsPg==
=Zy0+
-END PGP SIGNATURE-



Klaus Jensen (2):
  hw/nvme: fix null pointer access in directive receive
  hw/nvme: fix null pointer access in ruh update

 hw/nvme/ctrl.c | 10 --
 1 file changed, 8 insertions(+), 2 deletions(-)

-- 
2.41.0

[PATCH 2/2] hw/nvme: fix null pointer access in ruh update

2023-08-08 Thread Klaus Jensen

From: Klaus Jensen 

The Reclaim Unit Update operation in I/O Management Receive does not
verify the presence of a configured endurance group prior to accessing
it.

Fix this.

Cc: qemu-sta...@nongnu.org
Fixes: 73064edfb864 ("hw/nvme: flexible data placement emulation")
Signed-off-by: Klaus Jensen 
---
 hw/nvme/ctrl.c | 8 +++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c
index e5b5c7034d2b..539d27355313 100644
--- a/hw/nvme/ctrl.c
+++ b/hw/nvme/ctrl.c
@@ -4361,7 +4361,13 @@ static uint16_t nvme_io_mgmt_send_ruh_update(NvmeCtrl 
*n, NvmeRequest *req)
 uint32_t npid = (cdw10 >> 1) + 1;
 unsigned int i = 0;
 g_autofree uint16_t *pids = NULL;
-uint32_t maxnpid = n->subsys->endgrp.fdp.nrg * n->subsys->endgrp.fdp.nruh;
+uint32_t maxnpid;
+
+if (!ns->endgrp || !ns->endgrp->fdp.enabled) {
+return NVME_FDP_DISABLED | NVME_DNR;
+}
+
+maxnpid = n->subsys->endgrp.fdp.nrg * n->subsys->endgrp.fdp.nruh;
 
 if (unlikely(npid >= MIN(NVME_FDP_MAXPIDS, maxnpid))) {
 return NVME_INVALID_FIELD | NVME_DNR;
-- 
2.41.0

[PATCH 0/2] hw/nvme: two fixes

2023-08-08 Thread Klaus Jensen

From: Klaus Jensen 

Fix two potential accesses to null pointers.

Klaus Jensen (2):
  hw/nvme: fix null pointer access in directive receive
  hw/nvme: fix null pointer access in ruh update

 hw/nvme/ctrl.c | 10 --
 1 file changed, 8 insertions(+), 2 deletions(-)

-- 
2.41.0

[PATCH 1/2] hw/nvme: fix null pointer access in directive receive

2023-08-08 Thread Klaus Jensen

From: Klaus Jensen 

nvme_directive_receive() does not check if an endurance group has been
configured (set) prior to testing if flexible data placement is enabled
or not.

Fix this.

Cc: qemu-sta...@nongnu.org
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1815
Fixes: 73064edfb864 ("hw/nvme: flexible data placement emulation")
Signed-off-by: Klaus Jensen 
---
 hw/nvme/ctrl.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c
index d217ae91b506..e5b5c7034d2b 100644
--- a/hw/nvme/ctrl.c
+++ b/hw/nvme/ctrl.c
@@ -6900,7 +6900,7 @@ static uint16_t nvme_directive_receive(NvmeCtrl *n, 
NvmeRequest *req)
 case NVME_DIRECTIVE_IDENTIFY:
 switch (doper) {
 case NVME_DIRECTIVE_RETURN_PARAMS:
-if (ns->endgrp->fdp.enabled) {
+if (ns->endgrp && ns->endgrp->fdp.enabled) {
 id.supported |= 1 << NVME_DIRECTIVE_DATA_PLACEMENT;
 id.enabled |= 1 << NVME_DIRECTIVE_DATA_PLACEMENT;
 id.persistent |= 1 << NVME_DIRECTIVE_DATA_PLACEMENT;
-- 
2.41.0

[PULL v2] hw/nvme fixes

2023-08-08 Thread Klaus Jensen

From: Klaus Jensen 

Hi,

There was a small typo in the last pull. This replaces it.

The following changes since commit 0450cf08976f9036feaded438031b4cba94f6452:

  Merge tag 'fixes-pull-request' of https://gitlab.com/marcandre.lureau/qemu 
into staging (2023-08-07 13:55:00 -0700)

are available in the Git repository at:

  https://gitlab.com/birkelund/qemu.git tags/nvme-next-pull-request

for you to fetch changes up to ec5a138ce63ce460575a44cf9ec3172c33eb0fd6:

  docs: update hw/nvme documentation for protection information (2023-08-08 
15:28:05 +0200)


hw/nvme fixes

- fix for invalid protection information calculation
-BEGIN PGP SIGNATURE-

iQEzBAABCgAdFiEEUigzqnXi3OaiR2bATeGvMW1PDekFAmTSREoACgkQTeGvMW1P
DekH6Qf/e3gi0KloAUpbTQvGmBA6XmkJFAtOdZn7IJXVCowjYTIKU84DrdPyT1c1
rofL4w0klKG5c4Or/Cs4dH/ASxTWaQZRlFAYxsTW3nUX74MnaFDRZcN2geb30ws7
ryejVEKeHNWH/YYY4Ny55wO3tmy2ILAKnbiadiXhj4dQfCK1GzZnrx10PWxLNlkZ
KRhiXLNBHpPnDlrLq7/nLs+/0cMrrqEz6ISm/Ju4iUczAH/wmqEbR/yD3pAwmH07
PCaSeegOpwscovI5TWRelOJlzIXb6D8Xk9d3dGL5x/eeN7GlkgERX4MAcNYKwe8T
JNR8y2ErTEj2nLU/juES1EpiR2gYKw==
=vJlA
-END PGP SIGNATURE-



Ankit Kumar (2):
  hw/nvme: fix CRC64 for guard tag
  docs: update hw/nvme documentation for protection information

 docs/system/devices/nvme.rst | 12 +---
 hw/nvme/dif.c|  4 ++--
 2 files changed, 11 insertions(+), 5 deletions(-)

-- 
2.41.0

[PULL 1/2] hw/nvme: fix CRC64 for guard tag

2023-08-08 Thread Klaus Jensen

From: Ankit Kumar 

The nvme CRC64 generator expects the caller to pass inverted seed value.
Pass inverted crc value for metadata buffer.

Cc: qemu-sta...@nongnu.org
Fixes: 44219b6029fc ("hw/nvme: 64-bit pi support")
Signed-off-by: Ankit Kumar 
Signed-off-by: Klaus Jensen 
---
 hw/nvme/dif.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/hw/nvme/dif.c b/hw/nvme/dif.c
index 63c44c86ab55..01b19c33734e 100644
--- a/hw/nvme/dif.c
+++ b/hw/nvme/dif.c
@@ -115,7 +115,7 @@ static void nvme_dif_pract_generate_dif_crc64(NvmeNamespace 
*ns, uint8_t *buf,
 uint64_t crc = crc64_nvme(~0ULL, buf, ns->lbasz);
 
 if (pil) {
-crc = crc64_nvme(crc, mbuf, pil);
+crc = crc64_nvme(~crc, mbuf, pil);
 }
 
 dif->g64.guard = cpu_to_be64(crc);
@@ -246,7 +246,7 @@ static uint16_t nvme_dif_prchk_crc64(NvmeNamespace *ns, 
NvmeDifTuple *dif,
 uint64_t crc = crc64_nvme(~0ULL, buf, ns->lbasz);
 
 if (pil) {
-crc = crc64_nvme(crc, mbuf, pil);
+crc = crc64_nvme(~crc, mbuf, pil);
 }
 
 trace_pci_nvme_dif_prchk_guard_crc64(be64_to_cpu(dif->g64.guard), crc);
-- 
2.41.0

[PULL 2/2] docs: update hw/nvme documentation for protection information

2023-08-08 Thread Klaus Jensen

From: Ankit Kumar 

Add missing entry for pif ("protection information format").
Protection information size can be 8 or 16 bytes, Update the pil entry
as per the NVM command set specification.

Signed-off-by: Ankit Kumar 
Signed-off-by: Klaus Jensen 
---
 docs/system/devices/nvme.rst | 12 +---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/docs/system/devices/nvme.rst b/docs/system/devices/nvme.rst
index 2a3af268f7a5..32ff287cd78e 100644
--- a/docs/system/devices/nvme.rst
+++ b/docs/system/devices/nvme.rst
@@ -271,9 +271,15 @@ The virtual namespace device supports DIF- and DIX-based 
protection information
 
 ``pil=UINT8`` (default: ``0``)
   Controls the location of the protection information within the metadata. Set
-  to ``1`` to transfer protection information as the first eight bytes of
-  metadata. Otherwise, the protection information is transferred as the last
-  eight bytes.
+  to ``1`` to transfer protection information as the first bytes of metadata.
+  Otherwise, the protection information is transferred as the last bytes of
+  metadata.
+
+``pif=UINT8`` (default: ``0``)
+  By default, the namespace device uses 16 bit guard protection information
+  format (``pif=0``). Set to ``2`` to enable 64 bit guard protection
+  information format. This requires at least 16 bytes of metadata. Note that
+  ``pif=2`` (32 bit guards) are currently not supported.
 
 Virtualization Enhancements and SR-IOV (Experimental Support)
 -
-- 
2.41.0

[PULL 0/2] hw/nvme late fix

2023-08-08 Thread Klaus Jensen

From: Klaus Jensen 

Hi,

This is a fix for hw/nvme protection information discovered by Ankit
late in the cycle. This is not a regression, but a long standing bug and
not critical (obviously no users of this until now, no potential for
crash or similar, just plain wrong).

If this can make it for -rc3 that is great, but it can easily be deferred
to a stable release or -rc4 if other fixes require it.

Thanks,


The following changes since commit 0450cf08976f9036feaded438031b4cba94f6452:

  Merge tag 'fixes-pull-request' of https://gitlab.com/marcandre.lureau/qemu 
into staging (2023-08-07 13:55:00 -0700)

are available in the Git repository at:

  https://gitlab.com/birkelund/qemu.git tags/nvme-next-pull-request

for you to fetch changes up to bb223df9403beada5b2ab408d2d9a82471432a21:

  docs: update hw/nvme documentation for protection information (2023-08-08 
08:10:15 +0200)


hw/nvme late fix

- late fix for nvme pi
-BEGIN PGP SIGNATURE-

iQEzBAABCgAdFiEEUigzqnXi3OaiR2bATeGvMW1PDekFAmTR3uMACgkQTeGvMW1P
DenZzgf/T5pb1dQhYthpJ/OOLcvtttpRChQSvZJ8uxyzWoNS/9hKn5gd8buvdGwt
fO1QU3Ogh6ArwZ9GT5OqLV05d9vMuvJlPxpbqOs8XZACobH+nb3CqXBcX1F7TxqV
j9OmFH4UGPDo42hkT+jqa+kHc9hxmpwg+f6Wlpad+ZJ2UAel0/19JsQItln8JQ/I
Jxd07Q5qcj06RtwcPf/0WUOs4I6sTkifu7uZIrx1YjYN4/jQmaF2L0MQjUw1ktLF
hFXSW3iarDKh2fFlfR2fMkkoLLnoS6NoZnTj3fBDabcuMfpJlf7WZ5fuYpOlCKtB
kpN9/WaGpZtXmWAg82R7wlgR4D9vkw==
=EuEc
-END PGP SIGNATURE-



Ankit Kumar (2):
  hw/nvme: fix CRC64 for guard tag
  docs: update hw/nvme documentation for protection information

 docs/system/devices/nvme.rst | 12 +---
 hw/nvme/dif.c|  4 ++--
 2 files changed, 11 insertions(+), 5 deletions(-)

-- 
2.41.0

Re: [PATCH] hw/nvme: fix oob memory read in fdp events log

2023-08-08 Thread Klaus Jensen

+CC qemu-stable

On Aug  3 20:44, Klaus Jensen wrote:
> From: Klaus Jensen 
> 
> As reported by Trend Micro's Zero Day Initiative, an oob memory read
> vulnerability exists in nvme_fdp_events(). The host-provided offset is
> not verified.
> 
> Fix this.
> 
> This is only exploitable when Flexible Data Placement mode (fdp=on) is
> enabled.
> 
> Fixes: CVE-2023-4135
> Fixes: 73064edfb864 ("hw/nvme: flexible data placement emulation")
> Reported-by: Trend Micro's Zero Day Initiative
> Signed-off-by: Klaus Jensen 
> ---
>  hw/nvme/ctrl.c | 5 +
>  1 file changed, 5 insertions(+)
> 
> diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c
> index f2e5a2fa737b..e9b5a55811b8 100644
> --- a/hw/nvme/ctrl.c
> +++ b/hw/nvme/ctrl.c
> @@ -5120,6 +5120,11 @@ static uint16_t nvme_fdp_events(NvmeCtrl *n, uint32_t 
> endgrpid,
>  }
>  
>  log_size = sizeof(NvmeFdpEventsLog) + ebuf->nelems * 
> sizeof(NvmeFdpEvent);
> +
> +if (off >= log_size) {
> +return NVME_INVALID_FIELD | NVME_DNR;
> +}
> +
>  trans_len = MIN(log_size - off, buf_len);
>  elog = g_malloc0(log_size);
>  elog->num_events = cpu_to_le32(ebuf->nelems);
> -- 
> 2.41.0
> 

-- 
One of us - No more doubt, silence or taboo about mental illness.


signature.asc
Description: PGP signature

[PULL 0/2] hw/nvme fixes

2023-08-07 Thread Klaus Jensen

From: Klaus Jensen 

Hi,

The following changes since commit 9400601a689a128c25fa9c21e932562e0eeb7a26:

  Merge tag 'pull-tcg-20230806-3' of https://gitlab.com/rth7680/qemu into 
staging (2023-08-06 16:47:48 -0700)

are available in the Git repository at:

  https://gitlab.com/birkelund/qemu.git tags/nvme-next-pull-request

for you to fetch changes up to 6a33f2e920ec0b489a77200888e3692664077f2d:

  hw/nvme: fix compliance issue wrt. iosqes/iocqes (2023-08-07 12:27:24 +0200)


hw/nvme fixes

- two fixes for hw/nvme
-BEGIN PGP SIGNATURE-

iQEzBAABCgAdFiEEUigzqnXi3OaiR2bATeGvMW1PDekFAmTQ2y4ACgkQTeGvMW1P
DenpWQf/WFgEljzgTcgxlfZhCyzWGwVNgKqRxlTuF6ELqm8BajCuCeA5ias6AXOr
x/gZ0VqrL91L5tRIH5Q0sdC+HBFC1yMs66jopdzc1oL1eYu1HTrLIqMDtkXp/K/P
PyGah2t4qEMtacSkad+hmB68ViUkkmhkxrWYIeufUQTfLNF5pBqNvB1kQON3jmXE
a1jI/PabYxi8Km0rfFJD6SUGmL9+m7MY/SyZAy+4EZZ1OEnp5jb3o9lbdwbhIU5e
dRX4NW4BEDiOJeIcNVDiQkXv2/Lna1B51RVMvM4owpk0eRvRXMSqs2DQ5/jp/nGb
8uChUJ0QW68I4e9ptTfxmBsr4pSktg==
=0nwp
-END PGP SIGNATURE-



Klaus Jensen (2):
  hw/nvme: fix oob memory read in fdp events log
  hw/nvme: fix compliance issue wrt. iosqes/iocqes

 hw/nvme/ctrl.c   | 51 +++-
 hw/nvme/nvme.h   |  9 ++--
 hw/nvme/trace-events |  1 +
 3 files changed, 25 insertions(+), 36 deletions(-)

-- 
2.41.0

[PULL 1/2] hw/nvme: fix oob memory read in fdp events log

2023-08-07 Thread Klaus Jensen

From: Klaus Jensen 

As reported by Trend Micro's Zero Day Initiative, an oob memory read
vulnerability exists in nvme_fdp_events(). The host-provided offset is
not verified.

Fix this.

This is only exploitable when Flexible Data Placement mode (fdp=on) is
enabled.

Fixes: CVE-2023-4135
Fixes: 73064edfb864 ("hw/nvme: flexible data placement emulation")
Reported-by: Trend Micro's Zero Day Initiative
Signed-off-by: Klaus Jensen 
---
 hw/nvme/ctrl.c | 5 +
 1 file changed, 5 insertions(+)

diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c
index f2e5a2fa737b..e9b5a55811b8 100644
--- a/hw/nvme/ctrl.c
+++ b/hw/nvme/ctrl.c
@@ -5120,6 +5120,11 @@ static uint16_t nvme_fdp_events(NvmeCtrl *n, uint32_t 
endgrpid,
 }
 
 log_size = sizeof(NvmeFdpEventsLog) + ebuf->nelems * sizeof(NvmeFdpEvent);
+
+if (off >= log_size) {
+return NVME_INVALID_FIELD | NVME_DNR;
+}
+
 trans_len = MIN(log_size - off, buf_len);
 elog = g_malloc0(log_size);
 elog->num_events = cpu_to_le32(ebuf->nelems);
-- 
2.41.0

[PULL 2/2] hw/nvme: fix compliance issue wrt. iosqes/iocqes

2023-08-07 Thread Klaus Jensen

From: Klaus Jensen 

As of prior to this patch, the controller checks the value of CC.IOCQES
and CC.IOSQES prior to enabling the controller. As reported by Ben in
GitLab issue #1691, this is not spec compliant. The controller should
only check these values when queues are created.

This patch moves these checks to nvme_create_cq(). We do not need to
check it in nvme_create_sq() since that will error out if the completion
queue is not already created.

Also, since the controller exclusively supports SQEs of size 64 bytes
and CQEs of size 16 bytes, hard code that.

Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1691
Signed-off-by: Klaus Jensen 
---
 hw/nvme/ctrl.c   | 46 
 hw/nvme/nvme.h   |  9 +++--
 hw/nvme/trace-events |  1 +
 3 files changed, 20 insertions(+), 36 deletions(-)

diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c
index e9b5a55811b8..d217ae91b506 100644
--- a/hw/nvme/ctrl.c
+++ b/hw/nvme/ctrl.c
@@ -1507,7 +1507,7 @@ static void nvme_post_cqes(void *opaque)
 req->cqe.status = cpu_to_le16((req->status << 1) | cq->phase);
 req->cqe.sq_id = cpu_to_le16(sq->sqid);
 req->cqe.sq_head = cpu_to_le16(sq->head);
-addr = cq->dma_addr + cq->tail * n->cqe_size;
+addr = cq->dma_addr + (cq->tail << NVME_CQES);
 ret = pci_dma_write(PCI_DEVICE(n), addr, (void *)>cqe,
 sizeof(req->cqe));
 if (ret) {
@@ -5300,10 +5300,18 @@ static uint16_t nvme_create_cq(NvmeCtrl *n, NvmeRequest 
*req)
 uint16_t qsize = le16_to_cpu(c->qsize);
 uint16_t qflags = le16_to_cpu(c->cq_flags);
 uint64_t prp1 = le64_to_cpu(c->prp1);
+uint32_t cc = ldq_le_p(>bar.cc);
+uint8_t iocqes = NVME_CC_IOCQES(cc);
+uint8_t iosqes = NVME_CC_IOSQES(cc);
 
 trace_pci_nvme_create_cq(prp1, cqid, vector, qsize, qflags,
  NVME_CQ_FLAGS_IEN(qflags) != 0);
 
+if (iosqes != NVME_SQES || iocqes != NVME_CQES) {
+trace_pci_nvme_err_invalid_create_cq_entry_size(iosqes, iocqes);
+return NVME_MAX_QSIZE_EXCEEDED | NVME_DNR;
+}
+
 if (unlikely(!cqid || cqid > n->conf_ioqpairs || n->cq[cqid] != NULL)) {
 trace_pci_nvme_err_invalid_create_cq_cqid(cqid);
 return NVME_INVALID_QID | NVME_DNR;
@@ -7000,7 +7008,7 @@ static void nvme_process_sq(void *opaque)
 }
 
 while (!(nvme_sq_empty(sq) || QTAILQ_EMPTY(>req_list))) {
-addr = sq->dma_addr + sq->head * n->sqe_size;
+addr = sq->dma_addr + (sq->head << NVME_SQES);
 if (nvme_addr_read(n, addr, (void *), sizeof(cmd))) {
 trace_pci_nvme_err_addr_read(addr);
 trace_pci_nvme_err_cfs();
@@ -7225,34 +7233,6 @@ static int nvme_start_ctrl(NvmeCtrl *n)
 NVME_CAP_MPSMAX(cap));
 return -1;
 }
-if (unlikely(NVME_CC_IOCQES(cc) <
- NVME_CTRL_CQES_MIN(n->id_ctrl.cqes))) {
-trace_pci_nvme_err_startfail_cqent_too_small(
-NVME_CC_IOCQES(cc),
-NVME_CTRL_CQES_MIN(cap));
-return -1;
-}
-if (unlikely(NVME_CC_IOCQES(cc) >
- NVME_CTRL_CQES_MAX(n->id_ctrl.cqes))) {
-trace_pci_nvme_err_startfail_cqent_too_large(
-NVME_CC_IOCQES(cc),
-NVME_CTRL_CQES_MAX(cap));
-return -1;
-}
-if (unlikely(NVME_CC_IOSQES(cc) <
- NVME_CTRL_SQES_MIN(n->id_ctrl.sqes))) {
-trace_pci_nvme_err_startfail_sqent_too_small(
-NVME_CC_IOSQES(cc),
-NVME_CTRL_SQES_MIN(cap));
-return -1;
-}
-if (unlikely(NVME_CC_IOSQES(cc) >
- NVME_CTRL_SQES_MAX(n->id_ctrl.sqes))) {
-trace_pci_nvme_err_startfail_sqent_too_large(
-NVME_CC_IOSQES(cc),
-NVME_CTRL_SQES_MAX(cap));
-return -1;
-}
 if (unlikely(!NVME_AQA_ASQS(aqa))) {
 trace_pci_nvme_err_startfail_asqent_sz_zero();
 return -1;
@@ -7265,8 +7245,6 @@ static int nvme_start_ctrl(NvmeCtrl *n)
 n->page_bits = page_bits;
 n->page_size = page_size;
 n->max_prp_ents = n->page_size / sizeof(uint64_t);
-n->cqe_size = 1 << NVME_CC_IOCQES(cc);
-n->sqe_size = 1 << NVME_CC_IOSQES(cc);
 nvme_init_cq(>admin_cq, n, acq, 0, 0, NVME_AQA_ACQS(aqa) + 1, 1);
 nvme_init_sq(>admin_sq, n, asq, 0, 0, NVME_AQA_ASQS(aqa) + 1);
 
@@ -8235,8 +8213,8 @@ static void nvme_init_ctrl(NvmeCtrl *n, PCIDevice 
*pci_dev)
 id->wctemp = cpu_to_le16(NVME_TEMPERATURE_WARNING);
 id->cctemp = cpu_to_le16(NVME_TEMPERATURE_CRITICAL);
 
-id->sqes = (0x6 << 4) | 0x6;
-id->cqes = (0x4 << 4) | 0x4;
+id->sqes = (NVME_SQES << 4) | NVME_SQES;
+id->cqes = (NVME_CQES << 4) | NVME_CQE

[PATCH] hw/nvme: fix oob memory read in fdp events log

2023-08-03 Thread Klaus Jensen

From: Klaus Jensen 

As reported by Trend Micro's Zero Day Initiative, an oob memory read
vulnerability exists in nvme_fdp_events(). The host-provided offset is
not verified.

Fix this.

This is only exploitable when Flexible Data Placement mode (fdp=on) is
enabled.

Fixes: CVE-2023-4135
Fixes: 73064edfb864 ("hw/nvme: flexible data placement emulation")
Reported-by: Trend Micro's Zero Day Initiative
Signed-off-by: Klaus Jensen 
---
 hw/nvme/ctrl.c | 5 +
 1 file changed, 5 insertions(+)

diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c
index f2e5a2fa737b..e9b5a55811b8 100644
--- a/hw/nvme/ctrl.c
+++ b/hw/nvme/ctrl.c
@@ -5120,6 +5120,11 @@ static uint16_t nvme_fdp_events(NvmeCtrl *n, uint32_t 
endgrpid,
 }
 
 log_size = sizeof(NvmeFdpEventsLog) + ebuf->nelems * sizeof(NvmeFdpEvent);
+
+if (off >= log_size) {
+return NVME_INVALID_FIELD | NVME_DNR;
+}
+
 trans_len = MIN(log_size - off, buf_len);
 elog = g_malloc0(log_size);
 elog->num_events = cpu_to_le32(ebuf->nelems);
-- 
2.41.0

[PULL 1/1] hw/nvme: use stl/ldl pci dma api

2023-07-30 Thread Klaus Jensen

From: Klaus Jensen 

Use the stl/ldl pci dma api for writing/reading doorbells. This removes
the explicit endian conversions.

Reviewed-by: Philippe Mathieu-Daudé 
Reviewed-by: Cédric Le Goater 
Tested-by: Cédric Le Goater 
Reviewed-by: Thomas Huth 
Signed-off-by: Klaus Jensen 
---
 hw/nvme/ctrl.c | 42 +-
 1 file changed, 13 insertions(+), 29 deletions(-)

diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c
index dadc2dc7da10..f2e5a2fa737b 100644
--- a/hw/nvme/ctrl.c
+++ b/hw/nvme/ctrl.c
@@ -1468,20 +1468,16 @@ static inline void nvme_blk_write(BlockBackend *blk, 
int64_t offset,
 
 static void nvme_update_cq_eventidx(const NvmeCQueue *cq)
 {
-uint32_t v = cpu_to_le32(cq->head);
-
 trace_pci_nvme_update_cq_eventidx(cq->cqid, cq->head);
 
-pci_dma_write(PCI_DEVICE(cq->ctrl), cq->ei_addr, , sizeof(v));
+stl_le_pci_dma(PCI_DEVICE(cq->ctrl), cq->ei_addr, cq->head,
+   MEMTXATTRS_UNSPECIFIED);
 }
 
 static void nvme_update_cq_head(NvmeCQueue *cq)
 {
-uint32_t v;
-
-pci_dma_read(PCI_DEVICE(cq->ctrl), cq->db_addr, , sizeof(v));
-
-cq->head = le32_to_cpu(v);
+ldl_le_pci_dma(PCI_DEVICE(cq->ctrl), cq->db_addr, >head,
+   MEMTXATTRS_UNSPECIFIED);
 
 trace_pci_nvme_update_cq_head(cq->cqid, cq->head);
 }
@@ -6801,7 +6797,6 @@ static uint16_t nvme_dbbuf_config(NvmeCtrl *n, const 
NvmeRequest *req)
 PCIDevice *pci = PCI_DEVICE(n);
 uint64_t dbs_addr = le64_to_cpu(req->cmd.dptr.prp1);
 uint64_t eis_addr = le64_to_cpu(req->cmd.dptr.prp2);
-uint32_t v;
 int i;
 
 /* Address should be page aligned */
@@ -6819,8 +6814,6 @@ static uint16_t nvme_dbbuf_config(NvmeCtrl *n, const 
NvmeRequest *req)
 NvmeCQueue *cq = n->cq[i];
 
 if (sq) {
-v = cpu_to_le32(sq->tail);
-
 /*
  * CAP.DSTRD is 0, so offset of ith sq db_addr is (i<<3)
  * nvme_process_db() uses this hard-coded way to calculate
@@ -6828,7 +6821,7 @@ static uint16_t nvme_dbbuf_config(NvmeCtrl *n, const 
NvmeRequest *req)
  */
 sq->db_addr = dbs_addr + (i << 3);
 sq->ei_addr = eis_addr + (i << 3);
-pci_dma_write(pci, sq->db_addr, , sizeof(sq->tail));
+stl_le_pci_dma(pci, sq->db_addr, sq->tail, MEMTXATTRS_UNSPECIFIED);
 
 if (n->params.ioeventfd && sq->sqid != 0) {
 if (!nvme_init_sq_ioeventfd(sq)) {
@@ -6838,12 +6831,10 @@ static uint16_t nvme_dbbuf_config(NvmeCtrl *n, const 
NvmeRequest *req)
 }
 
 if (cq) {
-v = cpu_to_le32(cq->head);
-
 /* CAP.DSTRD is 0, so offset of ith cq db_addr is (i<<3)+(1<<2) */
 cq->db_addr = dbs_addr + (i << 3) + (1 << 2);
 cq->ei_addr = eis_addr + (i << 3) + (1 << 2);
-pci_dma_write(pci, cq->db_addr, , sizeof(cq->head));
+stl_le_pci_dma(pci, cq->db_addr, cq->head, MEMTXATTRS_UNSPECIFIED);
 
 if (n->params.ioeventfd && cq->cqid != 0) {
 if (!nvme_init_cq_ioeventfd(cq)) {
@@ -6974,20 +6965,16 @@ static uint16_t nvme_admin_cmd(NvmeCtrl *n, NvmeRequest 
*req)
 
 static void nvme_update_sq_eventidx(const NvmeSQueue *sq)
 {
-uint32_t v = cpu_to_le32(sq->tail);
-
 trace_pci_nvme_update_sq_eventidx(sq->sqid, sq->tail);
 
-pci_dma_write(PCI_DEVICE(sq->ctrl), sq->ei_addr, , sizeof(v));
+stl_le_pci_dma(PCI_DEVICE(sq->ctrl), sq->ei_addr, sq->tail,
+   MEMTXATTRS_UNSPECIFIED);
 }
 
 static void nvme_update_sq_tail(NvmeSQueue *sq)
 {
-uint32_t v;
-
-pci_dma_read(PCI_DEVICE(sq->ctrl), sq->db_addr, , sizeof(v));
-
-sq->tail = le32_to_cpu(v);
+ldl_le_pci_dma(PCI_DEVICE(sq->ctrl), sq->db_addr, >tail,
+   MEMTXATTRS_UNSPECIFIED);
 
 trace_pci_nvme_update_sq_tail(sq->sqid, sq->tail);
 }
@@ -7592,7 +7579,7 @@ static uint64_t nvme_mmio_read(void *opaque, hwaddr addr, 
unsigned size)
 static void nvme_process_db(NvmeCtrl *n, hwaddr addr, int val)
 {
 PCIDevice *pci = PCI_DEVICE(n);
-uint32_t qid, v;
+uint32_t qid;
 
 if (unlikely(addr & ((1 << 2) - 1))) {
 NVME_GUEST_ERR(pci_nvme_ub_db_wr_misaligned,
@@ -7659,8 +7646,7 @@ static void nvme_process_db(NvmeCtrl *n, hwaddr addr, int 
val)
 start_sqs = nvme_cq_full(cq) ? 1 : 0;
 cq->head = new_head;
 if (!qid && n->dbbuf_enabled) {
-v = cpu_to_le32(cq->head);
-pci_dma_write(pci, cq->db_addr, , sizeof(cq->head));
+stl_le_pci_dma(pci, cq->db_addr, cq->head, MEMTXATTRS_UNSPECIFIED);
 }
 if (start_sqs) {
 NvmeSQueue *sq;
@@ -7720,8 +7706,6 @@ static void nvme_process_db(NvmeCtrl *n, hwaddr add

[PULL 0/1] hw/nvme fixes

2023-07-30 Thread Klaus Jensen

From: Klaus Jensen 

Hi,

This should also fix coverity cid 1518067 and 1518066.

The following changes since commit ccb86f079a9e4d94918086a9df18c1844347aff8:

  Merge tag 'pull-nbd-2023-07-28' of https://repo.or.cz/qemu/ericb into staging 
(2023-07-28 09:56:57 -0700)

are available in the Git repository at:

  https://gitlab.com/birkelund/qemu.git tags/nvme-next-pull-request

for you to fetch changes up to c1e244b6552efdff5612a33c6630aaf95964eaf5:

  hw/nvme: use stl/ldl pci dma api (2023-07-30 20:09:54 +0200)


hw/nvme fixes

- use the stl/ldl pci dma api
-BEGIN PGP SIGNATURE-

iQEzBAABCgAdFiEEUigzqnXi3OaiR2bATeGvMW1PDekFAmTGuc8ACgkQTeGvMW1P
Dek41wgAwqgRmtUhmmaQJJpF5Pya3J7n3Zkbp+cULdnSp/su7W7yIUTcTzdbr34d
9LbNHmWerXYinlIxG08ZWw2lq0TwApKj+8gv/wf8H7dG86/pBYfoQvOlkNx2QKyR
vtRNlILCEbJpbSfY3LbFNvRGOkArr6HkzT4hZprUIfCvRg58u5oIxEx/ZYa+m3WU
ED0y/46e7HbVbmbwJKrn4EK3k0zGdFyeINRZ5TB5DML3lCTX6eaZTLUXGIb7LLcK
Xyv6/TCkPTggDszTam24kx0A7DhC+3f2C8DsJg7H8jnWb1F+oq/2EJam/0HU22Uk
n348MrWOusuF7kbHMCP9h28gYT3aWw==
=KjVO
-END PGP SIGNATURE-



Klaus Jensen (1):
  hw/nvme: use stl/ldl pci dma api

 hw/nvme/ctrl.c | 42 +-
 1 file changed, 13 insertions(+), 29 deletions(-)

-- 
2.41.0

[PATCH] hw/nvme: use stl/ldl pci dma api

2023-07-20 Thread Klaus Jensen

From: Klaus Jensen 

Use the stl/ldl pci dma api for writing/reading doorbells. This removes
the explicit endian conversions.

Signed-off-by: Klaus Jensen 
---
 hw/nvme/ctrl.c | 42 +-
 1 file changed, 13 insertions(+), 29 deletions(-)

diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c
index dadc2dc7da10..f2e5a2fa737b 100644
--- a/hw/nvme/ctrl.c
+++ b/hw/nvme/ctrl.c
@@ -1468,20 +1468,16 @@ static inline void nvme_blk_write(BlockBackend *blk, 
int64_t offset,
 
 static void nvme_update_cq_eventidx(const NvmeCQueue *cq)
 {
-uint32_t v = cpu_to_le32(cq->head);
-
 trace_pci_nvme_update_cq_eventidx(cq->cqid, cq->head);
 
-pci_dma_write(PCI_DEVICE(cq->ctrl), cq->ei_addr, , sizeof(v));
+stl_le_pci_dma(PCI_DEVICE(cq->ctrl), cq->ei_addr, cq->head,
+   MEMTXATTRS_UNSPECIFIED);
 }
 
 static void nvme_update_cq_head(NvmeCQueue *cq)
 {
-uint32_t v;
-
-pci_dma_read(PCI_DEVICE(cq->ctrl), cq->db_addr, , sizeof(v));
-
-cq->head = le32_to_cpu(v);
+ldl_le_pci_dma(PCI_DEVICE(cq->ctrl), cq->db_addr, >head,
+   MEMTXATTRS_UNSPECIFIED);
 
 trace_pci_nvme_update_cq_head(cq->cqid, cq->head);
 }
@@ -6801,7 +6797,6 @@ static uint16_t nvme_dbbuf_config(NvmeCtrl *n, const 
NvmeRequest *req)
 PCIDevice *pci = PCI_DEVICE(n);
 uint64_t dbs_addr = le64_to_cpu(req->cmd.dptr.prp1);
 uint64_t eis_addr = le64_to_cpu(req->cmd.dptr.prp2);
-uint32_t v;
 int i;
 
 /* Address should be page aligned */
@@ -6819,8 +6814,6 @@ static uint16_t nvme_dbbuf_config(NvmeCtrl *n, const 
NvmeRequest *req)
 NvmeCQueue *cq = n->cq[i];
 
 if (sq) {
-v = cpu_to_le32(sq->tail);
-
 /*
  * CAP.DSTRD is 0, so offset of ith sq db_addr is (i<<3)
  * nvme_process_db() uses this hard-coded way to calculate
@@ -6828,7 +6821,7 @@ static uint16_t nvme_dbbuf_config(NvmeCtrl *n, const 
NvmeRequest *req)
  */
 sq->db_addr = dbs_addr + (i << 3);
 sq->ei_addr = eis_addr + (i << 3);
-pci_dma_write(pci, sq->db_addr, , sizeof(sq->tail));
+stl_le_pci_dma(pci, sq->db_addr, sq->tail, MEMTXATTRS_UNSPECIFIED);
 
 if (n->params.ioeventfd && sq->sqid != 0) {
 if (!nvme_init_sq_ioeventfd(sq)) {
@@ -6838,12 +6831,10 @@ static uint16_t nvme_dbbuf_config(NvmeCtrl *n, const 
NvmeRequest *req)
 }
 
 if (cq) {
-v = cpu_to_le32(cq->head);
-
 /* CAP.DSTRD is 0, so offset of ith cq db_addr is (i<<3)+(1<<2) */
 cq->db_addr = dbs_addr + (i << 3) + (1 << 2);
 cq->ei_addr = eis_addr + (i << 3) + (1 << 2);
-pci_dma_write(pci, cq->db_addr, , sizeof(cq->head));
+stl_le_pci_dma(pci, cq->db_addr, cq->head, MEMTXATTRS_UNSPECIFIED);
 
 if (n->params.ioeventfd && cq->cqid != 0) {
 if (!nvme_init_cq_ioeventfd(cq)) {
@@ -6974,20 +6965,16 @@ static uint16_t nvme_admin_cmd(NvmeCtrl *n, NvmeRequest 
*req)
 
 static void nvme_update_sq_eventidx(const NvmeSQueue *sq)
 {
-uint32_t v = cpu_to_le32(sq->tail);
-
 trace_pci_nvme_update_sq_eventidx(sq->sqid, sq->tail);
 
-pci_dma_write(PCI_DEVICE(sq->ctrl), sq->ei_addr, , sizeof(v));
+stl_le_pci_dma(PCI_DEVICE(sq->ctrl), sq->ei_addr, sq->tail,
+   MEMTXATTRS_UNSPECIFIED);
 }
 
 static void nvme_update_sq_tail(NvmeSQueue *sq)
 {
-uint32_t v;
-
-pci_dma_read(PCI_DEVICE(sq->ctrl), sq->db_addr, , sizeof(v));
-
-sq->tail = le32_to_cpu(v);
+ldl_le_pci_dma(PCI_DEVICE(sq->ctrl), sq->db_addr, >tail,
+   MEMTXATTRS_UNSPECIFIED);
 
 trace_pci_nvme_update_sq_tail(sq->sqid, sq->tail);
 }
@@ -7592,7 +7579,7 @@ static uint64_t nvme_mmio_read(void *opaque, hwaddr addr, 
unsigned size)
 static void nvme_process_db(NvmeCtrl *n, hwaddr addr, int val)
 {
 PCIDevice *pci = PCI_DEVICE(n);
-uint32_t qid, v;
+uint32_t qid;
 
 if (unlikely(addr & ((1 << 2) - 1))) {
 NVME_GUEST_ERR(pci_nvme_ub_db_wr_misaligned,
@@ -7659,8 +7646,7 @@ static void nvme_process_db(NvmeCtrl *n, hwaddr addr, int 
val)
 start_sqs = nvme_cq_full(cq) ? 1 : 0;
 cq->head = new_head;
 if (!qid && n->dbbuf_enabled) {
-v = cpu_to_le32(cq->head);
-pci_dma_write(pci, cq->db_addr, , sizeof(cq->head));
+stl_le_pci_dma(pci, cq->db_addr, cq->head, MEMTXATTRS_UNSPECIFIED);
 }
 if (start_sqs) {
 NvmeSQueue *sq;
@@ -7720,8 +7706,6 @@ static void nvme_process_db(NvmeCtrl *n, hwaddr addr, int 
val)
 
 sq->tail = new_tail;
 if (!qid && n->dbbuf_enabled) {
-v = c

Re: [PULL 1/1] hw/nvme: fix endianness issue for shadow doorbells

2023-07-20 Thread Klaus Jensen

On Jul 20 09:51, Peter Maydell wrote:
> On Thu, 20 Jul 2023 at 09:49, Klaus Jensen  wrote:
> >
> > On Jul 20 09:43, Peter Maydell wrote:
> > > On Wed, 19 Jul 2023 at 21:13, Michael Tokarev  wrote:
> > > >
> > > > 19.07.2023 10:36, Klaus Jensen wrote:
> > > > pu(req->cmd.dptr.prp2);
> > > > > +uint32_t v;
> > > >
> > > > >   if (sq) {
> > > > > +v = cpu_to_le32(sq->tail);
> > > >
> > > > > -pci_dma_write(pci, sq->db_addr, >tail, 
> > > > > sizeof(sq->tail));
> > > > > +pci_dma_write(pci, sq->db_addr, , sizeof(sq->tail));
> > > >
> > > > This and similar cases hurts my eyes.
> > > >
> > > > Why we pass address of v here, but use sizeof(sq->tail) ?
> > > >
> > > > Yes, I know both in theory should be of the same size, but heck,
> > > > this is puzzling at best, and confusing in a regular case.
> > > >
> > > > Dunno how it slipped in the review, it instantly catched my eye
> > > > in a row of applied patches..
> > > >
> > > > Also, why v is computed a few lines before it is used, with
> > > > some expressions between the assignment and usage?
> > > >
> > > > How about the following patch:
> > >
> > > If you're going to change this, better to take the approach
> > > Philippe suggested in review of using stl_le_pci_dma().
> > >
> > > https://lore.kernel.org/qemu-devel/376e5e45-a3e7-0029-603a-b7ad9673f...@linaro.org/
> > >
> >
> > Yup, that was my plan for next. But the original patch was already
> > verified on hardware and mutiple testes, so wanted to go with that for
> > the "fix".
> >
> > But yes, I will refactor into the much nicer stl/ldl api.
> 
> FWIW, I don't think this bug fix was so urgent that we
> needed to go with a quick fix and a followup -- we're
> not yet that close to 8.1 release.
> 

Alright, noted ;) I will spin this into the other fix I have under
review.


signature.asc
Description: PGP signature

Re: [PULL 1/1] hw/nvme: fix endianness issue for shadow doorbells

2023-07-20 Thread Klaus Jensen

On Jul 20 09:43, Peter Maydell wrote:
> On Wed, 19 Jul 2023 at 21:13, Michael Tokarev  wrote:
> >
> > 19.07.2023 10:36, Klaus Jensen wrote:
> > pu(req->cmd.dptr.prp2);
> > > +uint32_t v;
> >
> > >   if (sq) {
> > > +v = cpu_to_le32(sq->tail);
> >
> > > -pci_dma_write(pci, sq->db_addr, >tail, sizeof(sq->tail));
> > > +pci_dma_write(pci, sq->db_addr, , sizeof(sq->tail));
> >
> > This and similar cases hurts my eyes.
> >
> > Why we pass address of v here, but use sizeof(sq->tail) ?
> >
> > Yes, I know both in theory should be of the same size, but heck,
> > this is puzzling at best, and confusing in a regular case.
> >
> > Dunno how it slipped in the review, it instantly catched my eye
> > in a row of applied patches..
> >
> > Also, why v is computed a few lines before it is used, with
> > some expressions between the assignment and usage?
> >
> > How about the following patch:
> 
> If you're going to change this, better to take the approach
> Philippe suggested in review of using stl_le_pci_dma().
> 
> https://lore.kernel.org/qemu-devel/376e5e45-a3e7-0029-603a-b7ad9673f...@linaro.org/
> 

Yup, that was my plan for next. But the original patch was already
verified on hardware and mutiple testes, so wanted to go with that for
the "fix".

But yes, I will refactor into the much nicer stl/ldl api.


signature.asc
Description: PGP signature

[PATCH for-8.1] hw/nvme: fix compliance issue wrt. iosqes/iocqes

2023-07-19 Thread Klaus Jensen

From: Klaus Jensen 

As of prior to this patch, the controller checks the value of CC.IOCQES
and CC.IOSQES prior to enabling the controller. As reported by Ben in
GitLab issue #1691, this is not spec compliant. The controller should
only check these values when queues are created.

This patch moves these checks to nvme_create_cq(). We do not need to
check it in nvme_create_sq() since that will error out if the completion
queue is not already created.

Also, since the controlle exclusively supports SQEs of size 64 bytes and
CQEs of size 16 bytes, hard code that.

Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1691
Signed-off-by: Klaus Jensen 
---
 hw/nvme/ctrl.c   | 46 
 hw/nvme/nvme.h   |  9 +++--
 hw/nvme/trace-events |  1 +
 3 files changed, 20 insertions(+), 36 deletions(-)

diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c
index 8e8e870b9a80..414e9ea60e05 100644
--- a/hw/nvme/ctrl.c
+++ b/hw/nvme/ctrl.c
@@ -1511,7 +1511,7 @@ static void nvme_post_cqes(void *opaque)
 req->cqe.status = cpu_to_le16((req->status << 1) | cq->phase);
 req->cqe.sq_id = cpu_to_le16(sq->sqid);
 req->cqe.sq_head = cpu_to_le16(sq->head);
-addr = cq->dma_addr + cq->tail * n->cqe_size;
+addr = cq->dma_addr + (cq->tail << NVME_CQES);
 ret = pci_dma_write(PCI_DEVICE(n), addr, (void *)>cqe,
 sizeof(req->cqe));
 if (ret) {
@@ -5299,10 +5299,18 @@ static uint16_t nvme_create_cq(NvmeCtrl *n, NvmeRequest 
*req)
 uint16_t qsize = le16_to_cpu(c->qsize);
 uint16_t qflags = le16_to_cpu(c->cq_flags);
 uint64_t prp1 = le64_to_cpu(c->prp1);
+uint32_t cc = ldq_le_p(>bar.cc);
+uint8_t iocqes = NVME_CC_IOCQES(cc);
+uint8_t iosqes = NVME_CC_IOSQES(cc);
 
 trace_pci_nvme_create_cq(prp1, cqid, vector, qsize, qflags,
  NVME_CQ_FLAGS_IEN(qflags) != 0);
 
+if (iosqes != NVME_SQES || iocqes != NVME_CQES) {
+trace_pci_nvme_err_invalid_create_cq_entry_size(iosqes, iocqes);
+return NVME_MAX_QSIZE_EXCEEDED | NVME_DNR;
+}
+
 if (unlikely(!cqid || cqid > n->conf_ioqpairs || n->cq[cqid] != NULL)) {
 trace_pci_nvme_err_invalid_create_cq_cqid(cqid);
 return NVME_INVALID_QID | NVME_DNR;
@@ -7003,7 +7011,7 @@ static void nvme_process_sq(void *opaque)
 }
 
 while (!(nvme_sq_empty(sq) || QTAILQ_EMPTY(>req_list))) {
-addr = sq->dma_addr + sq->head * n->sqe_size;
+addr = sq->dma_addr + (sq->head << NVME_SQES);
 if (nvme_addr_read(n, addr, (void *), sizeof(cmd))) {
 trace_pci_nvme_err_addr_read(addr);
 trace_pci_nvme_err_cfs();
@@ -7228,34 +7236,6 @@ static int nvme_start_ctrl(NvmeCtrl *n)
 NVME_CAP_MPSMAX(cap));
 return -1;
 }
-if (unlikely(NVME_CC_IOCQES(cc) <
- NVME_CTRL_CQES_MIN(n->id_ctrl.cqes))) {
-trace_pci_nvme_err_startfail_cqent_too_small(
-NVME_CC_IOCQES(cc),
-NVME_CTRL_CQES_MIN(cap));
-return -1;
-}
-if (unlikely(NVME_CC_IOCQES(cc) >
- NVME_CTRL_CQES_MAX(n->id_ctrl.cqes))) {
-trace_pci_nvme_err_startfail_cqent_too_large(
-NVME_CC_IOCQES(cc),
-NVME_CTRL_CQES_MAX(cap));
-return -1;
-}
-if (unlikely(NVME_CC_IOSQES(cc) <
- NVME_CTRL_SQES_MIN(n->id_ctrl.sqes))) {
-trace_pci_nvme_err_startfail_sqent_too_small(
-NVME_CC_IOSQES(cc),
-NVME_CTRL_SQES_MIN(cap));
-return -1;
-}
-if (unlikely(NVME_CC_IOSQES(cc) >
- NVME_CTRL_SQES_MAX(n->id_ctrl.sqes))) {
-trace_pci_nvme_err_startfail_sqent_too_large(
-NVME_CC_IOSQES(cc),
-NVME_CTRL_SQES_MAX(cap));
-return -1;
-}
 if (unlikely(!NVME_AQA_ASQS(aqa))) {
 trace_pci_nvme_err_startfail_asqent_sz_zero();
 return -1;
@@ -7268,8 +7248,6 @@ static int nvme_start_ctrl(NvmeCtrl *n)
 n->page_bits = page_bits;
 n->page_size = page_size;
 n->max_prp_ents = n->page_size / sizeof(uint64_t);
-n->cqe_size = 1 << NVME_CC_IOCQES(cc);
-n->sqe_size = 1 << NVME_CC_IOSQES(cc);
 nvme_init_cq(>admin_cq, n, acq, 0, 0, NVME_AQA_ACQS(aqa) + 1, 1);
 nvme_init_sq(>admin_sq, n, asq, 0, 0, NVME_AQA_ASQS(aqa) + 1);
 
@@ -8238,8 +8216,8 @@ static void nvme_init_ctrl(NvmeCtrl *n, PCIDevice 
*pci_dev)
 id->wctemp = cpu_to_le16(NVME_TEMPERATURE_WARNING);
 id->cctemp = cpu_to_le16(NVME_TEMPERATURE_CRITICAL);
 
-id->sqes = (0x6 << 4) | 0x6;
-id->cqes = (0x4 << 4) | 0x4;
+id->sqes = (NVME_SQES << 4) | NVME_SQES;
+id->cqes = (NVME_CQES << 4) | NVME_CQES;

[PULL 1/1] hw/nvme: fix endianness issue for shadow doorbells

2023-07-19 Thread Klaus Jensen

From: Klaus Jensen 

In commit 2fda0726e514 ("hw/nvme: fix missing endian conversions for
doorbell buffers"), we fixed shadow doorbells for big-endian guests
running on little endian hosts. But I did not fix little-endian guests
on big-endian hosts. Fix this.

Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1765
Fixes: 3f7fe8de3d49 ("hw/nvme: Implement shadow doorbell buffer support")
Cc: qemu-sta...@nongnu.org
Reported-by: Thomas Huth 
Tested-by: Cédric Le Goater 
Tested-by: Thomas Huth 
Reviewed-by: Keith Busch 
Reviewed-by: Philippe Mathieu-Daudé 
Signed-off-by: Klaus Jensen 
---
 hw/nvme/ctrl.c | 18 +-
 1 file changed, 13 insertions(+), 5 deletions(-)

diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c
index 8e8e870b9a80..dadc2dc7da10 100644
--- a/hw/nvme/ctrl.c
+++ b/hw/nvme/ctrl.c
@@ -6801,6 +6801,7 @@ static uint16_t nvme_dbbuf_config(NvmeCtrl *n, const 
NvmeRequest *req)
 PCIDevice *pci = PCI_DEVICE(n);
 uint64_t dbs_addr = le64_to_cpu(req->cmd.dptr.prp1);
 uint64_t eis_addr = le64_to_cpu(req->cmd.dptr.prp2);
+uint32_t v;
 int i;
 
 /* Address should be page aligned */
@@ -6818,6 +6819,8 @@ static uint16_t nvme_dbbuf_config(NvmeCtrl *n, const 
NvmeRequest *req)
 NvmeCQueue *cq = n->cq[i];
 
 if (sq) {
+v = cpu_to_le32(sq->tail);
+
 /*
  * CAP.DSTRD is 0, so offset of ith sq db_addr is (i<<3)
  * nvme_process_db() uses this hard-coded way to calculate
@@ -6825,7 +6828,7 @@ static uint16_t nvme_dbbuf_config(NvmeCtrl *n, const 
NvmeRequest *req)
  */
 sq->db_addr = dbs_addr + (i << 3);
 sq->ei_addr = eis_addr + (i << 3);
-pci_dma_write(pci, sq->db_addr, >tail, sizeof(sq->tail));
+pci_dma_write(pci, sq->db_addr, , sizeof(sq->tail));
 
 if (n->params.ioeventfd && sq->sqid != 0) {
 if (!nvme_init_sq_ioeventfd(sq)) {
@@ -6835,10 +6838,12 @@ static uint16_t nvme_dbbuf_config(NvmeCtrl *n, const 
NvmeRequest *req)
 }
 
 if (cq) {
+v = cpu_to_le32(cq->head);
+
 /* CAP.DSTRD is 0, so offset of ith cq db_addr is (i<<3)+(1<<2) */
 cq->db_addr = dbs_addr + (i << 3) + (1 << 2);
 cq->ei_addr = eis_addr + (i << 3) + (1 << 2);
-pci_dma_write(pci, cq->db_addr, >head, sizeof(cq->head));
+pci_dma_write(pci, cq->db_addr, , sizeof(cq->head));
 
 if (n->params.ioeventfd && cq->cqid != 0) {
 if (!nvme_init_cq_ioeventfd(cq)) {
@@ -7587,7 +7592,7 @@ static uint64_t nvme_mmio_read(void *opaque, hwaddr addr, 
unsigned size)
 static void nvme_process_db(NvmeCtrl *n, hwaddr addr, int val)
 {
 PCIDevice *pci = PCI_DEVICE(n);
-uint32_t qid;
+uint32_t qid, v;
 
 if (unlikely(addr & ((1 << 2) - 1))) {
 NVME_GUEST_ERR(pci_nvme_ub_db_wr_misaligned,
@@ -7654,7 +7659,8 @@ static void nvme_process_db(NvmeCtrl *n, hwaddr addr, int 
val)
 start_sqs = nvme_cq_full(cq) ? 1 : 0;
 cq->head = new_head;
 if (!qid && n->dbbuf_enabled) {
-pci_dma_write(pci, cq->db_addr, >head, sizeof(cq->head));
+v = cpu_to_le32(cq->head);
+pci_dma_write(pci, cq->db_addr, , sizeof(cq->head));
 }
 if (start_sqs) {
 NvmeSQueue *sq;
@@ -7714,6 +7720,8 @@ static void nvme_process_db(NvmeCtrl *n, hwaddr addr, int 
val)
 
 sq->tail = new_tail;
 if (!qid && n->dbbuf_enabled) {
+v = cpu_to_le32(sq->tail);
+
 /*
  * The spec states "the host shall also update the controller's
  * corresponding doorbell property to match the value of that entry
@@ -7727,7 +7735,7 @@ static void nvme_process_db(NvmeCtrl *n, hwaddr addr, int 
val)
  * including ones that run on Linux, are not updating Admin Queues,
  * so we can't trust reading it for an appropriate sq tail.
  */
-pci_dma_write(pci, sq->db_addr, >tail, sizeof(sq->tail));
+pci_dma_write(pci, sq->db_addr, , sizeof(sq->tail));
 }
 
 qemu_bh_schedule(sq->bh);
-- 
2.41.0

[PULL 0/1] hw/nvme fixes

2023-07-19 Thread Klaus Jensen

From: Klaus Jensen 

Hi,

The following changes since commit 361d5397355276e3007825cc17217c1e4d4320f7:

  Merge tag 'block-pull-request' of https://gitlab.com/stefanha/qemu into 
staging (2023-07-17 15:49:27 +0100)

are available in the Git repository at:

  https://gitlab.com/birkelund/qemu.git tags/nvme-next-pull-request

for you to fetch changes up to ea3c76f1494d0c75873c3b470e6e048202661ad8:

  hw/nvme: fix endianness issue for shadow doorbells (2023-07-19 09:33:54 +0200)


hw/nvme fixes

* fix shadow doorbell endian issue
-BEGIN PGP SIGNATURE-

iQEzBAABCgAdFiEEUigzqnXi3OaiR2bATeGvMW1PDekFAmS3kkAACgkQTeGvMW1P
DenG1ggArIHi1dQQBIG1ubzHx/C+93cybpKwT73/5wfO7BT8CCh1v+qrH/6SsYUT
5O7y1MaCLDV4ocf5dRQseXFK0tpjo7EqDnr25UhcSunQ+d2Tn7MAIuubQOFD+Axh
5gIwOEJbKqw9apJgnVWnInTBd//ManOgh6OyC1uJ+DEJE7ISJzLlJeWaBekiWpAA
hNL1zsR5+eTcwnewDRmMs4FlKBlSfgcNgNYnz8tfpnW0DzXKuiY4ITnk6kX9eMAM
kDlbjFjlgoTPZ8IsYcyhVCJMcH8jqY/LuZcaF7XHHsdX7fa5p17C6rR1hxVyDs+E
rydOtWetQDhXlyakE+Jp2RB3HLcSmg==
=j1TL
-END PGP SIGNATURE-



Klaus Jensen (1):
  hw/nvme: fix endianness issue for shadow doorbells

 hw/nvme/ctrl.c | 18 +-
 1 file changed, 13 insertions(+), 5 deletions(-)

-- 
2.41.0

Re: [PATCH] hw/nvme: fix endianness issue for shadow doorbells

2023-07-18 Thread Klaus Jensen

On Jul 18 13:18, Philippe Mathieu-Daudé wrote:
> On 18/7/23 12:35, Klaus Jensen wrote:
> > From: Klaus Jensen 
> > 
> > In commit 2fda0726e514 ("hw/nvme: fix missing endian conversions for
> > doorbell buffers"), we fixed shadow doorbells for big-endian guests
> > running on little endian hosts. But I did not fix little-endian guests
> > on big-endian hosts. Fix this.
> > 
> > Solves issue #1765.
> > 
> > Fixes: 3f7fe8de3d49 ("hw/nvme: Implement shadow doorbell buffer support")
> > Cc: qemu-sta...@nongnu.org
> > Reported-by: Thomas Huth 
> > Signed-off-by: Klaus Jensen 
> > ---
> >   hw/nvme/ctrl.c | 18 +-
> >   1 file changed, 13 insertions(+), 5 deletions(-)
> > 
> > diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c
> > index 8e8e870b9a80..dadc2dc7da10 100644
> > --- a/hw/nvme/ctrl.c
> > +++ b/hw/nvme/ctrl.c
> > @@ -6801,6 +6801,7 @@ static uint16_t nvme_dbbuf_config(NvmeCtrl *n, const 
> > NvmeRequest *req)
> >   PCIDevice *pci = PCI_DEVICE(n);
> >   uint64_t dbs_addr = le64_to_cpu(req->cmd.dptr.prp1);
> >   uint64_t eis_addr = le64_to_cpu(req->cmd.dptr.prp2);
> > +uint32_t v;
> >   int i;
> >   /* Address should be page aligned */
> > @@ -6818,6 +6819,8 @@ static uint16_t nvme_dbbuf_config(NvmeCtrl *n, const 
> > NvmeRequest *req)
> >   NvmeCQueue *cq = n->cq[i];
> >   if (sq) {
> > +v = cpu_to_le32(sq->tail);
> > +
> >   /*
> >* CAP.DSTRD is 0, so offset of ith sq db_addr is (i<<3)
> >* nvme_process_db() uses this hard-coded way to calculate
> > @@ -6825,7 +6828,7 @@ static uint16_t nvme_dbbuf_config(NvmeCtrl *n, const 
> > NvmeRequest *req)
> >*/
> >   sq->db_addr = dbs_addr + (i << 3);
> >   sq->ei_addr = eis_addr + (i << 3);
> > -pci_dma_write(pci, sq->db_addr, >tail, sizeof(sq->tail));
> > +pci_dma_write(pci, sq->db_addr, , sizeof(sq->tail));
> 
> Or use the PCI DMA ldst API which does the swapping for you:
> 
>   stl_le_pci_dma(pci, sq->db_addr, sq->tail, MEMTXATTRS_UNSPECIFIED);
> 

Nice! That's definitely preferable. I'll queue that up for next, but
leave this patch as-is since it's been tested on a real big-endian host
and gotten its tags ;)


signature.asc
Description: PGP signature

[PATCH] hw/nvme: fix endianness issue for shadow doorbells

2023-07-18 Thread Klaus Jensen

From: Klaus Jensen 

In commit 2fda0726e514 ("hw/nvme: fix missing endian conversions for
doorbell buffers"), we fixed shadow doorbells for big-endian guests
running on little endian hosts. But I did not fix little-endian guests
on big-endian hosts. Fix this.

Solves issue #1765.

Fixes: 3f7fe8de3d49 ("hw/nvme: Implement shadow doorbell buffer support")
Cc: qemu-sta...@nongnu.org
Reported-by: Thomas Huth 
Signed-off-by: Klaus Jensen 
---
 hw/nvme/ctrl.c | 18 +-
 1 file changed, 13 insertions(+), 5 deletions(-)

diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c
index 8e8e870b9a80..dadc2dc7da10 100644
--- a/hw/nvme/ctrl.c
+++ b/hw/nvme/ctrl.c
@@ -6801,6 +6801,7 @@ static uint16_t nvme_dbbuf_config(NvmeCtrl *n, const 
NvmeRequest *req)
 PCIDevice *pci = PCI_DEVICE(n);
 uint64_t dbs_addr = le64_to_cpu(req->cmd.dptr.prp1);
 uint64_t eis_addr = le64_to_cpu(req->cmd.dptr.prp2);
+uint32_t v;
 int i;
 
 /* Address should be page aligned */
@@ -6818,6 +6819,8 @@ static uint16_t nvme_dbbuf_config(NvmeCtrl *n, const 
NvmeRequest *req)
 NvmeCQueue *cq = n->cq[i];
 
 if (sq) {
+v = cpu_to_le32(sq->tail);
+
 /*
  * CAP.DSTRD is 0, so offset of ith sq db_addr is (i<<3)
  * nvme_process_db() uses this hard-coded way to calculate
@@ -6825,7 +6828,7 @@ static uint16_t nvme_dbbuf_config(NvmeCtrl *n, const 
NvmeRequest *req)
  */
 sq->db_addr = dbs_addr + (i << 3);
 sq->ei_addr = eis_addr + (i << 3);
-pci_dma_write(pci, sq->db_addr, >tail, sizeof(sq->tail));
+pci_dma_write(pci, sq->db_addr, , sizeof(sq->tail));
 
 if (n->params.ioeventfd && sq->sqid != 0) {
 if (!nvme_init_sq_ioeventfd(sq)) {
@@ -6835,10 +6838,12 @@ static uint16_t nvme_dbbuf_config(NvmeCtrl *n, const 
NvmeRequest *req)
 }
 
 if (cq) {
+v = cpu_to_le32(cq->head);
+
 /* CAP.DSTRD is 0, so offset of ith cq db_addr is (i<<3)+(1<<2) */
 cq->db_addr = dbs_addr + (i << 3) + (1 << 2);
 cq->ei_addr = eis_addr + (i << 3) + (1 << 2);
-pci_dma_write(pci, cq->db_addr, >head, sizeof(cq->head));
+pci_dma_write(pci, cq->db_addr, , sizeof(cq->head));
 
 if (n->params.ioeventfd && cq->cqid != 0) {
 if (!nvme_init_cq_ioeventfd(cq)) {
@@ -7587,7 +7592,7 @@ static uint64_t nvme_mmio_read(void *opaque, hwaddr addr, 
unsigned size)
 static void nvme_process_db(NvmeCtrl *n, hwaddr addr, int val)
 {
 PCIDevice *pci = PCI_DEVICE(n);
-uint32_t qid;
+uint32_t qid, v;
 
 if (unlikely(addr & ((1 << 2) - 1))) {
 NVME_GUEST_ERR(pci_nvme_ub_db_wr_misaligned,
@@ -7654,7 +7659,8 @@ static void nvme_process_db(NvmeCtrl *n, hwaddr addr, int 
val)
 start_sqs = nvme_cq_full(cq) ? 1 : 0;
 cq->head = new_head;
 if (!qid && n->dbbuf_enabled) {
-pci_dma_write(pci, cq->db_addr, >head, sizeof(cq->head));
+v = cpu_to_le32(cq->head);
+pci_dma_write(pci, cq->db_addr, , sizeof(cq->head));
 }
 if (start_sqs) {
 NvmeSQueue *sq;
@@ -7714,6 +7720,8 @@ static void nvme_process_db(NvmeCtrl *n, hwaddr addr, int 
val)
 
 sq->tail = new_tail;
 if (!qid && n->dbbuf_enabled) {
+v = cpu_to_le32(sq->tail);
+
 /*
  * The spec states "the host shall also update the controller's
  * corresponding doorbell property to match the value of that entry
@@ -7727,7 +7735,7 @@ static void nvme_process_db(NvmeCtrl *n, hwaddr addr, int 
val)
  * including ones that run on Linux, are not updating Admin Queues,
  * so we can't trust reading it for an appropriate sq tail.
  */
-pci_dma_write(pci, sq->db_addr, >tail, sizeof(sq->tail));
+pci_dma_write(pci, sq->db_addr, , sizeof(sq->tail));
 }
 
 qemu_bh_schedule(sq->bh);
-- 
2.41.0

1 2 3 4 5 6 7 8 9 10 >

1 - 100 of 2270 matches

Mail list logo