date:20240306

Re: [PATCH v7 9/9] qemu-options.hx: Document the virtio-iommu-pci aw-bits option

2024-03-06 Thread Eric Auger




On 3/6/24 21:48, Philippe Mathieu-Daudé wrote:
> On 6/3/24 21:32, Eric Auger wrote:
>> Document the new aw-bits option.
>>
>> Signed-off-by: Eric Auger 
>> Reviewed-by: Cédric Le Goater 
>>
>> ---
>>
>> v4 -> v5
>> - tweak the aw-bits option description according to Cédric's
>>    suggestion
>> ---
>>   qemu-options.hx | 3 +++
>>   1 file changed, 3 insertions(+)
>>
>> diff --git a/qemu-options.hx b/qemu-options.hx
>> index 757df3eac0..87959ede08 100644
>> --- a/qemu-options.hx
>> +++ b/qemu-options.hx
>> @@ -1180,6 +1180,9 @@ SRST
>>   This decides the default granule to be be exposed by the
>>   virtio-iommu. If host, the granule matches the host page size.
>>   +    ``aw-bits=val`` (val between 32 and 64, default depends on
>> machine)
>> +    This decides the address width of IOVA address space. It
>> defaults
>
> "It defaults"?
Yes, it needs to be removed.
>
> IMO this should be squashed in patch 6/9 "virtio-iommu: Add an option
> to define the input range width".
in [6/9] the default does not yet depend on machine, hence my choice to
keep it separate and after 8/9

Eric
>
>>   ERST
>>     DEF("name", HAS_ARG, QEMU_OPTION_name,
>

[PATCH V3 1/1] target/loongarch: Fixed tlb huge page loading issue

2024-03-06 Thread Xianglai Li

When we use qemu tcg simulation, the page size of bios is 4KB.
When using the level 2 super large page (page size is 1G) to create the page 
table,
it is found that the content of the corresponding address space is abnormal,
so the bios cannot start the operating system and graphical
interface normally.

The lddir and ldpte instruction emulation has
a problem with the use of super large page processing above level 2.
The page size is not correctly calculated,
resulting in the wrong page size of the table entry found by tlb.

Signed-off-by: Xianglai Li 
Cc: maob...@loongson.cn
Cc: Song Gao 
Cc: Xiaojuan Yang 
Cc: zhaotian...@loongson.cn
---
 target/loongarch/internals.h  |  8 +++
 target/loongarch/tcg/tlb_helper.c | 92 +++
 2 files changed, 76 insertions(+), 24 deletions(-)

diff --git a/target/loongarch/internals.h b/target/loongarch/internals.h
index a2fc54c8a7..55ceb4c079 100644
--- a/target/loongarch/internals.h
+++ b/target/loongarch/internals.h
@@ -16,6 +16,14 @@
 #define TARGET_PHYS_MASK MAKE_64BIT_MASK(0, TARGET_PHYS_ADDR_SPACE_BITS)
 #define TARGET_VIRT_MASK MAKE_64BIT_MASK(0, TARGET_VIRT_ADDR_SPACE_BITS)
 
+/*
+ * The [13..14]bits of the entry base address of the lddir/ldpte
+ * directive are used to represent the level of the large page
+ * when processing the huge page entry
+ */
+#define HUGE_PAGE_LEVEL_SHIFT   13
+#define HUGE_PAGE_LEVEL_MASK MAKE_64BIT_MASK(HUGE_PAGE_LEVEL_SHIFT, 2)
+
 /* Global bit used for lddir/ldpte */
 #define LOONGARCH_PAGE_HUGE_SHIFT   6
 /* Global bit for huge page */
diff --git a/target/loongarch/tcg/tlb_helper.c 
b/target/loongarch/tcg/tlb_helper.c
index a08c08b05a..2db77b48c5 100644
--- a/target/loongarch/tcg/tlb_helper.c
+++ b/target/loongarch/tcg/tlb_helper.c
@@ -17,6 +17,9 @@
 #include "exec/log.h"
 #include "cpu-csr.h"
 
+static int get_dir_base_width(CPULoongArchState *env, uint64_t *dir_base,
+  uint64_t *dir_width, target_ulong level);
+
 static void raise_mmu_exception(CPULoongArchState *env, target_ulong address,
 MMUAccessType access_type, int tlb_error)
 {
@@ -487,6 +490,16 @@ target_ulong helper_lddir(CPULoongArchState *env, 
target_ulong base,
 int shift;
 uint64_t dir_base, dir_width;
 bool huge = (base >> LOONGARCH_PAGE_HUGE_SHIFT) & 0x1;
+uint64_t huge_page_level = base & HUGE_PAGE_LEVEL_MASK;
+
+if (huge) {
+if (huge_page_level) {
+return base;
+} else {
+huge_page_level = (level & 0x3) << HUGE_PAGE_LEVEL_SHIFT;
+return base | huge_page_level;
+}
+}
 
 badvaddr = env->CSR_TLBRBADV;
 base = base & TARGET_PHYS_MASK;
@@ -495,30 +508,10 @@ target_ulong helper_lddir(CPULoongArchState *env, 
target_ulong base,
 shift = FIELD_EX64(env->CSR_PWCL, CSR_PWCL, PTEWIDTH);
 shift = (shift + 1) * 3;
 
-if (huge) {
-return base;
-}
-switch (level) {
-case 1:
-dir_base = FIELD_EX64(env->CSR_PWCL, CSR_PWCL, DIR1_BASE);
-dir_width = FIELD_EX64(env->CSR_PWCL, CSR_PWCL, DIR1_WIDTH);
-break;
-case 2:
-dir_base = FIELD_EX64(env->CSR_PWCL, CSR_PWCL, DIR2_BASE);
-dir_width = FIELD_EX64(env->CSR_PWCL, CSR_PWCL, DIR2_WIDTH);
-break;
-case 3:
-dir_base = FIELD_EX64(env->CSR_PWCH, CSR_PWCH, DIR3_BASE);
-dir_width = FIELD_EX64(env->CSR_PWCH, CSR_PWCH, DIR3_WIDTH);
-break;
-case 4:
-dir_base = FIELD_EX64(env->CSR_PWCH, CSR_PWCH, DIR4_BASE);
-dir_width = FIELD_EX64(env->CSR_PWCH, CSR_PWCH, DIR4_WIDTH);
-break;
-default:
-do_raise_exception(env, EXCCODE_INE, GETPC());
+if (get_dir_base_width(env, _base, _width, level) != 0) {
 return 0;
 }
+
 index = (badvaddr >> dir_base) & ((1 << dir_width) - 1);
 phys = base | index << shift;
 ret = ldq_phys(cs->as, phys) & TARGET_PHYS_MASK;
@@ -534,17 +527,38 @@ void helper_ldpte(CPULoongArchState *env, target_ulong 
base, target_ulong odd,
 bool huge = (base >> LOONGARCH_PAGE_HUGE_SHIFT) & 0x1;
 uint64_t ptbase = FIELD_EX64(env->CSR_PWCL, CSR_PWCL, PTBASE);
 uint64_t ptwidth = FIELD_EX64(env->CSR_PWCL, CSR_PWCL, PTWIDTH);
+uint64_t dir_base, dir_width;
+uint64_t huge_page_level;
 
 base = base & TARGET_PHYS_MASK;
 
 if (huge) {
-/* Huge Page. base is paddr */
+/*
+ * Gets the huge page level
+ * Clears the huge page level information in the address
+ * Clears huge page bit
+ * Gets huge page size
+ */
+huge_page_level = (base & HUGE_PAGE_LEVEL_MASK) >>
+  HUGE_PAGE_LEVEL_SHIFT;
+
+base &= ~HUGE_PAGE_LEVEL_MASK;
+
 tmp0 = base ^ (1 << LOONGARCH_PAGE_HUGE_SHIFT);
 /* Move Global bit */
 tmp0 = ((tmp0 & (1 << LOONGARCH_HGLOBAL_SHIFT))  >>
 LOONGARCH_HGLOBAL_SHIFT) << R_TLBENTRY_G_SHIFT |
 (tmp0 &

[PATCH V3 0/1] target/loongarch: Fixed tlb huge page loading issue

2024-03-06 Thread Xianglai Li

When we use qemu tcg simulation, the page size of bios is 4KB.
When using the level 2 super large page (page size is 1G) to create the page 
table,
it is found that the content of the corresponding address space is abnormal,
so the bios cannot start the operating system and graphical
interface normally.

The lddir and ldpte instruction emulation has
a problem with the use of super large page processing above level 2.
The page size is not correctly calculated,
resulting in the wrong page size of the table entry found by tlb.

Changes log:
V2->V3:
Delete the intermediate variable LDDIR_PS, and implement lddir and ldpte
huge pages by referring to the latest architecture reference manual.

V1->V2:
Modified the patch title format and enriched the commit message description

Cc: maob...@loongson.cn
Cc: Song Gao 
Cc: Xiaojuan Yang 
Cc: zhaotian...@loongson.cn

Xianglai Li (1):
  target/loongarch: Fixed tlb huge page loading issue

 target/loongarch/internals.h  |  8 +++
 target/loongarch/tcg/tlb_helper.c | 92 +++
 2 files changed, 76 insertions(+), 24 deletions(-)

-- 
2.39.1

Re: [RISC-V][tech-server-soc] [RFC 2/2] target/riscv: Add server platform reference cpu

2024-03-06 Thread Wu, Fei

On 3/6/2024 9:26 PM, Wu, Fei wrote:
> On 3/5/2024 1:58 PM, Wu, Fei wrote:
>> On 3/5/2024 3:43 AM, Daniel Henrique Barboza wrote:
>>>
>>>
>>> On 3/4/24 07:25, Fei Wu wrote:
 The harts requirements of RISC-V server platform [1] require RVA23 ISA
 profile support, plus Sv48, Svadu, H, Sscofmpf etc. This patch provides
 a virt CPU type (rvsp-ref) as compliant as possible.

 [1]
 https://github.com/riscv-non-isa/riscv-server-platform/blob/main/server_platform_requirements.adoc

 Signed-off-by: Fei Wu 
 ---
   hw/riscv/server_platform_ref.c |  6 +++-
   target/riscv/cpu-qom.h |  1 +
   target/riscv/cpu.c | 62 ++
   3 files changed, 68 insertions(+), 1 deletion(-)

 diff --git a/hw/riscv/server_platform_ref.c
 b/hw/riscv/server_platform_ref.c
 index ae90c4b27a..52ec607cee 100644
 --- a/hw/riscv/server_platform_ref.c
 +++ b/hw/riscv/server_platform_ref.c
 @@ -1205,11 +1205,15 @@ static void
 rvsp_ref_machine_class_init(ObjectClass *oc, void *data)
   {
   char str[128];
   MachineClass *mc = MACHINE_CLASS(oc);
 +    static const char * const valid_cpu_types[] = {
 +    TYPE_RISCV_CPU_RVSP_REF,
 +    };
     mc->desc = "RISC-V Server SoC Reference board";
   mc->init = rvsp_ref_machine_init;
   mc->max_cpus = RVSP_CPUS_MAX;
 -    mc->default_cpu_type = TYPE_RISCV_CPU_BASE;
 +    mc->default_cpu_type = TYPE_RISCV_CPU_RVSP_REF;
 +    mc->valid_cpu_types = valid_cpu_types;
>>>
>>> I suggest introducing this patch first, then the new machine type that
>>> will use it as a default
>>> CPU. The reason is to facilitate future bisects. If we introduce the
>>> board first, a future bisect
>>> might hit the previous patch, the board will be run using RV64 instead
>>> of the correct CPU, and
>>> we'll have different results because of it.
>>>
>> Good suggestion.
>>
   mc->pci_allow_0_address = true;
   mc->default_nic = "e1000e";
   mc->possible_cpu_arch_ids = riscv_numa_possible_cpu_arch_ids;
 diff --git a/target/riscv/cpu-qom.h b/target/riscv/cpu-qom.h
 index 3670cfe6d9..adb934d19e 100644
 --- a/target/riscv/cpu-qom.h
 +++ b/target/riscv/cpu-qom.h
 @@ -49,6 +49,7 @@
   #define TYPE_RISCV_CPU_SIFIVE_U54  
 RISCV_CPU_TYPE_NAME("sifive-u54")
   #define TYPE_RISCV_CPU_THEAD_C906  
 RISCV_CPU_TYPE_NAME("thead-c906")
   #define TYPE_RISCV_CPU_VEYRON_V1   
 RISCV_CPU_TYPE_NAME("veyron-v1")
 +#define TYPE_RISCV_CPU_RVSP_REF RISCV_CPU_TYPE_NAME("rvsp-ref")
   #define TYPE_RISCV_CPU_HOST RISCV_CPU_TYPE_NAME("host")
     OBJECT_DECLARE_CPU_TYPE(RISCVCPU, RISCVCPUClass, RISCV_CPU)
 diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c
 index 5ff0192c52..bc91be702b 100644
 --- a/target/riscv/cpu.c
 +++ b/target/riscv/cpu.c
 @@ -2282,6 +2282,67 @@ static void rva22s64_profile_cpu_init(Object *obj)
     RVA22S64.enabled = true;
   }
 +
 +static void rv64_rvsp_ref_cpu_init(Object *obj)
 +{
 +    CPURISCVState *env = _CPU(obj)->env;
 +    RISCVCPU *cpu = RISCV_CPU(obj);
 +
 +    riscv_cpu_set_misa_ext(env, RVG | RVC | RVS | RVU | RVH | RVV);
 +
 +    /* FIXME: change to 1.13 */
 +    env->priv_ver = PRIV_VERSION_1_12_0;
 +
 +    /* RVA22U64 */
 +    cpu->cfg.mmu = true;
 +    cpu->cfg.ext_zifencei = true;
 +    cpu->cfg.ext_zicsr = true;
 +    cpu->cfg.ext_zicntr = true;
 +    cpu->cfg.ext_zihpm = true;
 +    cpu->cfg.ext_zihintpause = true;
 +    cpu->cfg.ext_zba = true;
 +    cpu->cfg.ext_zbb = true;
 +    cpu->cfg.ext_zbs = true;
 +    cpu->cfg.zic64b = true;
 +    cpu->cfg.ext_zicbom = true;
 +    cpu->cfg.ext_zicbop = true;
 +    cpu->cfg.ext_zicboz = true;
 +    cpu->cfg.cbom_blocksize = 64;
 +    cpu->cfg.cbop_blocksize = 64;
 +    cpu->cfg.cboz_blocksize = 64;
 +    cpu->cfg.ext_zfhmin = true;
 +    cpu->cfg.ext_zkt = true;
>>>
>>> You can change this whole block with:
>>>
>>> RVA22U64.enabled = true;
>>>
>>>
>>> riscv_cpu_add_profiles() will check if we have a profile enabled and, if
>>> that's the
>>> case, we'll enable all its extensions in the CPU.
>>>
>>> In the near future, when we implement a proper RVA23 support, we'll be
>>> able to just do
>>> a single RVA23S64.enabled = true in this cpu_init(). But for now we can
>>> at least declare
>>> RVA22U64 (perhaps RVA22S64) support for this CPU.
>>>
> 
> Hi Daniel,
> 
> I'm not sure if it's a regression or the usage has been changed. I'm not
> able to use '-cpu rva22s64' on latest qemu (db596ae190).
> 
I did a quick git bisect and found that commit d06f28db6 "target/riscv:
move 'mmu' to riscv_cpu_properties[]" disabled mmu by default, so that
an explicit mmu option should be added to qemu command line like '-cpu

Re: [PATCH v7 6/9] virtio-iommu: Add an option to define the input range width

2024-03-06 Thread Eric Auger




On 3/6/24 21:45, Philippe Mathieu-Daudé wrote:
> On 6/3/24 21:32, Eric Auger wrote:
>> aw-bits is a new option that allows to set the bit width of
>> the input address range. This value will be used as a default for
>> the device config input_range.end. By default it is set to 64 bits
>> which is the current value.
>>
>> Signed-off-by: Eric Auger 
>> Reviewed-by: Zhenzhong Duan 
>> Reviewed-by: Cédric Le Goater 
>>
>> ---
>>
>> v1 -> v2:
>> - Check the aw-bits value is within [32,64]
>> ---
>>   include/hw/virtio/virtio-iommu.h | 1 +
>>   hw/virtio/virtio-iommu.c | 7 ++-
>>   2 files changed, 7 insertions(+), 1 deletion(-)
>>
>> diff --git a/include/hw/virtio/virtio-iommu.h
>> b/include/hw/virtio/virtio-iommu.h
>> index 67ea5022af..83a52cc446 100644
>> --- a/include/hw/virtio/virtio-iommu.h
>> +++ b/include/hw/virtio/virtio-iommu.h
>> @@ -68,6 +68,7 @@ struct VirtIOIOMMU {
>>   Notifier machine_done;
>>   bool granule_frozen;
>>   GranuleMode granule_mode;
>> +    uint8_t aw_bits;
>>   };
>>     #endif
>> diff --git a/hw/virtio/virtio-iommu.c b/hw/virtio/virtio-iommu.c
>> index aab97e1527..9b2813188b 100644
>> --- a/hw/virtio/virtio-iommu.c
>> +++ b/hw/virtio/virtio-iommu.c
>> @@ -1314,7 +1314,11 @@ static void
>> virtio_iommu_device_realize(DeviceState *dev, Error **errp)
>>    * in vfio realize
>>    */
>>   s->config.bypass = s->boot_bypass;
>> -    s->config.input_range.end = UINT64_MAX;
>> +    if (s->aw_bits < 32 || s->aw_bits > 64) {
>> +    error_setg(errp, "aw-bits must be within [32,64]");
>
> Don't we need to return?
It looks better. Nevertheless this was tested and it gives the expected
behavior.

Thanks

Eric
>
>> +    }
>> +    s->config.input_range.end =
>> +    s->aw_bits == 64 ? UINT64_MAX : BIT_ULL(s->aw_bits) - 1;
>>     switch (s->granule_mode) {
>>   case GRANULE_MODE_4K:
>> @@ -1544,6 +1548,7 @@ static Property virtio_iommu_properties[] = {
>>   DEFINE_PROP_BOOL("boot-bypass", VirtIOIOMMU, boot_bypass, true),
>>   DEFINE_PROP_GRANULE_MODE("granule", VirtIOIOMMU, granule_mode,
>>    GRANULE_MODE_HOST),
>> +    DEFINE_PROP_UINT8("aw-bits", VirtIOIOMMU, aw_bits, 64),
>>   DEFINE_PROP_END_OF_LIST(),
>>   };
>>   
>

Re: [PATCH v7 4/9] qemu-options.hx: Document the virtio-iommu-pci granule option

2024-03-06 Thread Eric Auger

Hi Philippe,

On 3/6/24 21:44, Philippe Mathieu-Daudé wrote:
> On 6/3/24 21:32, Eric Auger wrote:
>> We are missing an entry for the virtio-iommu-pci device. Add the
>> information on which machine it is currently supported and document
>> the new granule option.
>>
>> Signed-off-by: Eric Auger 
>> ---
>>   qemu-options.hx | 8 
>>   1 file changed, 8 insertions(+)
>>
>> diff --git a/qemu-options.hx b/qemu-options.hx
>> index 9a47385c15..757df3eac0 100644
>> --- a/qemu-options.hx
>> +++ b/qemu-options.hx
>> @@ -1172,6 +1172,14 @@ SRST
>>   Please also refer to the wiki page for general scenarios of VT-d
>>   emulation in QEMU: https://wiki.qemu.org/Features/VT-d.
>>   +``-device virtio-iommu-pci[,option=...]``
>> +    This is only supported by ``-machine q35`` and ``-machine virt``.
>
> Don't we need to specify x86 and arm here?
Well, I was inspired by the intel_iommu doc. I think this is sufficiently
precise, no?

Thanks

Eric
>
>> +    It supports below options:
>> +
>> +    ``granule=val`` (possible values are 4k, 8k, 16k, 64k and host;
>> default: host)
>> +    This decides the default granule to be be exposed by the
>> +    virtio-iommu. If host, the granule matches the host page size.
>> +
>>   ERST
>>     DEF("name", HAS_ARG, QEMU_OPTION_name,
>

Re: [PATCH v4 21/25] vfio: Reverse test on vfio_get_dirty_bitmap()

2024-03-06 Thread Cédric Le Goater


On 3/6/24 21:51, Philippe Mathieu-Daudé wrote:

On 6/3/24 14:34, Cédric Le Goater wrote:

It will simplify the changes coming after.

Signed-off-by: Cédric Le Goater 
---
  hw/vfio/common.c | 22 +-
  1 file changed, 13 insertions(+), 9 deletions(-)

diff --git a/hw/vfio/common.c b/hw/vfio/common.c
index 
5b2e6a179cdd5f8ca5be84b7097661e96b391456..6820d2efe4923d5043da7eb8deecb6ff20e1fd16
 100644
--- a/hw/vfio/common.c
+++ b/hw/vfio/common.c
@@ -1241,16 +1241,20 @@ static void vfio_iommu_map_dirty_notify(IOMMUNotifier 
*n, IOMMUTLBEntry *iotlb)
  }
  rcu_read_lock();
-    if (vfio_get_xlat_addr(iotlb, NULL, _addr, NULL)) {
-    ret = vfio_get_dirty_bitmap(bcontainer, iova, iotlb->addr_mask + 1,
-    translated_addr);
-    if (ret) {
-    error_report("vfio_iommu_map_dirty_notify(%p, 0x%"HWADDR_PRIx", "
- "0x%"HWADDR_PRIx") = %d (%s)",
- bcontainer, iova, iotlb->addr_mask + 1, ret,
- strerror(-ret));
-    }
+    if (!vfio_get_xlat_addr(iotlb, NULL, _addr, NULL)) {
+    goto out_lock;
  }
+
+    ret = vfio_get_dirty_bitmap(bcontainer, iova, iotlb->addr_mask + 1,
+    translated_addr);
+    if (ret) {
+    error_report("vfio_iommu_map_dirty_notify(%p, 0x%"HWADDR_PRIx", "
+ "0x%"HWADDR_PRIx") = %d (%s)",
+ bcontainer, iova, iotlb->addr_mask + 1, ret,
+ strerror(-ret));
+    }
+
+out_lock:


Alternatively use WITH_RCU_READ_LOCK_GUARD() to avoid label.


Sure. I remember your patch. I will resend with your suggestion when
the first part of this series is addressed.

Thanks,

C.

Re: [PATCH 00/14] Cleanup on SMP and its test

2024-03-06 Thread Zhao Liu

Hi Philippe,

> In a previous community call, Zhao asked us how his work will scale
> in the heterogeneous context.
> 
> My first idea is CPUs must belong to a cluster.

Thank you for considering this!

At present, cluster is an arch-specific topology level used by ARM.
So maybe we need to call this abstraction by another name, not "cluster"?

I guess the cluster you mentioned is the cluster device used in TCG,
right? I also tried to eliminate differences between cluster devices
and the cluster level in CPU topology [1].

My previous proposal introduced an abstract topology device [2]. And all
topology specific levels are derived from the underlying topology
device, including CPU.

I feel like this topology device abstraction seems close to your idea,
am I understanding it correctly? ;-)

> For machines without
> explicit cluster, we could always create the first one. Then -smp
> would become a sugar property of the first cluster. Next -smp could
> also be sugar property of the next cluster.

Could you please explain the above ideas more?

It feels we need to split -smp for each cluster. But I'm not sure if
sugar property means defining smp-like properties for each cluster.

Or is there a command line example? ;-)

[1]: 
https://lore.kernel.org/qemu-devel/20231130144203.2307629-23-zhao1@linux.intel.com/
[2]: 
https://lore.kernel.org/qemu-devel/20231130144203.2307629-9-zhao1@linux.intel.com/

Thanks,
Zhao

Re: [PATCH 02/14] hw/core/machine-smp: Deprecate unsupported "parameter=1" SMP configurations

2024-03-06 Thread Zhao Liu

On Thu, Mar 07, 2024 at 07:22:10AM +0100, Thomas Huth wrote:
> Date: Thu, 7 Mar 2024 07:22:10 +0100
> From: Thomas Huth 
> Subject: Re: [PATCH 02/14] hw/core/machine-smp: Deprecate unsupported
>  "parameter=1" SMP configurations
> 
> On 06/03/2024 10.53, Zhao Liu wrote:
> > From: Zhao Liu 
> > 
> > Currentlt, it was allowed for users to specify the unsupported
> 
> s/Currentlt/Currently/
> 
> > topology parameter as "1". For example, x86 PC machine doesn't
> > support drawer/book/cluster topology levels, but user could specify
> > "-smp drawers=1,books=1,clusters=1".
> > 
> > This is meaningless and confusing, so that the support for this kind of
> > configurations is marked depresated since 9.0. And report warning
> 
> s/depresated/deprecated/
> 
> > message for such case like:
> > 
> > qemu-system-x86_64: warning: Deprecated CPU topology (considered invalid):
> >  Unsupported clusters parameter mustn't be specified as 
> > 1
> > qemu-system-x86_64: warning: Deprecated CPU topology (considered invalid):
> >  Unsupported books parameter mustn't be specified as 1
> > qemu-system-x86_64: warning: Deprecated CPU topology (considered invalid):
> >  Unsupported drawers parameter mustn't be specified as 1
> > 
> > Users have to ensure that all the topology members described with -smp
> > are supported by the target machine.
> > 
> > Cc: de...@lists.libvirt.org
> > Signed-off-by: Zhao Liu 
> > ---
> >   docs/about/deprecated.rst | 14 +
> >   hw/core/machine-smp.c | 63 +--
> >   2 files changed, 61 insertions(+), 16 deletions(-)
> > 
> > diff --git a/docs/about/deprecated.rst b/docs/about/deprecated.rst
> > index 872974640252..2e782e83e952 100644
> > --- a/docs/about/deprecated.rst
> > +++ b/docs/about/deprecated.rst
> > @@ -47,6 +47,20 @@ as short-form boolean values, and passed to plugins as 
> > ``arg_name=on``.
> >   However, short-form booleans are deprecated and full explicit 
> > ``arg_name=on``
> >   form is preferred.
> > +``-smp`` (Unsopported "parameter=1" SMP configurations) (since 9.0)
> 
> s/Unsopported/Unsupported/
> 
> > +'''
> > +
> > +Specified CPU topology parameters must be supported by the machine.
> > +
> > +In the SMP configuration, users should provide the CPU topology parameters 
> > that
> > +are supported by the target machine.
> > +
> > +However, historically it was allowed for users to specify the unsupported
> > +topology parameter as "1", which is meaningless. So support for this kind 
> > of
> > +configurations (e.g. -smp drawers=1,books=1,clusters=1 for x86 PC machine) 
> > is
> > +marked depresated since 9.0, users have to ensure that all the topology 
> > members
> 
> s/depresated/deprecated/
> 
> > +described with -smp are supported by the target machine.
> > +
> >   QEMU Machine Protocol (QMP) commands
> >   
> > diff --git a/hw/core/machine-smp.c b/hw/core/machine-smp.c
> > index 96533886b14e..50a5a40dbc3d 100644
> > --- a/hw/core/machine-smp.c
> > +++ b/hw/core/machine-smp.c
> > @@ -112,30 +112,61 @@ void machine_parse_smp_config(MachineState *ms,
> >   /*
> >* If not supported by the machine, a topology parameter must be
> > - * omitted or specified equal to 1.
> > + * omitted.
> >*/
> > -if (!mc->smp_props.dies_supported && dies > 1) {
> > -error_setg(errp, "dies not supported by this machine's CPU 
> > topology");
> > -return;
> > -}
> > -if (!mc->smp_props.clusters_supported && clusters > 1) {
> > -error_setg(errp, "clusters not supported by this machine's CPU 
> > topology");
> > -return;
> > +if (!mc->smp_props.clusters_supported && config->has_clusters) {
> > +if (config->clusters > 1) {
> > +error_setg(errp, "clusters not supported by this "
> > +   "machine's CPU topology");
> > +return;
> > +} else {
> > +/* Here clusters only equals 1 since we've checked zero case. 
> > */
> > +warn_report("Deprecated CPU topology (considered invalid): "
> > +"Unsupported clusters parameter mustn't be "
> > +"specified as 1");
> > +}
> >   }
> > +clusters = clusters > 0 ? clusters : 1;
> > +if (!mc->smp_props.dies_supported && config->has_dies) {
> > +if (config->dies > 1) {
> > +error_setg(errp, "dies not supported by this "
> > +   "machine's CPU topology");
> > +return;
> > +} else {
> > +/* Here dies only equals 1 since we've checked zero case. */
> > +warn_report("Deprecated CPU topology (considered invalid): "
> > +"Unsupported dies parameter mustn't be "
> > +"specified as 1");
> > +}
> > +}
> >

Re: [PATCH 07/14] tests/unit/test-smp-parse: Bump max_cpus to 4096

2024-03-06 Thread Zhao Liu

Hi Thomas,

> >   /*
> > - * config: -smp 512
> > + * config: -smp 4096
> >* The test machine should tweak the supported max CPUs to
> > - * 511 (MAX_CPUS - 1) for testing.
> > + * 4095 (MAX_CPUS - 1) for testing.
> >*/
> > -.config = SMP_CONFIG_GENERIC(T, MAX_CPUS, F, 0, F, 0, F, 0, F, 0),
> > -.expect_error = "Invalid SMP CPUs 512. The max CPUs supported "
> > -"by machine '" SMP_MACHINE_NAME "' is 511",
> > +.config = SMP_CONFIG_GENERIC(T, 4096, F, 0, F, 0, F, 0, F, 0),
> > +.expect_error = "Invalid SMP CPUs 4096. The max CPUs supported "
> 
> You could maybe use stringify(MAX_CPUS) in above line
> (but it won't work for the 4095 below, so it's maybe not worth the effort)

Yes, it's also because of corner cases like 4095 that I don't do such a
thorough cleanup here.
 
> > +"by machine '" SMP_MACHINE_NAME "' is 4095",
> >   },
> >   };
> 
> Reviewed-by: Thomas Huth 
> 

Thanks!

Zhao

RE: [PATCH v4 3/8] configure: add --enable-qpl build option

2024-03-06 Thread Liu, Yuan1

> -Original Message-
> From: Fabiano Rosas 
> Sent: Wednesday, March 6, 2024 7:56 PM
> To: Liu, Yuan1 ; pet...@redhat.com
> Cc: qemu-devel@nongnu.org; hao.xi...@bytedance.com;
> bryan.zh...@bytedance.com; Zou, Nanhai 
> Subject: RE: [PATCH v4 3/8] configure: add --enable-qpl build option
> 
> "Liu, Yuan1"  writes:
> 
> >> -Original Message-
> >> From: Fabiano Rosas 
> >> Sent: Wednesday, March 6, 2024 4:32 AM
> >> To: Liu, Yuan1 ; pet...@redhat.com
> >> Cc: qemu-devel@nongnu.org; hao.xi...@bytedance.com;
> >> bryan.zh...@bytedance.com; Liu, Yuan1 ; Zou,
> Nanhai
> >> 
> >> Subject: Re: [PATCH v4 3/8] configure: add --enable-qpl build option
> >>
> >> Yuan Liu  writes:
> >>
> >> > add --enable-qpl and --disable-qpl options to enable and disable
> >> > the QPL compression method for multifd migration.
> >> >
> >> > the Query Processing Library (QPL) is an open-source library
> >> > that supports data compression and decompression features.
> >> >
> >> > The QPL compression is based on the deflate compression algorithm
> >> > and use Intel In-Memory Analytics Accelerator(IAA) hardware for
> >> > compression and decompression acceleration.
> >> >
> >> > Please refer to the following for more information about QPL
> >> >
> >>
> https://intel.github.io/qpl/documentation/introduction_docs/introduction.h
> >> tml
> >> >
> >> > Signed-off-by: Yuan Liu 
> >> > Reviewed-by: Nanhai Zou 
> >> > ---
> >> >  meson.build   | 18 ++
> >> >  meson_options.txt |  2 ++
> >> >  scripts/meson-buildoptions.sh |  3 +++
> >> >  3 files changed, 23 insertions(+)
> >> >
> >> > diff --git a/meson.build b/meson.build
> >> > index c1dc83e4c0..2dea1e6834 100644
> >> > --- a/meson.build
> >> > +++ b/meson.build
> >> > @@ -1197,6 +1197,22 @@ if not get_option('zstd').auto() or have_block
> >> >  required: get_option('zstd'),
> >> >  method: 'pkg-config')
> >> >  endif
> >> > +qpl = not_found
> >> > +if not get_option('qpl').auto()
> >> > +  libqpl = cc.find_library('qpl', required: false)
> >> > +  if not libqpl.found()
> >> > +error('libqpl not found, please install it from ' +
> >> > +
> >>
> 'https://intel.github.io/qpl/documentation/get_started_docs/installation.h
> >> tml')
> >> > +  endif
> >> > +  libaccel = cc.find_library('accel-config', required: false)
> >> > +  if not libaccel.found()
> >> > +error('libaccel-config not found, please install it from ' +
> >> > +'https://github.com/intel/idxd-config')
> >>
> >> accel-config seems to be packaged by many distros, I'm not sure we need
> >> to reference the repository here.
> >>
> >> https://repology.org/project/accel-config/versions
> >
> > Yes, accel-config has been added to many distributions, I will use
> pkgconfig to
> > detect the libaccel and the version(at least v4.0).
> >
> > I have a question, I didn't find accel-config installation package from
> > https://repology.org/project/accel-config/versions. Does using this link
> also
> > require the user to build an accel-config package, and then install it?
> 
> That is just an aggregated list of distros and the version of the
> package they provide in their repos. So I'm just pointing out to you
> that there seems to be a packaged accel-config for most distros
> already. Which means we just want to say "install accel-config" and
> users should be able to use their distro's package manager.
> 
> >
> > It is easy to install accel-config using the installation package, but I
> didn't
> > find a repo that provides accel-config installation packages for most
> distributions.
> >
> > First check accel-config is available through pkg-config, and if it is
> not available,
> > prompts users to install it from https://github.com/intel/idxd-config,
> is it OK?
> 
> There's no need, just check if its available and suggest the user to
> install it. We already have the link in the docs.

Got it, thanks~

> >
> >> > +  endif
> >> > +  qpl = declare_dependency(dependencies: [libqpl, libaccel,
> >> > +cc.find_library('dl', required: get_option('qpl'))],
> >> > +link_args: ['-lstdc++'])
> >> > +endif
> >> >  virgl = not_found
> >> >
> >> >  have_vhost_user_gpu = have_tools and host_os == 'linux' and
> >> pixman.found()
> >> > @@ -2298,6 +2314,7 @@ config_host_data.set('CONFIG_MALLOC_TRIM',
> >> has_malloc_trim)
> >> >  config_host_data.set('CONFIG_STATX', has_statx)
> >> >  config_host_data.set('CONFIG_STATX_MNT_ID', has_statx_mnt_id)
> >> >  config_host_data.set('CONFIG_ZSTD', zstd.found())
> >> > +config_host_data.set('CONFIG_QPL', qpl.found())
> >> >  config_host_data.set('CONFIG_FUSE', fuse.found())
> >> >  config_host_data.set('CONFIG_FUSE_LSEEK', fuse_lseek.found())
> >> >  config_host_data.set('CONFIG_SPICE_PROTOCOL',
> spice_protocol.found())
> >> > @@ -4438,6 +4455,7 @@ summary_info += {'snappy support':snappy}
> >> >  summary_info += {'bzip2 support': libbzip2}
> >> >  summary_info += {'lzfse support':

Re: [PATCH v5 08/65] kvm: handle KVM_EXIT_MEMORY_FAULT

2024-03-06 Thread Xiaoyao Li


On 3/5/2024 5:10 PM, Isaku Yamahata wrote:

On Thu, Feb 29, 2024 at 01:36:29AM -0500,
Xiaoyao Li  wrote:


From: Chao Peng 

When getting KVM_EXIT_MEMORY_FAULT exit, it indicates userspace needs to
do the memory conversion on the RAMBlock to turn the memory into desired
attribute, i.e., private/shared.

Currently only KVM_MEMORY_EXIT_FLAG_PRIVATE in flags is valid when
KVM_EXIT_MEMORY_FAULT happens.

Note, KVM_EXIT_MEMORY_FAULT makes sense only when the RAMBlock has
guest_memfd memory backend.

Note, KVM_EXIT_MEMORY_FAULT returns with -EFAULT, so special handling is
added.

When page is converted from shared to private, the original shared
memory can be discarded via ram_block_discard_range(). Note, shared
memory can be discarded only when it's not backed by hugetlb because
hugetlb is supposed to be pre-allocated and no need for discarding.

Signed-off-by: Chao Peng 
Co-developed-by: Xiaoyao Li 
Signed-off-by: Xiaoyao Li 

---
Changes in v4:
- open-coded ram_block_discard logic;
- change warn_report() to error_report(); (Daniel)
---
  accel/kvm/kvm-all.c | 94 -
  1 file changed, 84 insertions(+), 10 deletions(-)

diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c
index 70d482a2c936..87e4275932a7 100644
--- a/accel/kvm/kvm-all.c
+++ b/accel/kvm/kvm-all.c
@@ -2903,6 +2903,68 @@ static void kvm_eat_signals(CPUState *cpu)
  } while (sigismember(, SIG_IPI));
  }
  
+static int kvm_convert_memory(hwaddr start, hwaddr size, bool to_private)

+{
+MemoryRegionSection section;
+ram_addr_t offset;
+MemoryRegion *mr;
+RAMBlock *rb;
+void *addr;
+int ret = -1;
+
+if (!QEMU_PTR_IS_ALIGNED(start, qemu_host_page_size) ||
+!QEMU_PTR_IS_ALIGNED(size, qemu_host_page_size)) {
+return -1;
+}
+
+if (!size) {
+return -1;
+}
+
+section = memory_region_find(get_system_memory(), start, size);
+mr = section.mr;
+if (!mr) {
+return -1;
+}
+
+if (memory_region_has_guest_memfd(mr)) {
+if (to_private) {
+ret = kvm_set_memory_attributes_private(start, size);
+} else {
+ret = kvm_set_memory_attributes_shared(start, size);
+}
+
+if (ret) {
+memory_region_unref(section.mr);
+return ret;
+}
+
+addr = memory_region_get_ram_ptr(mr) + section.offset_within_region;
+rb = qemu_ram_block_from_host(addr, false, &offset);
+
+if (to_private) {
+if (rb->page_size != qemu_host_page_size) {
+/*
+* shared memory is back'ed by  hugetlb, which is supposed to be
+* pre-allocated and doesn't need to be discarded
+*/
+return 0;


The reference count leaks here. A memory_region_unref() call needs to be added.


thanks for catching it. Will fix it in next version.


Otherwise looks good to me.
Reviewed-by: Isaku Yamahata

Re: [RFC 1/2] hw/riscv: Add server platform reference machine

2024-03-06 Thread Wu, Fei

On 3/7/2024 8:48 AM, Alistair Francis wrote:
> On Thu, Mar 7, 2024 at 5:13 AM Atish Kumar Patra  wrote:
>>
>> On Wed, Mar 6, 2024 at 4:56 AM Wu, Fei  wrote:
>>>
>>> On 3/6/2024 8:19 AM, Alistair Francis wrote:
 On Mon, Mar 4, 2024 at 8:28 PM Fei Wu  wrote:
>
> The RISC-V Server Platform specification[1] defines a standardized set
> of hardware and software capabilities, that portable system software,
> such as OS and hypervisors can rely on being present in a RISC-V server
> platform.
>
> A corresponding Qemu RISC-V server platform reference (rvsp-ref for
> short) machine type is added to provide an environment for firmware/OS
> development and testing. The main features included in rvsp-ref are:
>
>  - Based on riscv virt machine type
>  - A new memory map as close to the virt machine as possible
>  - A new virt CPU type rvsp-ref-cpu for server platform compliance
>  - AIA
>  - PCIe AHCI
>  - PCIe NIC
>  - No virtio device
>  - No fw_cfg device
>  - No ACPI table provided
>  - Only minimal device tree nodes
>
> [1] https://github.com/riscv-non-isa/riscv-server-platform

 + Atish

>
> Signed-off-by: Fei Wu 
> ---
>  configs/devices/riscv64-softmmu/default.mak |1 +
>  hw/riscv/Kconfig|   13 +
>  hw/riscv/meson.build|1 +
>  hw/riscv/server_platform_ref.c  | 1244 +++
>  4 files changed, 1259 insertions(+)
>  create mode 100644 hw/riscv/server_platform_ref.c
>
> diff --git a/configs/devices/riscv64-softmmu/default.mak 
> b/configs/devices/riscv64-softmmu/default.mak
> index 3f68059448..a1d98e49ef 100644
> --- a/configs/devices/riscv64-softmmu/default.mak
> +++ b/configs/devices/riscv64-softmmu/default.mak
> @@ -10,5 +10,6 @@ CONFIG_SPIKE=y
>  CONFIG_SIFIVE_E=y
>  CONFIG_SIFIVE_U=y
>  CONFIG_RISCV_VIRT=y
> +CONFIG_SERVER_PLATFORM_REF=y
>  CONFIG_MICROCHIP_PFSOC=y
>  CONFIG_SHAKTI_C=y
> diff --git a/hw/riscv/Kconfig b/hw/riscv/Kconfig
> index 5d644eb7b1..debac5a7f5 100644
> --- a/hw/riscv/Kconfig
> +++ b/hw/riscv/Kconfig
> @@ -48,6 +48,19 @@ config RISCV_VIRT
>  select ACPI
>  select ACPI_PCI
>
> +config SERVER_PLATFORM_REF
> +bool
> +select RISCV_NUMA
> +select GOLDFISH_RTC
> +select PCI
> +select PCI_EXPRESS_GENERIC_BRIDGE
> +select PFLASH_CFI01
> +select SERIAL
> +select RISCV_ACLINT
> +select RISCV_APLIC
> +select RISCV_IMSIC
> +select SIFIVE_TEST

 Do we really need SiFive Test in the server platform?

>>> It's used to reset the system, is there any better choice?
> 
> If we add this now we are stuck with it forever (or at least a long
> time). So it'd be nice to think about these and decide if these really
> are the best way to do things. We don't have to just copy the existing
> virt machine.
> 
We need a solution to poweroff/reboot, and sifive test is one of the
hardware implementations, so in general I think it's okay. But I agree
Sifive test looks like a device for testing only.

> There must be a more standard way to do this than MMIO mapped SiFive hardware?
> 
The mapped MMIO mechanism leveraged by Sifive test by itself is kinda
generic, the sbsa_ec for sbsa-ref is also an MMIO mapped device. These
two devices look very similar except different encodings of the
shutdown/reboot command.

Probably we can have a generic shutdown/reboot device in QEMU for both
sifive test and sbsa_ec, and likely more (not in this patch series). In
this way, sifive test device will be replaced by this more generic
device. Any suggestions?

>>>
>>> Probably I can remove the "sifive,test1 sifive,test0" from the
>>> compatible list in fdt, and only keep "syscon", I see opensbi has
>>> already removed that support in commit c2e602707.
>>>
 Same with the goldfish RTC?

>>> Although the spec doesn't make RTC mandatory, it should be a common
>>> practice having a RTC on server, so I add a RTC here no matter it's
>>> goldfish or not.
>>>
>>
>> The platform spec says
>> HPER_070 : A battery-backed RTC or analogous timekeeping mechanism
>> MUST be implemented.
>>
Thanks to Atish for pointing this out.

>> Can we consider goldfish RTC in this category ?
> 
> I think so, although I haven't read the spec yet :)
> 
Yes, I think goldfish RTC falls in this category.

> My point was more that if we are going to implement a new machine, we
> should aim to standardise on things that other
> machines/servers/platforms/architectures do. Some of the things in the
> virt machine are historical because that's what worked at the time.
> But with a clean slate design there might be better alternatives.
> Obviously while still sticking to the spec
> 
>>
>> But I want to discuss a larger point as the server platform/SoC spec
>> defines a

Re: [PATCH 02/14] hw/core/machine-smp: Deprecate unsupported "parameter=1" SMP configurations

2024-03-06 Thread Thomas Huth


On 06/03/2024 10.53, Zhao Liu wrote:

From: Zhao Liu 

Currentlt, it was allowed for users to specify the unsupported


s/Currentlt/Currently/


topology parameter as "1". For example, x86 PC machine doesn't
support drawer/book/cluster topology levels, but user could specify
"-smp drawers=1,books=1,clusters=1".

This is meaningless and confusing, so that the support for this kind of
configurations is marked depresated since 9.0. And report warning


s/depresated/deprecated/


message for such case like:

qemu-system-x86_64: warning: Deprecated CPU topology (considered invalid):
 Unsupported clusters parameter mustn't be specified as 1
qemu-system-x86_64: warning: Deprecated CPU topology (considered invalid):
 Unsupported books parameter mustn't be specified as 1
qemu-system-x86_64: warning: Deprecated CPU topology (considered invalid):
 Unsupported drawers parameter mustn't be specified as 1

Users have to ensure that all the topology members described with -smp
are supported by the target machine.

Cc: de...@lists.libvirt.org
Signed-off-by: Zhao Liu 
---
  docs/about/deprecated.rst | 14 +
  hw/core/machine-smp.c | 63 +--
  2 files changed, 61 insertions(+), 16 deletions(-)

diff --git a/docs/about/deprecated.rst b/docs/about/deprecated.rst
index 872974640252..2e782e83e952 100644
--- a/docs/about/deprecated.rst
+++ b/docs/about/deprecated.rst
@@ -47,6 +47,20 @@ as short-form boolean values, and passed to plugins as 
``arg_name=on``.
  However, short-form booleans are deprecated and full explicit ``arg_name=on``
  form is preferred.
  
+``-smp`` (Unsopported "parameter=1" SMP configurations) (since 9.0)


s/Unsopported/Unsupported/


+'''
+
+Specified CPU topology parameters must be supported by the machine.
+
+In the SMP configuration, users should provide the CPU topology parameters that
+are supported by the target machine.
+
+However, historically it was allowed for users to specify the unsupported
+topology parameter as "1", which is meaningless. So support for this kind of
+configurations (e.g. -smp drawers=1,books=1,clusters=1 for x86 PC machine) is
+marked depresated since 9.0, users have to ensure that all the topology members


s/depresated/deprecated/


+described with -smp are supported by the target machine.
+
  QEMU Machine Protocol (QMP) commands
  
  
diff --git a/hw/core/machine-smp.c b/hw/core/machine-smp.c

index 96533886b14e..50a5a40dbc3d 100644
--- a/hw/core/machine-smp.c
+++ b/hw/core/machine-smp.c
@@ -112,30 +112,61 @@ void machine_parse_smp_config(MachineState *ms,
  
  /*

   * If not supported by the machine, a topology parameter must be
- * omitted or specified equal to 1.
+ * omitted.
   */
-if (!mc->smp_props.dies_supported && dies > 1) {
-error_setg(errp, "dies not supported by this machine's CPU topology");
-return;
-}
-if (!mc->smp_props.clusters_supported && clusters > 1) {
-error_setg(errp, "clusters not supported by this machine's CPU 
topology");
-return;
+if (!mc->smp_props.clusters_supported && config->has_clusters) {
+if (config->clusters > 1) {
+error_setg(errp, "clusters not supported by this "
+   "machine's CPU topology");
+return;
+} else {
+/* Here clusters only equals 1 since we've checked zero case. */
+warn_report("Deprecated CPU topology (considered invalid): "
+"Unsupported clusters parameter mustn't be "
+"specified as 1");
+}
  }
+clusters = clusters > 0 ? clusters : 1;
  
+if (!mc->smp_props.dies_supported && config->has_dies) {

+if (config->dies > 1) {
+error_setg(errp, "dies not supported by this "
+   "machine's CPU topology");
+return;
+} else {
+/* Here dies only equals 1 since we've checked zero case. */
+warn_report("Deprecated CPU topology (considered invalid): "
+"Unsupported dies parameter mustn't be "
+"specified as 1");
+}
+}
  dies = dies > 0 ? dies : 1;
-clusters = clusters > 0 ? clusters : 1;
  
-if (!mc->smp_props.books_supported && books > 1) {

-error_setg(errp, "books not supported by this machine's CPU topology");
-return;
+if (!mc->smp_props.books_supported && config->has_books) {
+if (config->books > 1) {
+error_setg(errp, "books not supported by this "
+   "machine's CPU topology");
+return;
+} else {
+/* Here books only equals 1 since we've checked zero case. */
+warn_report("Deprecated CPU topology (considered invalid): "
+

Re: [PATCH 14/14] tests/unit/test-smp-parse: Test "parameter=0" SMP configurations

2024-03-06 Thread Thomas Huth


On 06/03/2024 10.54, Zhao Liu wrote:

From: Zhao Liu 

The support for "parameter=0" SMP configurations is removed, and QEMU
returns error for those cases.

So add the related test cases to ensure parameters can't accept 0.

Signed-off-by: Zhao Liu 
---
  tests/unit/test-smp-parse.c | 92 +
  1 file changed, 92 insertions(+)


Reviewed-by: Thomas Huth

Re: [PATCH 11/14] tests/unit/test-smp-parse: Test "drawers" and "books" combination case

2024-03-06 Thread Thomas Huth


On 06/03/2024 10.54, Zhao Liu wrote:

From: Zhao Liu 

Since s390 machine supports both "drawers" and "books" in -smp, add the
"drawers" and "books" combination test case to match the actual topology
usage scenario.

Signed-off-by: Zhao Liu 
Tested-by: Xiaoling Song 
---
  tests/unit/test-smp-parse.c | 103 
  1 file changed, 103 insertions(+)


Reviewed-by: Thomas Huth 



diff --git a/tests/unit/test-smp-parse.c b/tests/unit/test-smp-parse.c
index aea1b2e73a55..0cf611519865 100644
--- a/tests/unit/test-smp-parse.c
+++ b/tests/unit/test-smp-parse.c
@@ -445,6 +445,33 @@ static const struct SMPTestData 
data_with_drawers_invalid[] = {
  },
  };
  
+static const struct SMPTestData data_with_drawers_books_invalid[] = {

+{
+/*
+ * config: -smp 200,drawers=2,books=2,sockets=2,cores=4,\
+ * threads=2,maxcpus=200
+ */
+.config = SMP_CONFIG_WITH_BOOKS_DRAWERS(T, 200, T, 3, T, 5, T,
+2, T, 4, T, 2, T, 200),
+.expect_error = "Invalid CPU topology: "
+"product of the hierarchy must match maxcpus: "
+"drawers (3) * books (5) * sockets (2) * "
+"cores (4) * threads (2) != maxcpus (200)",
+}, {
+/*
+ * config: -smp 242,drawers=2,books=2,sockets=2,cores=4,\
+ * threads=2,maxcpus=240
+ */
+.config = SMP_CONFIG_WITH_BOOKS_DRAWERS(T, 242, T, 3, T, 5, T,
+2, T, 4, T, 2, T, 240),
+.expect_error = "Invalid CPU topology: "
+"maxcpus must be equal to or greater than smp: "
+"drawers (3) * books (5) * sockets (2) * "
+"cores (4) * threads (2) "
+"== maxcpus (240) < smp_cpus (242)",
+},
+};
+
  static char *smp_config_to_string(const SMPConfiguration *config)
  {
  return g_strdup_printf(
@@ -698,6 +725,14 @@ static void machine_with_drawers_class_init(ObjectClass 
*oc, void *data)
  mc->smp_props.drawers_supported = true;
  }
  
+static void machine_with_drawers_books_class_init(ObjectClass *oc, void *data)

+{
+MachineClass *mc = MACHINE_CLASS(oc);
+
+mc->smp_props.drawers_supported = true;
+mc->smp_props.books_supported = true;
+}
+
  static void test_generic_valid(const void *opaque)
  {
  const char *machine_type = opaque;
@@ -936,6 +971,67 @@ static void test_with_drawers(const void *opaque)
  object_unref(obj);
  }
  
+static void test_with_drawers_books(const void *opaque)

+{
+const char *machine_type = opaque;
+Object *obj = object_new(machine_type);
+MachineState *ms = MACHINE(obj);
+MachineClass *mc = MACHINE_GET_CLASS(obj);
+SMPTestData data = {};
+unsigned int num_drawers = 5, num_books = 3;
+int i;
+
+for (i = 0; i < ARRAY_SIZE(data_generic_valid); i++) {
+data = data_generic_valid[i];
+unsupported_params_init(mc, &data);
+
+/*
+ * when drawers and books parameters are omitted, they will
+ * be both set as 1.
+ */
+data.expect_prefer_sockets.drawers = 1;
+data.expect_prefer_sockets.books = 1;
+data.expect_prefer_cores.drawers = 1;
+data.expect_prefer_cores.books = 1;
+
+smp_parse_test(ms, &data, true);
+
+/* when drawers and books parameters are both specified */
+data.config.has_drawers = true;
+data.config.drawers = num_drawers;
+data.config.has_books = true;
+data.config.books = num_books;
+
+if (data.config.has_cpus) {
+data.config.cpus *= num_drawers * num_books;
+}
+if (data.config.has_maxcpus) {
+data.config.maxcpus *= num_drawers * num_books;
+}
+
+data.expect_prefer_sockets.drawers = num_drawers;
+data.expect_prefer_sockets.books = num_books;
+data.expect_prefer_sockets.cpus *= num_drawers * num_books;
+data.expect_prefer_sockets.max_cpus *= num_drawers * num_books;
+
+data.expect_prefer_cores.drawers = num_drawers;
+data.expect_prefer_cores.books = num_books;
+data.expect_prefer_cores.cpus *= num_drawers * num_books;
+data.expect_prefer_cores.max_cpus *= num_drawers * num_books;
+
+smp_parse_test(ms, &data, true);
+}
+
+for (i = 0; i < ARRAY_SIZE(data_with_drawers_books_invalid); i++) {
+data = data_with_drawers_books_invalid[i];
+unsupported_params_init(mc, &data);
+
+smp_parse_test(ms, &data, false);
+}
+
+object_unref(obj);
+}
+
  /* Type info of the tested machine */
  static const TypeInfo smp_machine_types[] = {
  {
@@ -968,6 +1064,10 @@ static const TypeInfo smp_machine_types[] = {
  .name   = MACHINE_TYPE_NAME("smp-with-drawers"),
  .parent = TYPE_MACHINE,
  .class_init = machine_with_drawers_class_init,
+}, {
+

Re: [PATCH 07/14] tests/unit/test-smp-parse: Bump max_cpus to 4096

2024-03-06 Thread Thomas Huth


On 06/03/2024 10.54, Zhao Liu wrote:

From: Zhao Liu 

The q35 machine is trying to support up to 4096 vCPUs [1], so it's
necessary to bump max_cpus in test-smp-parse to 4096 to cover the
topological needs of future machines.

[1]: 
https://lore.kernel.org/qemu-devel/20240228143351.3967-1-anisi...@redhat.com/

Signed-off-by: Zhao Liu 
Tested-by: Xiaoling Song 
---
  tests/unit/test-smp-parse.c | 14 +++---
  1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/tests/unit/test-smp-parse.c b/tests/unit/test-smp-parse.c
index 84e342277452..2eb9533bc505 100644
--- a/tests/unit/test-smp-parse.c
+++ b/tests/unit/test-smp-parse.c
@@ -20,8 +20,8 @@
  #define T true
  #define F false
  
-#define MIN_CPUS 1   /* set the min CPUs supported by the machine as 1 */

-#define MAX_CPUS 512 /* set the max CPUs supported by the machine as 512 */
+#define MIN_CPUS 1/* set the min CPUs supported by the machine as 1 */
+#define MAX_CPUS 4096 /* set the max CPUs supported by the machine as 4096 */
  
  #define SMP_MACHINE_NAME "TEST-SMP"
  
@@ -333,13 +333,13 @@ static const struct SMPTestData data_generic_invalid[] = {

  "by machine '" SMP_MACHINE_NAME "' is 2",
  }, {
  /*
- * config: -smp 512
+ * config: -smp 4096
   * The test machine should tweak the supported max CPUs to
- * 511 (MAX_CPUS - 1) for testing.
+ * 4095 (MAX_CPUS - 1) for testing.
   */
-.config = SMP_CONFIG_GENERIC(T, MAX_CPUS, F, 0, F, 0, F, 0, F, 0),
-.expect_error = "Invalid SMP CPUs 512. The max CPUs supported "
-"by machine '" SMP_MACHINE_NAME "' is 511",
+.config = SMP_CONFIG_GENERIC(T, 4096, F, 0, F, 0, F, 0, F, 0),
+.expect_error = "Invalid SMP CPUs 4096. The max CPUs supported "


You could maybe use stringify(MAX_CPUS) in above line
(but it won't work for the 4095 below, so it's maybe not worth the effort)


+"by machine '" SMP_MACHINE_NAME "' is 4095",
  },
  };


Reviewed-by: Thomas Huth

Re: [PATCH v2 07/20] smbios: avoid mangling user provided tables

2024-03-06 Thread Ani Sinha




> On 06-Mar-2024, at 12:11, Ani Sinha  wrote:
> 
> 
> 
> On Tue, 5 Mar 2024, Igor Mammedov wrote:
> 
>> currently smbios_entry_add() preserves internally '-smbios type='
>> options but tables provided with '-smbios file=' are stored directly
>> into blob that eventually will be exposed to VM. And then later
>> QEMU adds default/'-smbios type' entries on top into the same blob.
>> 
>> It makes impossible to generate tables more than once, hence
>> 'immutable' guard was used.
>> Make it possible to regenerate final blob by storing user provided
>> blobs into a dedicated area (usr_blobs) and then copy it when
>> composing final blob. Which also makes handling of -smbios
>> options consistent.
>> 
>> As side effect of this and previous commits there is no need to
>> generate legacy smbios_entries at the time options are parsed.
>> Instead compose smbios_entries on demand from  usr_blobs like
>> it is done for non-legacy SMBIOS tables.
>> 
>> Signed-off-by: Igor Mammedov 
>> Tested-by: Fiona Ebner 
> 
> Reviewed-by: Ani Sinha 
> 
>> ---
>> hw/smbios/smbios.c | 179 +++--
>> 1 file changed, 92 insertions(+), 87 deletions(-)
>> 
>> diff --git a/hw/smbios/smbios.c b/hw/smbios/smbios.c
>> index c46fc93357..aa2cc5bdbd 100644
>> --- a/hw/smbios/smbios.c
>> +++ b/hw/smbios/smbios.c
>> @@ -57,6 +57,14 @@ static size_t smbios_entries_len;
>> static bool smbios_uuid_encoded = true;
>> /* end: legacy structures & constants for <= 2.0 machines */
>> 
>> +/*
>> + * SMBIOS tables provided by user with '-smbios file=' option
>> + */
>> +uint8_t *usr_blobs;
>> +size_t usr_blobs_len;
>> +static GArray *usr_blobs_sizes;
>> +static unsigned usr_table_max;
>> +static unsigned usr_table_cnt;
>> 
>> uint8_t *smbios_tables;
>> size_t smbios_tables_len;
>> @@ -67,7 +75,6 @@ static SmbiosEntryPointType smbios_ep_type = 
>> SMBIOS_ENTRY_POINT_TYPE_32;
>> static SmbiosEntryPoint ep;
>> 
>> static int smbios_type4_count = 0;
>> -static bool smbios_immutable;
>> static bool smbios_have_defaults;
>> static uint32_t smbios_cpuid_version, smbios_cpuid_features;
>> 
>> @@ -569,9 +576,8 @@ static void smbios_build_type_1_fields(void)
>> 
>> uint8_t *smbios_get_table_legacy(uint32_t expected_t4_count, size_t *length)
>> {
>> -/* drop unwanted version of command-line file blob(s) */
>> -g_free(smbios_tables);
>> -smbios_tables = NULL;
>> +int i;
>> +size_t usr_offset;
>> 
>> /* also complain if fields were given for types > 1 */
>> if (find_next_bit(have_fields_bitmap,
>> @@ -581,12 +587,33 @@ uint8_t *smbios_get_table_legacy(uint32_t 
>> expected_t4_count, size_t *length)
>> exit(1);
>> }
>> 
>> -if (!smbios_immutable) {
>> -smbios_build_type_0_fields();
>> -smbios_build_type_1_fields();
>> -smbios_validate_table(expected_t4_count);
>> -smbios_immutable = true;
>> +g_free(smbios_entries);
>> +smbios_entries_len = sizeof(uint16_t);
>> +smbios_entries = g_malloc0(smbios_entries_len);
>> +
>> +for (i = 0, usr_offset = 0; usr_blobs_sizes && i < usr_blobs_sizes->len;
>> + i++)
>> +{
>> +struct smbios_table *table;
>> +struct smbios_structure_header *header;
>> +size_t size = g_array_index(usr_blobs_sizes, size_t, i);
>> +
>> +header = (struct smbios_structure_header *)(usr_blobs + usr_offset);
>> +smbios_entries = g_realloc(smbios_entries, smbios_entries_len +
>> +   size + sizeof(*table));
>> +table = (struct smbios_table *)(smbios_entries + 
>> smbios_entries_len);
>> +table->header.type = SMBIOS_TABLE_ENTRY;
>> +table->header.length = cpu_to_le16(sizeof(*table) + size);
>> +memcpy(table->data, header, size);
>> +smbios_entries_len += sizeof(*table) + size;
>> +(*(uint16_t *)smbios_entries) =
>> +cpu_to_le16(le16_to_cpu(*(uint16_t *)smbios_entries) + 1);
> 
> I know this comes from existing code but can you please explain why we add
> 1 to it? This is confusing and a comment here would be nice.
> 
>> +usr_offset += size;
> 
> It would be better if we could add a comment here describing a bit what
> this is all about.
> 
> user blobs are an array of smbios_structure_header entries whereas legacy
> tables are an array of smbios_table structures where
> > smbios_table->data represents a single user provided table blob in
> smbios_structure_header.

Igor, are you going to send a v3 for this with the comments added?

> 
>> }
>> +
>> +smbios_build_type_0_fields();
>> +smbios_build_type_1_fields();
>> +smbios_validate_table(expected_t4_count);
>> *length = smbios_entries_len;
>> return smbios_entries;
>> }
>> @@ -1094,67 +1121,67 @@ void smbios_get_tables(MachineState *ms,
>> {
>> unsigned i, dimm_cnt, offset;
>> 
>> -/* drop unwanted (legacy) version of command-line file blob(s) */
>> -g_free(smbios_entries);
>> -

Re: [PATCH v2 1/2] hw/arm/virt-acpi-build.c: Migrate SPCR creation to common location

2024-03-06 Thread Sunil V L

On Thu, Mar 07, 2024 at 11:33:25AM +1000, Alistair Francis wrote:
> On Thu, Mar 7, 2024 at 4:59 AM Daniel Henrique Barboza
>  wrote:
> >
> > Hi,
> >
> > This patch breaks check-qtest, most specifically 'bios-tables-test', for 
> > aarch64.
> > I found this while running riscv-to-apply.next in the Gitlab pipeline.
> >
> >
> > Here's the output:
> >
> > $ make -j && QTEST_QEMU_BINARY=./qemu-system-aarch64 V=1 
> > ./tests/qtest/bios-tables-test
> > TAP version 13
> > # random seed: R02Sf0f2fa0a3fac5d540b1681c820621b7d
> > # starting QEMU: exec ./qemu-system-aarch64 -qtest 
> > unix:/tmp/qtest-591353.sock -qtest-log /dev/null -chardev 
> > socket,path=/tmp/qtest-591353.qmp,id=char0 -mon chardev=char0,mode=control 
> > -display none -audio none -machine none -accel qtest
> > 1..8
> > # Start of aarch64 tests
> > # Start of acpi tests
> > # starting QEMU: exec ./qemu-system-aarch64 -qtest 
> > unix:/tmp/qtest-591353.sock -qtest-log /dev/null -chardev 
> > socket,path=/tmp/qtest-591353.qmp,id=char0 -mon chardev=char0,mode=control 
> > -display none -audio none -machine virt  -accel tcg -nodefaults -nographic 
> > -drive if=pflash,format=raw,file=pc-bios/edk2-aarch64-code.fd,readonly=on 
> > -drive if=pflash,format=raw,file=pc-bios/edk2-arm-vars.fd,snapshot=on 
> > -cdrom tests/data/uefi-boot-images/bios-tables-test.aarch64.iso.qcow2 -cpu 
> > cortex-a57 -smbios type=4,max-speed=2900,current-speed=2700 -accel qtest
> > acpi-test: Warning! SPCR binary file mismatch. Actual 
> > [aml:/tmp/aml-9G53J2], Expected [aml:tests/data/acpi/virt/SPCR].
> > See source file tests/qtest/bios-tables-test.c for instructions on how to 
> > update expected files.
> > acpi-test: Warning! SPCR mismatch. Actual [asl:/tmp/asl-SR53J2.dsl, 
> > aml:/tmp/aml-9G53J2], Expected [asl:/tmp/asl-4Z33J2.dsl, 
> > aml:tests/data/acpi/virt/SPCR].
> >
> > The diff is here:
> >
> > --- /tmp/asl-4Z33J2.dsl 2024-03-06 15:40:24.879879348 -0300
> > +++ /tmp/asl-SR53J2.dsl 2024-03-06 15:40:24.877879347 -0300
> > @@ -1,57 +1,49 @@
> >   /*
> >* Intel ACPI Component Architecture
> >* AML/ASL+ Disassembler version 20220331 (64-bit version)
> >* Copyright (c) 2000 - 2022 Intel Corporation
> >
> > (...)
> >
> >   [000h    4]Signature : "SPCR"[Serial Port 
> > Console Redirection Table]
> > -[004h 0004   4] Table Length : 0050
> > +[004h 0004   4] Table Length : 004F
> >   [008h 0008   1] Revision : 02
> > -[009h 0009   1] Checksum : B1
> > +[009h 0009   1] Checksum : B2
> >   [00Ah 0010   6]   Oem ID : "BOCHS "
> >
> > (...)
> >
> > -[042h 0066   2]PCI Vendor ID : 
> > +[042h 0066   2]PCI Vendor ID : 00FF
> >
> >
> > After inspecting the common helper and what the original ARM code was doing
> > I found out that we're missing something down there:
> >
> >
> > On 1/15/24 22:09, Sia Jee Heng wrote:
> > > RISC-V should also generate the SPCR in a manner similar to ARM.
> > > Therefore, instead of replicating the code, relocate this function
> > > to the common AML build.
> > >
> > > Signed-off-by: Sia Jee Heng 
> > > ---
> > >   hw/acpi/aml-build.c | 51 
> > >   hw/arm/virt-acpi-build.c| 68 +++--
> > >   include/hw/acpi/acpi-defs.h | 33 ++
> > >   include/hw/acpi/aml-build.h |  4 +++
> > >   4 files changed, 115 insertions(+), 41 deletions(-)
> > >
> > > diff --git a/hw/acpi/aml-build.c b/hw/acpi/aml-build.c
> > > index af66bde0f5..f3904650e4 100644
> > > --- a/hw/acpi/aml-build.c
> > > +++ b/hw/acpi/aml-build.c
> > > @@ -1994,6 +1994,57 @@ static void build_processor_hierarchy_node(GArray 
> > > *tbl, uint32_t flags,
> > >   }
> > >   }
> > >
> > > +void build_spcr(GArray *table_data, BIOSLinker *linker,
> > > +const AcpiSpcrData *f, const uint8_t rev,
> > > +const char *oem_id, const char *oem_table_id)
> > > +{
> > > +AcpiTable table = { .sig = "SPCR", .rev = rev, .oem_id = oem_id,
> > > +.oem_table_id = oem_table_id };
> > > +
> > > +acpi_table_begin(&table, table_data);
> > > +/* Interface type */
> > > +build_append_int_noprefix(table_data, f->interface_type, 1);
> > > +/* Reserved */
> > > +build_append_int_noprefix(table_data, 0, 3);
> > > +/* Base Address */
> > > +build_append_gas(table_data, f->base_addr.id, f->base_addr.width,
> > > + f->base_addr.offset, f->base_addr.size,
> > > + f->base_addr.addr);
> > > +/* Interrupt type */
> > > +build_append_int_noprefix(table_data, f->interrupt_type, 1);
> > > +/* IRQ */
> > > +build_append_int_noprefix(table_data, f->pc_interrupt, 1);
> > > +/* Global System Interrupt */
> > > +build_append_int_noprefix(table_data, f->interrupt, 4);
> > > +/* Baud Rate */
> > > +

Re: Problem with migration/rdma

2024-03-06 Thread Peter Xu

On Thu, Mar 07, 2024 at 02:41:37AM +, Zhijian Li (Fujitsu) via wrote:
> Yu,
> 
> 
> On 07/03/2024 00:30, Philippe Mathieu-Daudé wrote:
> > Cc'ing RDMA migration reviewers/maintainers:
> > 
> > $ ./scripts/get_maintainer.pl -f migration/rdma.c
> > Li Zhijian  (reviewer:RDMA Migration)
> > Peter Xu  (maintainer:Migration)
> > Fabiano Rosas  (maintainer:Migration)
> > 
> > On 5/3/24 22:32, Yu Zhang wrote:
> >> Hello Het and all,
> >>
> >> while I was testing qemu-8.2, I saw a lot of our migration test cases 
> >> failed.
> >> After debugging the commits of the 8.2 branch, I saw the issue and made a 
> >> diff:
> >>
> >> diff --git a/migration/rdma.c b/migration/rdma.c
> >> index 6a29e53daf..f10d56f556 100644
> >> --- a/migration/rdma.c
> >> +++ b/migration/rdma.c
> >> @@ -3353,9 +3353,9 @@ static int qemu_rdma_accept(RDMAContext *rdma)
> >>   goto err_rdma_dest_wait;
> >>   }
> >>
> >> -    isock->host = rdma->host;
> >> +    isock->host = g_strdup_printf("%s", rdma->host);
> >>   isock->port = g_strdup_printf("%d", rdma->port);
> 
> 
> Thanks for your analysis.
> 
> It will be great if you send this as a patch.
> 
> 
> isock is defined as a g_autoptr VVV
>  g_autoptr(InetSocketAddress) isock = g_new0(InetSocketAddress, 1);
> 
> I'm surprised that the auto-free scheme seems to free the members of 
> isock as well;
> see the valgrind log below. That will cause a double free.

Right, all the QAPI-free is a deep one.  Thanks for checking this up,
Zhijian.

Yu, would you please send a formal patch (better before this week ends) so
that I can include it for the last pull for 9.0 soft-freeze (March 12th)?
As 8.2 is affected, please also attach proper tags:

Cc: qemu-stable 
Fixes: 3fa9642ff7 ("migration: convert rdma backend to accept MigrateAddress")

> 
> ==809138== Invalid free() / delete / delete[] / realloc()
> ==809138==at 0x483A9F5: free (vg_replace_malloc.c:538)
> ==809138==by 0x598F70C: g_free (in /usr/lib64/libglib-2.0.so.0.6600.8)
> ==809138==by 0x79B6AD: qemu_rdma_cleanup (rdma.c:2432)
> ==809138==by 0x79CEE6: qio_channel_rdma_close_rcu (rdma.c:3108)
> ==809138==by 0xC2E339: call_rcu_thread (rcu.c:301)
> ==809138==by 0xC2116A: qemu_thread_start (qemu-thread-posix.c:541)
> ==809138==by 0x72683F8: ??? (in /usr/lib64/libpthread-2.32.so)
> ==809138==by 0x73824C2: clone (in /usr/lib64/libc-2.32.so)
> ==809138==  Address 0x13daa070 is 0 bytes inside a block of size 14 free'd
> ==809138==at 0x483A9F5: free (vg_replace_malloc.c:538)
> ==809138==by 0x598F70C: g_free (in /usr/lib64/libglib-2.0.so.0.6600.8)
> ==809138==by 0xC058CF: qapi_dealloc_type_str (qapi-dealloc-visitor.c:68)
> ==809138==by 0xC09EF3: visit_type_str (qapi-visit-core.c:349)
> ==809138==by 0xBDDECC: visit_type_InetSocketAddressBase_members 
> (qapi-visit-sockets.c:29)
> ==809138==by 0xBDE055: visit_type_InetSocketAddress_members 
> (qapi-visit-sockets.c:67)
> ==809138==by 0xBDE30D: visit_type_InetSocketAddress 
> (qapi-visit-sockets.c:119)
> ==809138==by 0xBDDB38: qapi_free_InetSocketAddress 
> (qapi-types-sockets.c:51)
> ==809138==by 0x792351: glib_autoptr_clear_InetSocketAddress 
> (qapi-types-sockets.h:109)
> ==809138==by 0x79236F: glib_autoptr_cleanup_InetSocketAddress 
> (qapi-types-sockets.h:109)
> ==809138==by 0x79D956: qemu_rdma_accept (rdma.c:3341)
> ==809138==by 0x79F05A: rdma_accept_incoming_migration (rdma.c:4041)
> ==809138==  Block was alloc'd at
> ==809138==at 0x4839809: malloc (vg_replace_malloc.c:307)
> ==809138==by 0x5992BB8: g_malloc (in /usr/lib64/libglib-2.0.so.0.6600.8)
> ==809138==by 0x59A7FE3: g_strdup (in /usr/lib64/libglib-2.0.so.0.6600.8)
> ==809138==by 0x79C2A8: qemu_rdma_data_init (rdma.c:2731)
> ==809138==by 0x79F183: rdma_start_incoming_migration (rdma.c:4081)
> ==809138==by 0x76F200: qemu_start_incoming_migration (migration.c:581)
> ==809138==by 0x77193A: qmp_migrate_incoming (migration.c:1735)
> ==809138==by 0x74B3D3: qmp_x_exit_preconfig (vl.c:2718)
> ==809138==by 0x74DB6F: qemu_init (vl.c:3753)
> ==809138==by 0xA14F3F: main (main.c:47)

-- 
Peter Xu

[PATCH v2 3/9] Hexagon (target/hexagon) Mark dest_idx in trans functions

2024-03-06 Thread Taylor Simpson

Check that the value matches opcode_reginfo/opcode_wregs

Signed-off-by: Taylor Simpson 
---
 target/hexagon/insn.h   | 1 +
 target/hexagon/decode.c | 2 ++
 target/hexagon/mmvec/decode_ext_mmvec.c | 2 ++
 target/hexagon/gen_trans_funcs.py   | 6 ++
 4 files changed, 11 insertions(+)

diff --git a/target/hexagon/insn.h b/target/hexagon/insn.h
index 36502bf056..a770379958 100644
--- a/target/hexagon/insn.h
+++ b/target/hexagon/insn.h
@@ -40,6 +40,7 @@ struct Instruction {
 uint32_t which_extended:1;/* If has an extender, which immediate */
 uint32_t new_value_producer_slot:4;
 int32_t new_read_idx;
+int32_t dest_idx;
 
 bool part1;  /*
   * cmp-jumps are split into two insns.
diff --git a/target/hexagon/decode.c b/target/hexagon/decode.c
index 4595e30384..a4d8500fea 100644
--- a/target/hexagon/decode.c
+++ b/target/hexagon/decode.c
@@ -184,6 +184,8 @@ decode_fill_newvalue_regno(Packet *packet)
 
 /* Now patch up the consumer with the register number */
 dst_idx = dststr - opcode_reginfo[def_opcode];
+g_assert(packet->insn[def_idx].dest_idx != -1 &&
+ packet->insn[def_idx].dest_idx == dst_idx);
 packet->insn[i].regno[use_regidx] =
 packet->insn[def_idx].regno[dst_idx];
 /*
diff --git a/target/hexagon/mmvec/decode_ext_mmvec.c 
b/target/hexagon/mmvec/decode_ext_mmvec.c
index e9007f5d71..c1320406df 100644
--- a/target/hexagon/mmvec/decode_ext_mmvec.c
+++ b/target/hexagon/mmvec/decode_ext_mmvec.c
@@ -86,6 +86,8 @@ check_new_value(Packet *pkt)
 /* still not there, we have a bad packet */
 g_assert_not_reached();
 }
+g_assert(pkt->insn[def_idx].dest_idx != -1 &&
+ pkt->insn[def_idx].dest_idx == dststr - reginfo);
 int def_regnum = pkt->insn[def_idx].regno[dststr - reginfo];
 /* Now patch up the consumer with the register number */
 pkt->insn[i].regno[use_regidx] = def_regnum ^ def_oreg;
diff --git a/target/hexagon/gen_trans_funcs.py 
b/target/hexagon/gen_trans_funcs.py
index 8acecdb993..1201172dda 100755
--- a/target/hexagon/gen_trans_funcs.py
+++ b/target/hexagon/gen_trans_funcs.py
@@ -69,6 +69,7 @@ def mark_which_imm_extended(f, tag):
 ## insn->regno[1] = args->Rs;
 ## insn->regno[2] = args->Rt;
 ## insn->new_read_idx = -1;
+## insn->dest_idx = 0;
 ## return true;
 ## }
 ##
@@ -86,6 +87,7 @@ def gen_trans_funcs(f):
 """))
 
 new_read_idx = -1
+dest_idx = -1
 for regno, (reg_type, reg_id, *_) in enumerate(regs):
 reg = hex_common.get_register(tag, reg_type, reg_id)
 f.write(code_fmt(f"""\
@@ -93,6 +95,9 @@ def gen_trans_funcs(f):
 """))
 if reg.is_read() and reg.is_new():
 new_read_idx = regno
+# dest_idx should be the first destination, so check for -1
+if reg.is_written() and dest_idx == -1:
+dest_idx = regno
 
 if len(imms) != 0:
 mark_which_imm_extended(f, tag)
@@ -115,6 +120,7 @@ def gen_trans_funcs(f):
 
 f.write(code_fmt(f"""\
 insn->new_read_idx = {new_read_idx};
+insn->dest_idx = {dest_idx};
 """))
 f.write(textwrap.dedent(f"""\
 return true;
-- 
2.34.1

[PATCH v2 8/9] Hexagon (target/hexagon) Remove gen_shortcode.py

2024-03-06 Thread Taylor Simpson

The opcode_short_semantics data structure (built from shortcode_generated.h.inc) is not used

Signed-off-by: Taylor Simpson 
---
 target/hexagon/opcodes.c|  7 
 target/hexagon/README   |  1 -
 target/hexagon/gen_shortcode.py | 63 -
 target/hexagon/meson.build  | 10 --
 4 files changed, 81 deletions(-)
 delete mode 100755 target/hexagon/gen_shortcode.py

diff --git a/target/hexagon/opcodes.c b/target/hexagon/opcodes.c
index 02ae9cf787..c8bde2f9e9 100644
--- a/target/hexagon/opcodes.c
+++ b/target/hexagon/opcodes.c
@@ -37,13 +37,6 @@ const char * const opcode_names[] = {
 };
 
 
-const char * const opcode_short_semantics[] = {
-#define DEF_SHORTCODE(TAG, SHORTCODE)  [TAG] = #SHORTCODE,
-#include "shortcode_generated.h.inc"
-#undef DEF_SHORTCODE
-NULL
-};
-
 DECLARE_BITMAP(opcode_attribs[XX_LAST_OPCODE], A_ZZ_LASTATTRIB);
 
 static void init_attribs(int tag, ...)
diff --git a/target/hexagon/README b/target/hexagon/README
index 065c05154d..65b4fcc0fa 100644
--- a/target/hexagon/README
+++ b/target/hexagon/README
@@ -46,7 +46,6 @@ header files in /target/hexagon
 gen_printinsn.py-> printinsn_generated.h.inc
 gen_op_attribs.py   -> op_attribs_generated.h.inc
 gen_helper_protos.py-> helper_protos_generated.h.inc
-gen_shortcode.py-> shortcode_generated.h.inc
 gen_tcg_funcs.py-> tcg_funcs_generated.c.inc
 gen_tcg_func_table.py   -> tcg_func_table_generated.c.inc
 gen_helper_funcs.py -> helper_funcs_generated.c.inc
diff --git a/target/hexagon/gen_shortcode.py b/target/hexagon/gen_shortcode.py
deleted file mode 100755
index deb94446c4..0000000000
--- a/target/hexagon/gen_shortcode.py
+++ /dev/null
@@ -1,63 +0,0 @@
-#!/usr/bin/env python3
-
-##
-##  Copyright(c) 2019-2023 Qualcomm Innovation Center, Inc. All Rights 
Reserved.
-##
-##  This program is free software; you can redistribute it and/or modify
-##  it under the terms of the GNU General Public License as published by
-##  the Free Software Foundation; either version 2 of the License, or
-##  (at your option) any later version.
-##
-##  This program is distributed in the hope that it will be useful,
-##  but WITHOUT ANY WARRANTY; without even the implied warranty of
-##  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-##  GNU General Public License for more details.
-##
-##  You should have received a copy of the GNU General Public License
-##  along with this program; if not, see <http://www.gnu.org/licenses/>.
-##
-
-import sys
-import re
-import string
-import hex_common
-
-
-def gen_shortcode(f, tag):
-f.write(f"DEF_SHORTCODE({tag}, {hex_common.semdict[tag]})\n")
-
-
-def main():
-hex_common.read_semantics_file(sys.argv[1])
-hex_common.read_attribs_file(sys.argv[2])
-hex_common.calculate_attribs()
-tagregs = hex_common.get_tagregs()
-tagimms = hex_common.get_tagimms()
-
-with open(sys.argv[3], "w") as f:
-f.write("#ifndef DEF_SHORTCODE\n")
-f.write("#define DEF_SHORTCODE(TAG,SHORTCODE)/* Nothing */\n")
-f.write("#endif\n")
-
-for tag in hex_common.tags:
-## Skip the priv instructions
-if "A_PRIV" in hex_common.attribdict[tag]:
-continue
-## Skip the guest instructions
-if "A_GUEST" in hex_common.attribdict[tag]:
-continue
-## Skip the diag instructions
-if tag == "Y6_diag":
-continue
-if tag == "Y6_diag0":
-continue
-if tag == "Y6_diag1":
-continue
-
-gen_shortcode(f, tag)
-
-f.write("#undef DEF_SHORTCODE\n")
-
-
-if __name__ == "__main__":
-main()
diff --git a/target/hexagon/meson.build b/target/hexagon/meson.build
index b3a0944d3b..988e7489ba 100644
--- a/target/hexagon/meson.build
+++ b/target/hexagon/meson.build
@@ -42,21 +42,11 @@ hexagon_ss.add(semantics_generated)
 #
 # Step 2
 # We use Python scripts to generate the following files
-# shortcode_generated.h.inc
 # tcg_func_table_generated.c.inc
 # printinsn_generated.h.inc
 # op_attribs_generated.h.inc
 # opcodes_def_generated.h.inc
 #
-shortcode_generated = custom_target(
-'shortcode_generated.h.inc',
-output: 'shortcode_generated.h.inc',
-depends: [semantics_generated],
-depend_files: [hex_common_py, attribs_def],
-command: [python, files('gen_shortcode.py'), semantics_generated, 
attribs_def, '@OUTPUT@'],
-)
-hexagon_ss.add(shortcode_generated)
-
 tcg_func_table_generated = custom_target(
 'tcg_func_table_generated.c.inc',
 output: 'tcg_func_table_generated.c.inc',
-- 
2.34.1

[PATCH v2 2/9] Hexagon (target/hexagon) Mark new_read_idx in trans functions

2024-03-06 Thread Taylor Simpson

Check that the value matches opcode_reginfo

Signed-off-by: Taylor Simpson 
---
 target/hexagon/insn.h   |  3 ++-
 target/hexagon/decode.c |  2 ++
 target/hexagon/mmvec/decode_ext_mmvec.c |  2 ++
 target/hexagon/gen_trans_funcs.py   | 15 ++-
 4 files changed, 16 insertions(+), 6 deletions(-)

diff --git a/target/hexagon/insn.h b/target/hexagon/insn.h
index 3e7a22c91e..36502bf056 100644
--- a/target/hexagon/insn.h
+++ b/target/hexagon/insn.h
@@ -1,5 +1,5 @@
 /*
- *  Copyright(c) 2019-2022 Qualcomm Innovation Center, Inc. All Rights 
Reserved.
+ *  Copyright(c) 2019-2024 Qualcomm Innovation Center, Inc. All Rights 
Reserved.
  *
  *  This program is free software; you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
@@ -39,6 +39,7 @@ struct Instruction {
 uint32_t slot:3;
 uint32_t which_extended:1;/* If has an extender, which immediate */
 uint32_t new_value_producer_slot:4;
+int32_t new_read_idx;
 
 bool part1;  /*
   * cmp-jumps are split into two insns.
diff --git a/target/hexagon/decode.c b/target/hexagon/decode.c
index a40210ca1e..4595e30384 100644
--- a/target/hexagon/decode.c
+++ b/target/hexagon/decode.c
@@ -131,6 +131,8 @@ decode_fill_newvalue_regno(Packet *packet)
 use_regidx = strchr(opcode_reginfo[use_opcode], 's') -
 opcode_reginfo[use_opcode];
 }
+g_assert(packet->insn[i].new_read_idx != -1 &&
+ packet->insn[i].new_read_idx == use_regidx);
 
 /*
  * What's encoded at the N-field is the offset to who's producing
diff --git a/target/hexagon/mmvec/decode_ext_mmvec.c 
b/target/hexagon/mmvec/decode_ext_mmvec.c
index 202d84c7c0..e9007f5d71 100644
--- a/target/hexagon/mmvec/decode_ext_mmvec.c
+++ b/target/hexagon/mmvec/decode_ext_mmvec.c
@@ -41,6 +41,8 @@ check_new_value(Packet *pkt)
 GET_ATTRIB(use_opcode, A_STORE)) {
 int use_regidx = strchr(opcode_reginfo[use_opcode], 's') -
 opcode_reginfo[use_opcode];
+g_assert(pkt->insn[i].new_read_idx != -1 &&
+ pkt->insn[i].new_read_idx == use_regidx);
 /*
  * What's encoded at the N-field is the offset to who's producing
  * the value.
diff --git a/target/hexagon/gen_trans_funcs.py 
b/target/hexagon/gen_trans_funcs.py
index 53e844a44b..8acecdb993 100755
--- a/target/hexagon/gen_trans_funcs.py
+++ b/target/hexagon/gen_trans_funcs.py
@@ -68,6 +68,7 @@ def mark_which_imm_extended(f, tag):
 ## insn->regno[0] = args->Rd;
 ## insn->regno[1] = args->Rs;
 ## insn->regno[2] = args->Rt;
+## insn->new_read_idx = -1;
 ## return true;
 ## }
 ##
@@ -84,14 +85,14 @@ def gen_trans_funcs(f):
 insn->opcode = {tag};
 """))
 
-regno = 0
-for reg in regs:
-reg_type = reg[0]
-reg_id = reg[1]
+new_read_idx = -1
+for regno, (reg_type, reg_id, *_) in enumerate(regs):
+reg = hex_common.get_register(tag, reg_type, reg_id)
 f.write(code_fmt(f"""\
 insn->regno[{regno}] = args->{reg_type}{reg_id};
 """))
-regno += 1
+if reg.is_read() and reg.is_new():
+new_read_idx = regno
 
 if len(imms) != 0:
 mark_which_imm_extended(f, tag)
@@ -112,6 +113,9 @@ def gen_trans_funcs(f):
 insn->immed[{immno}] = args->{imm_type}{imm_letter};
 """))
 
+f.write(code_fmt(f"""\
+insn->new_read_idx = {new_read_idx};
+"""))
 f.write(textwrap.dedent(f"""\
 return true;
 {close_curly}
@@ -120,5 +124,6 @@ def gen_trans_funcs(f):
 
 if __name__ == "__main__":
 hex_common.read_semantics_file(sys.argv[1])
+hex_common.init_registers()
 with open(sys.argv[2], "w") as f:
 gen_trans_funcs(f)
-- 
2.34.1

[PATCH v2 9/9] Hexagon (target/hexagon) Remove hex_common.read_attribs_file

2024-03-06 Thread Taylor Simpson

The attribinfo data structure is not used
Adjust the command-line arguments to the python scripts
Add hex_common.read_common_files for TCG/helper generation scripts

Signed-off-by: Taylor Simpson 
---
 target/hexagon/gen_analyze_funcs.py | 21 ++-
 target/hexagon/gen_helper_funcs.py  | 21 ++-
 target/hexagon/gen_helper_protos.py | 21 ++-
 target/hexagon/gen_idef_parser_funcs.py |  5 ++--
 target/hexagon/gen_op_attribs.py|  5 ++--
 target/hexagon/gen_opcodes_def.py   |  4 +--
 target/hexagon/gen_printinsn.py |  5 ++--
 target/hexagon/gen_tcg_func_table.py|  5 ++--
 target/hexagon/gen_tcg_funcs.py | 21 ++-
 target/hexagon/hex_common.py| 35 +++--
 target/hexagon/meson.build  | 31 +++---
 11 files changed, 54 insertions(+), 120 deletions(-)

diff --git a/target/hexagon/gen_analyze_funcs.py 
b/target/hexagon/gen_analyze_funcs.py
index a9af666cef..b73b4e2349 100755
--- a/target/hexagon/gen_analyze_funcs.py
+++ b/target/hexagon/gen_analyze_funcs.py
@@ -1,7 +1,7 @@
 #!/usr/bin/env python3
 
 ##
-##  Copyright(c) 2022-2023 Qualcomm Innovation Center, Inc. All Rights 
Reserved.
+##  Copyright(c) 2022-2024 Qualcomm Innovation Center, Inc. All Rights 
Reserved.
 ##
 ##  This program is free software; you can redistribute it and/or modify
 ##  it under the terms of the GNU General Public License as published by
@@ -67,24 +67,7 @@ def gen_analyze_func(f, tag, regs, imms):
 
 
 def main():
-hex_common.read_semantics_file(sys.argv[1])
-hex_common.read_attribs_file(sys.argv[2])
-hex_common.read_overrides_file(sys.argv[3])
-hex_common.read_overrides_file(sys.argv[4])
-## Whether or not idef-parser is enabled is
-## determined by the number of arguments to
-## this script:
-##
-##   5 args. -> not enabled,
-##   6 args. -> idef-parser enabled.
-##
-## The 6:th arg. then holds a list of the successfully
-## parsed instructions.
-is_idef_parser_enabled = len(sys.argv) > 6
-if is_idef_parser_enabled:
-hex_common.read_idef_parser_enabled_file(sys.argv[5])
-hex_common.calculate_attribs()
-hex_common.init_registers()
+hex_common.read_common_files()
 tagregs = hex_common.get_tagregs()
 tagimms = hex_common.get_tagimms()
 
diff --git a/target/hexagon/gen_helper_funcs.py 
b/target/hexagon/gen_helper_funcs.py
index 9cc3d69c49..e9685bff2f 100755
--- a/target/hexagon/gen_helper_funcs.py
+++ b/target/hexagon/gen_helper_funcs.py
@@ -1,7 +1,7 @@
 #!/usr/bin/env python3
 
 ##
-##  Copyright(c) 2019-2023 Qualcomm Innovation Center, Inc. All Rights 
Reserved.
+##  Copyright(c) 2019-2024 Qualcomm Innovation Center, Inc. All Rights 
Reserved.
 ##
 ##  This program is free software; you can redistribute it and/or modify
 ##  it under the terms of the GNU General Public License as published by
@@ -102,24 +102,7 @@ def gen_helper_function(f, tag, tagregs, tagimms):
 
 
 def main():
-hex_common.read_semantics_file(sys.argv[1])
-hex_common.read_attribs_file(sys.argv[2])
-hex_common.read_overrides_file(sys.argv[3])
-hex_common.read_overrides_file(sys.argv[4])
-## Whether or not idef-parser is enabled is
-## determined by the number of arguments to
-## this script:
-##
-##   5 args. -> not enabled,
-##   6 args. -> idef-parser enabled.
-##
-## The 6:th arg. then holds a list of the successfully
-## parsed instructions.
-is_idef_parser_enabled = len(sys.argv) > 6
-if is_idef_parser_enabled:
-hex_common.read_idef_parser_enabled_file(sys.argv[5])
-hex_common.calculate_attribs()
-hex_common.init_registers()
+hex_common.read_common_files()
 tagregs = hex_common.get_tagregs()
 tagimms = hex_common.get_tagimms()
 
diff --git a/target/hexagon/gen_helper_protos.py 
b/target/hexagon/gen_helper_protos.py
index c82b0f54e4..4cc72a1581 100755
--- a/target/hexagon/gen_helper_protos.py
+++ b/target/hexagon/gen_helper_protos.py
@@ -1,7 +1,7 @@
 #!/usr/bin/env python3
 
 ##
-##  Copyright(c) 2019-2023 Qualcomm Innovation Center, Inc. All Rights 
Reserved.
+##  Copyright(c) 2019-2024 Qualcomm Innovation Center, Inc. All Rights 
Reserved.
 ##
 ##  This program is free software; you can redistribute it and/or modify
 ##  it under the terms of the GNU General Public License as published by
@@ -44,24 +44,7 @@ def gen_helper_prototype(f, tag, tagregs, tagimms):
 
 
 def main():
-hex_common.read_semantics_file(sys.argv[1])
-hex_common.read_attribs_file(sys.argv[2])
-hex_common.read_overrides_file(sys.argv[3])
-hex_common.read_overrides_file(sys.argv[4])
-## Whether or not idef-parser is enabled is
-## determined by the number of arguments to
-## this script:
-##
-##   5 args. -> not enabled,
-##   6 args. -> idef-parser enabled.
-##
-## The 6:th arg. then holds a list of the successfully
-## parsed

[PATCH v2 1/9] Hexagon (target/hexagon) Add is_old/is_new to Register class

2024-03-06 Thread Taylor Simpson

Signed-off-by: Taylor Simpson 
Reviewed-by: Philippe Mathieu-Daudé 
---
 target/hexagon/hex_common.py | 14 +-
 1 file changed, 13 insertions(+), 1 deletion(-)

diff --git a/target/hexagon/hex_common.py b/target/hexagon/hex_common.py
index 195620c7ec..4bacef223f 100755
--- a/target/hexagon/hex_common.py
+++ b/target/hexagon/hex_common.py
@@ -1,7 +1,7 @@
 #!/usr/bin/env python3
 
 ##
-##  Copyright(c) 2019-2023 Qualcomm Innovation Center, Inc. All Rights 
Reserved.
+##  Copyright(c) 2019-2024 Qualcomm Innovation Center, Inc. All Rights 
Reserved.
 ##
 ##  This program is free software; you can redistribute it and/or modify
 ##  it under the terms of the GNU General Public License as published by
@@ -397,10 +397,18 @@ def is_readwrite(self):
 class OldSource(Source):
 def reg_tcg(self):
 return f"{self.regtype}{self.regid}V"
+def is_old(self):
+return True
+def is_new(self):
+return False
 
 class NewSource(Source):
 def reg_tcg(self):
 return f"{self.regtype}{self.regid}N"
+def is_old(self):
+return False
+def is_new(self):
+return True
 
 class ReadWrite:
 def reg_tcg(self):
@@ -413,6 +421,10 @@ def is_read(self):
 return True
 def is_readwrite(self):
 return True
+def is_old(self):
+return True
+def is_new(self):
+return False
 
 class GprDest(Register, Single, Dest):
 def decl_tcg(self, f, tag, regno):
-- 
2.34.1

[PATCH v2 4/9] Hexagon (target/hexagon) Mark has_pred_dest in trans functions

2024-03-06 Thread Taylor Simpson

Check that the value matches opcode_wregs

Signed-off-by: Taylor Simpson 
---
 target/hexagon/insn.h | 1 +
 target/hexagon/decode.c   | 3 +++
 target/hexagon/gen_trans_funcs.py | 5 +
 3 files changed, 9 insertions(+)

diff --git a/target/hexagon/insn.h b/target/hexagon/insn.h
index a770379958..24dcf7fe9f 100644
--- a/target/hexagon/insn.h
+++ b/target/hexagon/insn.h
@@ -41,6 +41,7 @@ struct Instruction {
 uint32_t new_value_producer_slot:4;
 int32_t new_read_idx;
 int32_t dest_idx;
+bool has_pred_dest;
 
 bool part1;  /*
   * cmp-jumps are split into two insns.
diff --git a/target/hexagon/decode.c b/target/hexagon/decode.c
index a4d8500fea..84a3899556 100644
--- a/target/hexagon/decode.c
+++ b/target/hexagon/decode.c
@@ -366,6 +366,9 @@ static void decode_shuffle_for_execution(Packet *packet)
 for (flag = false, i = 0; i < last_insn + 1; i++) {
 int opcode = packet->insn[i].opcode;
 
+g_assert(packet->insn[i].has_pred_dest ==
+ (strstr(opcode_wregs[opcode], "Pd4") ||
+  strstr(opcode_wregs[opcode], "Pe4")));
 if ((strstr(opcode_wregs[opcode], "Pd4") ||
  strstr(opcode_wregs[opcode], "Pe4")) &&
 GET_ATTRIB(opcode, A_STORE) == 0) {
diff --git a/target/hexagon/gen_trans_funcs.py 
b/target/hexagon/gen_trans_funcs.py
index 1201172dda..9f86b4edbd 100755
--- a/target/hexagon/gen_trans_funcs.py
+++ b/target/hexagon/gen_trans_funcs.py
@@ -70,6 +70,7 @@ def mark_which_imm_extended(f, tag):
 ## insn->regno[2] = args->Rt;
 ## insn->new_read_idx = -1;
 ## insn->dest_idx = 0;
+## insn->has_pred_dest = false;
 ## return true;
 ## }
 ##
@@ -88,6 +89,7 @@ def gen_trans_funcs(f):
 
 new_read_idx = -1
 dest_idx = -1
+has_pred_dest = "false"
 for regno, (reg_type, reg_id, *_) in enumerate(regs):
 reg = hex_common.get_register(tag, reg_type, reg_id)
 f.write(code_fmt(f"""\
@@ -98,6 +100,8 @@ def gen_trans_funcs(f):
 # dest_idx should be the first destination, so check for -1
 if reg.is_written() and dest_idx == -1:
 dest_idx = regno
+if reg_type == "P" and reg.is_written() and not reg.is_read():
+has_pred_dest = "true"
 
 if len(imms) != 0:
 mark_which_imm_extended(f, tag)
@@ -121,6 +125,7 @@ def gen_trans_funcs(f):
 f.write(code_fmt(f"""\
 insn->new_read_idx = {new_read_idx};
 insn->dest_idx = {dest_idx};
+insn->has_pred_dest = {has_pred_dest};
 """))
 f.write(textwrap.dedent(f"""\
 return true;
-- 
2.34.1

[PATCH v2 5/9] Hexagon (tests/tcg/hexagon) Test HVX .new read from high half of pair

2024-03-06 Thread Taylor Simpson

Make sure the decoding of HVX .new is correctly handling this case

Signed-off-by: Taylor Simpson 
---
 tests/tcg/hexagon/hvx_misc.c | 16 +++-
 1 file changed, 15 insertions(+), 1 deletion(-)

diff --git a/tests/tcg/hexagon/hvx_misc.c b/tests/tcg/hexagon/hvx_misc.c
index b45170acd1..1fe14b5158 100644
--- a/tests/tcg/hexagon/hvx_misc.c
+++ b/tests/tcg/hexagon/hvx_misc.c
@@ -1,5 +1,5 @@
 /*
- *  Copyright(c) 2021-2023 Qualcomm Innovation Center, Inc. All Rights 
Reserved.
+ *  Copyright(c) 2021-2024 Qualcomm Innovation Center, Inc. All Rights 
Reserved.
  *
  *  This program is free software; you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
@@ -231,6 +231,7 @@ static void test_masked_store(bool invert)
 static void test_new_value_store(void)
 {
 void *p0 = buffer0;
+void *p1 = buffer1;
 void *pout = output;
 
 asm("{\n\t"
@@ -242,6 +243,19 @@ static void test_new_value_store(void)
 expect[0] = buffer0[0];
 
 check_output_w(__LINE__, 1);
+
+/* Test the .new read from the high half of a pair */
+asm("v7 = vmem(%0 + #0)\n\t"
+"v12 = vmem(%1 + #0)\n\t"
+"{\n\t"
+"v5:4 = vcombine(v12, v7)\n\t"
+"vmem(%2 + #0) = v5.new\n\t"
+"}\n\t"
+: : "r"(p0), "r"(p1), "r"(pout) : "v4", "v5", "v7", "v12", "memory");
+
+expect[0] = buffer1[0];
+
+check_output_w(__LINE__, 1);
 }
 
 static void test_max_temps()
-- 
2.34.1

[PATCH v2 0/9] Clean up .new decode and scripts

2024-03-06 Thread Taylor Simpson

During .new decode, there are several places where strchr is used.
We remove these by generating the values that are needed.

Once we have generated the proper values, we no longer need
op_regs_generated.h.inc.  We remove the script that generates it as
well as the code in meson.build

We also remove the script and meson.build code that creates
shortcode_generated.h.inc.  The data structure that includes it is
not used.

We remove hex_common.read_attribs_file.  The Python data structures built
during this step are not used.

 Changes in v2 
Address feedback from Matheus Tavares Bernardino 
Mark Philippe's Reviewed-by on patch 01
Update example comment in gen_trans_funcs.py



Taylor Simpson (9):
  Hexagon (target/hexagon) Add is_old/is_new to Register class
  Hexagon (target/hexagon) Mark new_read_idx in trans functions
  Hexagon (target/hexagon) Mark dest_idx in trans functions
  Hexagon (target/hexagon) Mark has_pred_dest in trans functions
  Hexagon (tests/tcg/hexagon) Test HVX .new read from high half of pair
  Hexagon (target/hexagon) Remove uses of op_regs_generated.h.inc
  Hexagon (target/hexagon) Remove gen_op_regs.py
  Hexagon (target/hexagon) Remove gen_shortcode.py
  Hexagon (target/hexagon) Remove hex_common.read_attribs_file

 target/hexagon/insn.h   |   5 +-
 target/hexagon/opcodes.h|   4 -
 target/hexagon/decode.c |  50 ++
 target/hexagon/mmvec/decode_ext_mmvec.c |  30 ++
 target/hexagon/opcodes.c|  35 ---
 tests/tcg/hexagon/hvx_misc.c|  16 ++-
 target/hexagon/README   |   2 -
 target/hexagon/gen_analyze_funcs.py |  21 +---
 target/hexagon/gen_helper_funcs.py  |  21 +---
 target/hexagon/gen_helper_protos.py |  21 +---
 target/hexagon/gen_idef_parser_funcs.py |   5 +-
 target/hexagon/gen_op_attribs.py|   5 +-
 target/hexagon/gen_op_regs.py   | 125 
 target/hexagon/gen_opcodes_def.py   |   4 +-
 target/hexagon/gen_printinsn.py |   5 +-
 target/hexagon/gen_shortcode.py |  63 
 target/hexagon/gen_tcg_func_table.py|   5 +-
 target/hexagon/gen_tcg_funcs.py |  21 +---
 target/hexagon/gen_trans_funcs.py   |  26 -
 target/hexagon/hex_common.py|  49 +++---
 target/hexagon/meson.build  |  55 ---
 21 files changed, 122 insertions(+), 446 deletions(-)
 delete mode 100755 target/hexagon/gen_op_regs.py
 delete mode 100755 target/hexagon/gen_shortcode.py

-- 
2.34.1

[PATCH v2 6/9] Hexagon (target/hexagon) Remove uses of op_regs_generated.h.inc

2024-03-06 Thread Taylor Simpson

Signed-off-by: Taylor Simpson 
---
 target/hexagon/opcodes.h|  4 --
 target/hexagon/decode.c | 57 +++--
 target/hexagon/mmvec/decode_ext_mmvec.c | 34 +++
 target/hexagon/opcodes.c| 28 
 4 files changed, 13 insertions(+), 110 deletions(-)

diff --git a/target/hexagon/opcodes.h b/target/hexagon/opcodes.h
index fa7e321950..0ee11bd445 100644
--- a/target/hexagon/opcodes.h
+++ b/target/hexagon/opcodes.h
@@ -40,10 +40,6 @@ typedef enum {
 
 extern const char * const opcode_names[];
 
-extern const char * const opcode_reginfo[];
-extern const char * const opcode_rregs[];
-extern const char * const opcode_wregs[];
-
 typedef struct {
 const char * const encoding;
 const EncClass enc_class;
diff --git a/target/hexagon/decode.c b/target/hexagon/decode.c
index 84a3899556..23deba2426 100644
--- a/target/hexagon/decode.c
+++ b/target/hexagon/decode.c
@@ -115,24 +115,13 @@ static void
 decode_fill_newvalue_regno(Packet *packet)
 {
 int i, use_regidx, offset, def_idx, dst_idx;
-uint16_t def_opcode, use_opcode;
-char *dststr;
 
 for (i = 1; i < packet->num_insns; i++) {
 if (GET_ATTRIB(packet->insn[i].opcode, A_DOTNEWVALUE) &&
 !GET_ATTRIB(packet->insn[i].opcode, A_EXTENSION)) {
-use_opcode = packet->insn[i].opcode;
-
-/* It's a store, so we're adjusting the Nt field */
-if (GET_ATTRIB(use_opcode, A_STORE)) {
-use_regidx = strchr(opcode_reginfo[use_opcode], 't') -
-opcode_reginfo[use_opcode];
-} else {/* It's a Jump, so we're adjusting the Ns field */
-use_regidx = strchr(opcode_reginfo[use_opcode], 's') -
-opcode_reginfo[use_opcode];
-}
-g_assert(packet->insn[i].new_read_idx != -1 &&
- packet->insn[i].new_read_idx == use_regidx);
+
+g_assert(packet->insn[i].new_read_idx != -1);
+use_regidx = packet->insn[i].new_read_idx;
 
 /*
  * What's encoded at the N-field is the offset to who's producing
@@ -153,39 +142,9 @@ decode_fill_newvalue_regno(Packet *packet)
  */
 g_assert(!((def_idx < 0) || (def_idx > (packet->num_insns - 1))));
 
-/*
- * packet->insn[def_idx] is the producer
- * Figure out which type of destination it produces
- * and the corresponding index in the reginfo
- */
-def_opcode = packet->insn[def_idx].opcode;
-dststr = strstr(opcode_wregs[def_opcode], "Rd");
-if (dststr) {
-dststr = strchr(opcode_reginfo[def_opcode], 'd');
-} else {
-dststr = strstr(opcode_wregs[def_opcode], "Rx");
-if (dststr) {
-dststr = strchr(opcode_reginfo[def_opcode], 'x');
-} else {
-dststr = strstr(opcode_wregs[def_opcode], "Re");
-if (dststr) {
-dststr = strchr(opcode_reginfo[def_opcode], 'e');
-} else {
-dststr = strstr(opcode_wregs[def_opcode], "Ry");
-if (dststr) {
-dststr = strchr(opcode_reginfo[def_opcode], 'y');
-} else {
-g_assert_not_reached();
-}
-}
-}
-}
-g_assert(dststr != NULL);
-
 /* Now patch up the consumer with the register number */
-dst_idx = dststr - opcode_reginfo[def_opcode];
-g_assert(packet->insn[def_idx].dest_idx != -1 &&
- packet->insn[def_idx].dest_idx == dst_idx);
+g_assert(packet->insn[def_idx].dest_idx != -1);
+dst_idx = packet->insn[def_idx].dest_idx;
 packet->insn[i].regno[use_regidx] =
 packet->insn[def_idx].regno[dst_idx];
 /*
@@ -366,11 +325,7 @@ static void decode_shuffle_for_execution(Packet *packet)
 for (flag = false, i = 0; i < last_insn + 1; i++) {
 int opcode = packet->insn[i].opcode;
 
-g_assert(packet->insn[i].has_pred_dest ==
- (strstr(opcode_wregs[opcode], "Pd4") ||
-  strstr(opcode_wregs[opcode], "Pe4")));
-if ((strstr(opcode_wregs[opcode], "Pd4") ||
- strstr(opcode_wregs[opcode], "Pe4")) &&
+if (packet->insn[i].has_pred_dest &&
 GET_ATTRIB(opcode, A_STORE) == 0) {
 /* This should be a compare (not a store conditional) */
 if (flag) {
diff --git a/target/hexagon/mmvec/decode_ext_mmvec.c 
b/target/hexagon/mmvec/decode_ext_mmvec.c
index c1320406df..f850d0154d 100644
--- a/target/hexagon/mmvec/decode_ext_mmvec.c
+++ b/target/hexagon/mmvec/decode_ext_mmvec.c
@@ -28,21

[PATCH v2 7/9] Hexagon (target/hexagon) Remove gen_op_regs.py

2024-03-06 Thread Taylor Simpson

Signed-off-by: Taylor Simpson 
---
 target/hexagon/README |   1 -
 target/hexagon/gen_op_regs.py | 125 --
 target/hexagon/meson.build|  14 +---
 3 files changed, 2 insertions(+), 138 deletions(-)
 delete mode 100755 target/hexagon/gen_op_regs.py

diff --git a/target/hexagon/README b/target/hexagon/README
index 746ebec378..065c05154d 100644
--- a/target/hexagon/README
+++ b/target/hexagon/README
@@ -43,7 +43,6 @@ target/hexagon/gen_semantics.c.  This step produces
 That file is consumed by the following python scripts to produce the indicated
 header files in /target/hexagon
 gen_opcodes_def.py  -> opcodes_def_generated.h.inc
-gen_op_regs.py  -> op_regs_generated.h.inc
 gen_printinsn.py-> printinsn_generated.h.inc
 gen_op_attribs.py   -> op_attribs_generated.h.inc
 gen_helper_protos.py-> helper_protos_generated.h.inc
diff --git a/target/hexagon/gen_op_regs.py b/target/hexagon/gen_op_regs.py
deleted file mode 100755
index 7b7b33895a..0000000000
--- a/target/hexagon/gen_op_regs.py
+++ /dev/null
@@ -1,125 +0,0 @@
-#!/usr/bin/env python3
-
-##
-##  Copyright(c) 2019-2023 Qualcomm Innovation Center, Inc. All Rights 
Reserved.
-##
-##  This program is free software; you can redistribute it and/or modify
-##  it under the terms of the GNU General Public License as published by
-##  the Free Software Foundation; either version 2 of the License, or
-##  (at your option) any later version.
-##
-##  This program is distributed in the hope that it will be useful,
-##  but WITHOUT ANY WARRANTY; without even the implied warranty of
-##  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-##  GNU General Public License for more details.
-##
-##  You should have received a copy of the GNU General Public License
-##  along with this program; if not, see <http://www.gnu.org/licenses/>.
-##
-
-import sys
-import re
-import string
-import hex_common
-
-
-##
-## Generate the register and immediate operands for each instruction
-##
-def calculate_regid_reg(tag):
-def letter_inc(x):
-return chr(ord(x) + 1)
-
-ordered_implregs = ["SP", "FP", "LR"]
-srcdst_lett = "X"
-src_lett = "S"
-dst_lett = "D"
-retstr = ""
-mapdict = {}
-for reg in ordered_implregs:
-reg_rd = 0
-reg_wr = 0
-if ("A_IMPLICIT_WRITES_" + reg) in hex_common.attribdict[tag]:
-reg_wr = 1
-if reg_rd and reg_wr:
-retstr += srcdst_lett
-mapdict[srcdst_lett] = reg
-srcdst_lett = letter_inc(srcdst_lett)
-elif reg_rd:
-retstr += src_lett
-mapdict[src_lett] = reg
-src_lett = letter_inc(src_lett)
-elif reg_wr:
-retstr += dst_lett
-mapdict[dst_lett] = reg
-dst_lett = letter_inc(dst_lett)
-return retstr, mapdict
-
-
-def calculate_regid_letters(tag):
-retstr, mapdict = calculate_regid_reg(tag)
-return retstr
-
-
-def strip_reg_prefix(x):
-y = x.replace("UREG.", "")
-y = y.replace("MREG.", "")
-return y.replace("GREG.", "")
-
-
-def main():
-hex_common.read_semantics_file(sys.argv[1])
-hex_common.read_attribs_file(sys.argv[2])
-hex_common.init_registers()
-tagregs = hex_common.get_tagregs(full=True)
-tagimms = hex_common.get_tagimms()
-
-with open(sys.argv[3], "w") as f:
-for tag in hex_common.tags:
-regs = tagregs[tag]
-rregs = []
-wregs = []
-regids = ""
-for regtype, regid, _, numregs in regs:
-reg = hex_common.get_register(tag, regtype, regid)
-if reg.is_read():
-if regid[0] not in regids:
-regids += regid[0]
-rregs.append(regtype + regid + numregs)
-if reg.is_written():
-wregs.append(regtype + regid + numregs)
-if regid[0] not in regids:
-regids += regid[0]
-for attrib in hex_common.attribdict[tag]:
-if hex_common.attribinfo[attrib]["rreg"]:
-rregs.append(strip_reg_prefix(attribinfo[attrib]["rreg"]))
-if hex_common.attribinfo[attrib]["wreg"]:
-wregs.append(strip_reg_prefix(attribinfo[attrib]["wreg"]))
-regids += calculate_regid_letters(tag)
-f.write(
-f'REGINFO({tag},"{regids}",\t/*RD:*/\t"{",".join(rregs)}",'
-f'\t/*WR:*/\t"{",".join(wregs)}")\n'
-)
-
-for tag in hex_common.tags:
-imms = tagimms[tag]
-f.write(f"IMMINFO({tag}")
-if not imms:
-f.write(""",'u',0,0,'U',0,0""")
-for sign, size, shamt in imms:
-if sign == "r":
-sign = "s"
-if not

Re: [PATCH v8 2/2] hw/acpi: Implement the SRAT GI affinity structure

2024-03-06 Thread Ankit Agrawal

>>
>> [1] ACPI Spec 6.3, Section 5.2.16.6
>> [2] ACPI Spec 6.3, Table 5.80
>>
>> Cc: Jonathan Cameron 
>> Cc: Alex Williamson 
>> Cc: Cedric Le Goater 
>> Signed-off-by: Ankit Agrawal 
>
> I guess we gloss over the bisection breakage due to being able to add
> these nodes and have them used in HMAT as initiators before we have
> added SRAT support.  Linux will moan about it and not use such an HMAT
> but meh, it will boot.
>
> You could drag the HMAT change after this but perhaps it's not worth 
> bothering.

Sorry this part isn't clear to me. Are you suggesting we keep the HMAT
changes out from this patch?

> Otherwise LGTM
> Reviewed-by: Jonathan Cameron 

Thanks!

> Could add x86 support (posted in reply to v7 this morning)
> and sounds like you have the test nearly ready which is great.

Ok, will add the x86 part as well. I could reuse what you shared
earlier.

https://gitlab.com/jic23/qemu/-/commit/ccfb4fe22167e035173390cf147d9c226951b9b6

Re: [PATCH v8 1/2] qom: new object to associate device to NUMA node

2024-03-06 Thread Ankit Agrawal

>> -object acpi-generic-initiator,id=gi14,pci-dev=dev1,node=16 \
>> -object acpi-generic-initiator,id=gi15,pci-dev=dev1,node=17 \
>> 
>> The performance benefits can be realized by providing the NUMA node distances
>> appropriately (through libvirt tags or Qemu params). The admin can get the
>> distance among nodes in hardware using `numactl -H`.
>
> That's a lot of description when you could just have claimed you want a normal
> GI node for HMAT and we'd have all believed you ;)

Ack, I'll remove this part and change it to say as such.

>> 
>> Link: https://www.nvidia.com/en-in/technologies/multi-instance-gpu [1]
>> Cc: Jonathan Cameron 
>> Cc: Alex Williamson 
>> Cc: Markus Armbruster 
>> Acked-by: Markus Armbruster 
>> Signed-off-by: Ankit Agrawal 
>
> Hi Ankit,
>
> Some minor things inline. With the includes tidied up.
> Reviewed-by: Jonathan Cameron 

Thanks!

>> diff --git a/include/hw/acpi/acpi_generic_initiator.h 
>> b/include/hw/acpi/acpi_generic_initiator.h
>> new file mode 100644
>> index 00..23d0b591c6
>> --- /dev/null
>> +++ b/include/hw/acpi/acpi_generic_initiator.h
>> @@ -0,0 +1,32 @@
>> +// SPDX-License-Identifier: GPL-2.0-only
>> +/*
>> + * Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved
>> + */
>> +
>> +#ifndef ACPI_GENERIC_INITIATOR_H
>> +#define ACPI_GENERIC_INITIATOR_H
>> +
>> +#include "hw/mem/pc-dimm.h"
>
> Why?
>
>> +#include "hw/acpi/bios-linker-loader.h"
>> +#include "hw/acpi/aml-build.h"
>> +#include "sysemu/numa.h"
>
> This should only include headers that it uses directly.
> If they are needed down in the c files, then include them there.

Ack, will fix this in the next version.

>> +typedef struct AcpiGenericInitiator {
>> +    /* private */
>> +    Object parent;
>> +
>> +    /* public */
>> +    char *pci_dev;
>> +    uint16_t node;
>> +} AcpiGenericInitiator;
>> +
>> +typedef struct AcpiGenericInitiatorClass {
>> +    ObjectClass parent_class;
>> +} AcpiGenericInitiatorClass;
>
> Trivial, but you could push the class definition down into the c file
> given it's not accessed from anywhere else.

Sure will move the AcpiGenericInitiatorClass typedef to the .c file.

> +
> +#endif

Re: Problem with migration/rdma

2024-03-06 Thread Zhijian Li (Fujitsu)

Yu,


On 07/03/2024 00:30, Philippe Mathieu-Daudé wrote:
> Cc'ing RDMA migration reviewers/maintainers:
> 
> $ ./scripts/get_maintainer.pl -f migration/rdma.c
> Li Zhijian  (reviewer:RDMA Migration)
> Peter Xu  (maintainer:Migration)
> Fabiano Rosas  (maintainer:Migration)
> 
> On 5/3/24 22:32, Yu Zhang wrote:
>> Hello Het and all,
>>
>> while I was testing qemu-8.2, I saw a lot of our migration test cases failed.
>> After debugging the commits of the 8.2 branch, I saw the issue and made a 
>> diff:
>>
>> diff --git a/migration/rdma.c b/migration/rdma.c
>> index 6a29e53daf..f10d56f556 100644
>> --- a/migration/rdma.c
>> +++ b/migration/rdma.c
>> @@ -3353,9 +3353,9 @@ static int qemu_rdma_accept(RDMAContext *rdma)
>>   goto err_rdma_dest_wait;
>>   }
>>
>> -    isock->host = rdma->host;
>> +    isock->host = g_strdup_printf("%s", rdma->host);
>>   isock->port = g_strdup_printf("%d", rdma->port);


Thanks for your analysis.

It will be great if you send this as a patch.


isock is defined as a g_autoptr VVV
 g_autoptr(InetSocketAddress) isock = g_new0(InetSocketAddress, 1);

I'm surprised that the auto-free scheme seems to free the members of isock as
well; see the valgrind log below. That will cause a double free.

==809138== Invalid free() / delete / delete[] / realloc()
==809138==at 0x483A9F5: free (vg_replace_malloc.c:538)
==809138==by 0x598F70C: g_free (in /usr/lib64/libglib-2.0.so.0.6600.8)
==809138==by 0x79B6AD: qemu_rdma_cleanup (rdma.c:2432)
==809138==by 0x79CEE6: qio_channel_rdma_close_rcu (rdma.c:3108)
==809138==by 0xC2E339: call_rcu_thread (rcu.c:301)
==809138==by 0xC2116A: qemu_thread_start (qemu-thread-posix.c:541)
==809138==by 0x72683F8: ??? (in /usr/lib64/libpthread-2.32.so)
==809138==by 0x73824C2: clone (in /usr/lib64/libc-2.32.so)
==809138==  Address 0x13daa070 is 0 bytes inside a block of size 14 free'd
==809138==at 0x483A9F5: free (vg_replace_malloc.c:538)
==809138==by 0x598F70C: g_free (in /usr/lib64/libglib-2.0.so.0.6600.8)
==809138==by 0xC058CF: qapi_dealloc_type_str (qapi-dealloc-visitor.c:68)
==809138==by 0xC09EF3: visit_type_str (qapi-visit-core.c:349)
==809138==by 0xBDDECC: visit_type_InetSocketAddressBase_members 
(qapi-visit-sockets.c:29)
==809138==by 0xBDE055: visit_type_InetSocketAddress_members 
(qapi-visit-sockets.c:67)
==809138==by 0xBDE30D: visit_type_InetSocketAddress 
(qapi-visit-sockets.c:119)
==809138==by 0xBDDB38: qapi_free_InetSocketAddress (qapi-types-sockets.c:51)
==809138==by 0x792351: glib_autoptr_clear_InetSocketAddress 
(qapi-types-sockets.h:109)
==809138==by 0x79236F: glib_autoptr_cleanup_InetSocketAddress 
(qapi-types-sockets.h:109)
==809138==by 0x79D956: qemu_rdma_accept (rdma.c:3341)
==809138==by 0x79F05A: rdma_accept_incoming_migration (rdma.c:4041)
==809138==  Block was alloc'd at
==809138==at 0x4839809: malloc (vg_replace_malloc.c:307)
==809138==by 0x5992BB8: g_malloc (in /usr/lib64/libglib-2.0.so.0.6600.8)
==809138==by 0x59A7FE3: g_strdup (in /usr/lib64/libglib-2.0.so.0.6600.8)
==809138==by 0x79C2A8: qemu_rdma_data_init (rdma.c:2731)
==809138==by 0x79F183: rdma_start_incoming_migration (rdma.c:4081)
==809138==by 0x76F200: qemu_start_incoming_migration (migration.c:581)
==809138==by 0x77193A: qmp_migrate_incoming (migration.c:1735)
==809138==by 0x74B3D3: qmp_x_exit_preconfig (vl.c:2718)
==809138==by 0x74DB6F: qemu_init (vl.c:3753)
==809138==by 0xA14F3F: main (main.c:47)


Thanks
Zhijian


>>
>> which was introduced by the commit below:
>>
>> commit 3fa9642ff7d51f7fc3ba68e6ccd13a939d5bd609 (HEAD)
>> Author: Het Gala 
>> Date:   Mon Oct 23 15:20:45 2023 -0300
>>
>>  migration: convert rdma backend to accept MigrateAddress
>>
>>  RDMA based transport backend for 'migrate'/'migrate-incoming' QAPIs
>>  accept new wire protocol of MigrateAddress struct.
>>
>>  It is achieved by parsing 'uri' string and storing migration parameters
>>  required for RDMA connection into well defined InetSocketAddress struct.
>>  ...
>>
>> A debug line
>>   isock->host = rdma->host;
>>   isock->port = g_strdup_printf("%d", rdma->port);
>> +fprintf(stdout, "QEMU: %s, host %s, port %s\n", __func__,
>> isock->host, isock->port);
>>
>> produced this error:
>> QEMU: qemu_rdma_accept, host ::, port 8089
>> corrupted size vs. prev_size in fastbins
>>
>> on the target host, which may indicate a crash related to the memory
>> allocation or a memory
>> corruption of the data. With the patch, it doesn't happen any more,
>> and the migration is fine.
>> Could you be kind to test this and confirm the issue?
>>
>> Furthermore, I'm confused by the two structs:
>>
>> struct InetSocketAddressBase {
>>  char *host;
>>  char *port;
>> };
>>
>> struct InetSocketAddress {
>>  /* Members inherited from InetSocketAddressBase: */
>>  char *host;
>>  char *port;
>>
>> To my understanding, they are used to consolidate the

Re: [PATCH] target/riscv: Fix privilege mode of G-stage translation for debugging

2024-03-06 Thread Alistair Francis

On Wed, Feb 28, 2024 at 10:14 PM Hiroaki Yamamoto  wrote:
>
> G-stage translation should be considered to be user-level access in 
> riscv_cpu_get_phys_page_debug(), as already done in riscv_cpu_tlb_fill().
>
> This fixes a bug that prevents gdb from reading memory while the VM is 
> running in VS-mode.
>
> Signed-off-by: Hiroaki Yamamoto 

Thanks!

Applied to riscv-to-apply.next

Alistair

> ---
>  target/riscv/cpu_helper.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/target/riscv/cpu_helper.c b/target/riscv/cpu_helper.c
> index d462d95ee1..6e13069da7 100644
> --- a/target/riscv/cpu_helper.c
> +++ b/target/riscv/cpu_helper.c
> @@ -1212,7 +1212,7 @@ hwaddr riscv_cpu_get_phys_page_debug(CPUState *cs, 
> vaddr addr)
>
>  if (env->virt_enabled) {
>  if (get_physical_address(env, &phys_addr, &prot, phys_addr, NULL,
> - 0, mmu_idx, false, true, true)) {
> + 0, MMUIdx_U, false, true, true)) {
>  return -1;
>  }
>  }
> --
> 2.43.2
>
>

RE: [PATCH v4 0/4] RISC-V: Modularize common match conditions for trigger

2024-03-06 Thread 張哲嘉

Hi Alistair,

Please also take a look at this series, I guess it is ready to be applied, 
thanks!


BRs,
Alvin

> -Original Message-
> From: Alvin Che-Chia Chang(張哲嘉) 
> Sent: Tuesday, February 27, 2024 9:24 AM
> To: qemu-ri...@nongnu.org; qemu-devel@nongnu.org
> Cc: alistair.fran...@wdc.com; bin.m...@windriver.com;
> liwei1...@gmail.com; dbarb...@ventanamicro.com;
> zhiwei_...@linux.alibaba.com; Alvin Che-Chia Chang(張哲嘉)
> 
> Subject: [PATCH v4 0/4] RISC-V: Modularize common match conditions for
> trigger
>
> According to RISC-V Debug specification ratified version 0.13 [1] (also 
> applied
> to version 1.0 [2] but it has not been ratified yet), the enabled privilege 
> levels
> of the trigger are common match conditions for all the types of the trigger.
>
> This series modularizes the code for checking the privilege levels of type 
> 2/3/6
> triggers by implementing functions trigger_common_match() and
> trigger_priv_match().
>
> Additional match conditions, such as CSR tcontrol and textra, can be further
> implemented into trigger_common_match() in the future.
>
> [1]: https://github.com/riscv/riscv-debug-spec/releases/tag/task_group_vote
> [2]: https://github.com/riscv/riscv-debug-spec/releases/tag/1.0.0-rc1-asciidoc
>
> Changes from v3:
> - Change this series to target Debug Spec. version 0.13
>
> Changes from v2:
> - Explicitly mention the targeting version of RISC-V Debug Spec.
>
> Changes from v1:
> - Fix typo
> - Add commit description for changing behavior of looping the triggers
>   when we check type 2 triggers.
>
> Alvin Chang (4):
>   target/riscv: Add functions for common matching conditions of trigger
>   target/riscv: Apply modularized matching conditions for breakpoint
>   target/riscv: Apply modularized matching conditions for watchpoint
>   target/riscv: Apply modularized matching conditions for icount trigger
>
>  target/riscv/debug.c | 124 +--
>  1 file changed, 83 insertions(+), 41 deletions(-)
>
> --
> 2.34.1

CONFIDENTIALITY NOTICE:

This e-mail (and its attachments) may contain confidential and legally 
privileged information or information protected from disclosure. If you are not 
the intended recipient, you are hereby notified that any disclosure, copying, 
distribution, or use of the information contained herein is strictly 
prohibited. In this case, please immediately notify the sender by return 
e-mail, delete the message (and any accompanying documents) and destroy all 
printed hard copies. Thank you for your cooperation.

Copyright ANDES TECHNOLOGY CORPORATION - All Rights Reserved.

Re: [PATCH] target/riscv: Fix privilege mode of G-stage translation for debugging

2024-03-06 Thread Alistair Francis

On Wed, Feb 28, 2024 at 10:14 PM Hiroaki Yamamoto  wrote:
>
> G-stage translation should be considered to be user-level access in 
> riscv_cpu_get_phys_page_debug(), as already done in riscv_cpu_tlb_fill().
>
> This fixes a bug that prevents gdb from reading memory while the VM is 
> running in VS-mode.

Thanks for the patch. In the future can you ensure your commit message
wraps at around 70 characters?

>
> Signed-off-by: Hiroaki Yamamoto 

Reviewed-by: Alistair Francis 

Alistair

> ---
>  target/riscv/cpu_helper.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/target/riscv/cpu_helper.c b/target/riscv/cpu_helper.c
> index d462d95ee1..6e13069da7 100644
> --- a/target/riscv/cpu_helper.c
> +++ b/target/riscv/cpu_helper.c
> @@ -1212,7 +1212,7 @@ hwaddr riscv_cpu_get_phys_page_debug(CPUState *cs, 
> vaddr addr)
>
>  if (env->virt_enabled) {
>  if (get_physical_address(env, &phys_addr, &prot, phys_addr, NULL,
> - 0, mmu_idx, false, true, true)) {
> + 0, MMUIdx_U, false, true, true)) {
>  return -1;
>  }
>  }
> --
> 2.43.2
>
>

Re: [PATCH 0/2] RISC-V APLIC fixes

2024-03-06 Thread Alistair Francis

On Wed, Mar 6, 2024 at 7:57 PM Anup Patel  wrote:
>
> Few fixes for RISC-V APLIC discovered during Linux AIA patch reviews.
>
> These patches can also be found in the apatel_aplic_fixes_v1 branch at:
> https://github.com/avpatel/qemu.git
>
> Anup Patel (2):
>   hw/intc/riscv_aplic: Fix setipnum_le write emulation for APLIC
> MSI-mode
>   hw/intc/riscv_aplic: Fix in_clrip[x] read emulation

Thanks!

Applied to riscv-to-apply.next

Alistair

>
>  hw/intc/riscv_aplic.c | 37 +++--
>  1 file changed, 31 insertions(+), 6 deletions(-)
>
> --
> 2.34.1
>
>

RE: [PATCH v2 1/2] hw/arm/virt-acpi-build.c: Migrate SPCR creation to common location

2024-03-06 Thread JeeHeng Sia



> -Original Message-
> From: Alistair Francis 
> Sent: Thursday, March 7, 2024 9:33 AM
> To: Daniel Henrique Barboza 
> Cc: JeeHeng Sia ; qemu-...@nongnu.org; 
> qemu-devel@nongnu.org; qemu-ri...@nongnu.org;
> m...@redhat.com; imamm...@redhat.com; anisi...@redhat.com; 
> peter.mayd...@linaro.org; shannon.zha...@gmail.com;
> suni...@ventanamicro.com; pal...@dabbelt.com; alistair.fran...@wdc.com; 
> bin.m...@windriver.com; liwei1...@gmail.com;
> zhiwei_...@linux.alibaba.com
> Subject: Re: [PATCH v2 1/2] hw/arm/virt-acpi-build.c: Migrate SPCR creation 
> to common location
> 
> On Thu, Mar 7, 2024 at 4:59 AM Daniel Henrique Barboza
>  wrote:
> >
> > Hi,
> >
> > This patch breaks check-qtest, most specifically 'bios-tables-test', for 
> > aarch64.
> > I found this while running riscv-to-apply.next in the Gitlab pipeline.
> >
> >
> > Here's the output:
> >
> > $ make -j && QTEST_QEMU_BINARY=./qemu-system-aarch64 V=1 
> > ./tests/qtest/bios-tables-test
> > TAP version 13
> > # random seed: R02Sf0f2fa0a3fac5d540b1681c820621b7d
> > # starting QEMU: exec ./qemu-system-aarch64 -qtest 
> > unix:/tmp/qtest-591353.sock -qtest-log /dev/null -chardev
> socket,path=/tmp/qtest-591353.qmp,id=char0 -mon chardev=char0,mode=control 
> -display none -audio none -machine none -accel
> qtest
> > 1..8
> > # Start of aarch64 tests
> > # Start of acpi tests
> > # starting QEMU: exec ./qemu-system-aarch64 -qtest 
> > unix:/tmp/qtest-591353.sock -qtest-log /dev/null -chardev
> socket,path=/tmp/qtest-591353.qmp,id=char0 -mon chardev=char0,mode=control 
> -display none -audio none -machine virt  -accel tcg
> -nodefaults -nographic -drive 
> if=pflash,format=raw,file=pc-bios/edk2-aarch64-code.fd,readonly=on -drive
> if=pflash,format=raw,file=pc-bios/edk2-arm-vars.fd,snapshot=on -cdrom 
> tests/data/uefi-boot-images/bios-tables-
> test.aarch64.iso.qcow2 -cpu cortex-a57 -smbios 
> type=4,max-speed=2900,current-speed=2700 -accel qtest
> > acpi-test: Warning! SPCR binary file mismatch. Actual 
> > [aml:/tmp/aml-9G53J2], Expected [aml:tests/data/acpi/virt/SPCR].
> > See source file tests/qtest/bios-tables-test.c for instructions on how to 
> > update expected files.
> > acpi-test: Warning! SPCR mismatch. Actual [asl:/tmp/asl-SR53J2.dsl, 
> > aml:/tmp/aml-9G53J2], Expected [asl:/tmp/asl-4Z33J2.dsl,
> aml:tests/data/acpi/virt/SPCR].
> >
> > The diff is here:
> >
> > --- /tmp/asl-4Z33J2.dsl 2024-03-06 15:40:24.879879348 -0300
> > +++ /tmp/asl-SR53J2.dsl 2024-03-06 15:40:24.877879347 -0300
> > @@ -1,57 +1,49 @@
> >   /*
> >* Intel ACPI Component Architecture
> >* AML/ASL+ Disassembler version 20220331 (64-bit version)
> >* Copyright (c) 2000 - 2022 Intel Corporation
> >
> > (...)
> >
> >   [000h    4]Signature : "SPCR"[Serial Port 
> > Console Redirection Table]
> > -[004h 0004   4] Table Length : 0050
> > +[004h 0004   4] Table Length : 004F
> >   [008h 0008   1] Revision : 02
> > -[009h 0009   1] Checksum : B1
> > +[009h 0009   1] Checksum : B2
> >   [00Ah 0010   6]   Oem ID : "BOCHS "
> >
> > (...)
> >
> > -[042h 0066   2]PCI Vendor ID : 
> > +[042h 0066   2]PCI Vendor ID : 00FF
> >
> >
> > After inspecting the common helper and what the original ARM code was doing
> > I found out that we're missing something down there:
> >
> >
> > On 1/15/24 22:09, Sia Jee Heng wrote:
> > > RISC-V should also generate the SPCR in a manner similar to ARM.
> > > Therefore, instead of replicating the code, relocate this function
> > > to the common AML build.
> > >
> > > Signed-off-by: Sia Jee Heng 
> > > ---
> > >   hw/acpi/aml-build.c | 51 
> > >   hw/arm/virt-acpi-build.c| 68 +++--
> > >   include/hw/acpi/acpi-defs.h | 33 ++
> > >   include/hw/acpi/aml-build.h |  4 +++
> > >   4 files changed, 115 insertions(+), 41 deletions(-)
> > >
> > > diff --git a/hw/acpi/aml-build.c b/hw/acpi/aml-build.c
> > > index af66bde0f5..f3904650e4 100644
> > > --- a/hw/acpi/aml-build.c
> > > +++ b/hw/acpi/aml-build.c
> > > @@ -1994,6 +1994,57 @@ static void build_processor_hierarchy_node(GArray 
> > > *tbl, uint32_t flags,
> > >   }
> > >   }
> > >
> > > +void build_spcr(GArray *table_data, BIOSLinker *linker,
> > > +const AcpiSpcrData *f, const uint8_t rev,
> > > +const char *oem_id, const char *oem_table_id)
> > > +{
> > > +AcpiTable table = { .sig = "SPCR", .rev = rev, .oem_id = oem_id,
> > > +.oem_table_id = oem_table_id };
> > > +
> > > +acpi_table_begin(&table, table_data);
> > > +/* Interface type */
> > > +build_append_int_noprefix(table_data, f->interface_type, 1);
> > > +/* Reserved */
> > > +build_append_int_noprefix(table_data, 0, 3);
> > > +/* Base Address */
> > > +

Re: [PULL v2 1/1] loongarch: Change the UEFI loading mode to loongarch

2024-03-06 Thread lixianglai


Hi huacai:


Hi, Xianglai,

How to pass the BIOS file to qemu after this patch? With the old
cmdline I get an RCU stall and freeze the kvm host.


The bios also has the corresponding code modification,

you can update the code from the UEFI community to recompile UEFI,

or you can directly obtain the compiled bios from the link below:

https://github.com/lixianglai/LoongarchVirtFirmware

Thanks,

Xianglai.



Huacai

On Sat, Mar 2, 2024 at 12:14 PM lixianglai  wrote:

Hi Philippe:

On 29/2/24 12:38, Song Gao wrote:

From: Xianglai Li 

The UEFI loading mode in loongarch is very different
from that in other architectures:loongarch's UEFI code
is in rom, while other architectures' UEFI code is in flash.

loongarch UEFI can be loaded as follows:
-machine virt,pflash=pflash0-format
-bios ./QEMU_EFI.fd

Other architectures load UEFI using the following methods:
-machine virt,pflash0=pflash0-format,pflash1=pflash1-format

loongarch's UEFI loading method makes qemu and libvirt incompatible
when using NVRAM, and the cost of loongarch's current loading method
far outweighs the benefits, so we decided to use the same UEFI loading
scheme as other architectures.


FYI I'm still trying to find a way to avoid that, planning to discuss
more with libvirt folks. Well, maybe it is a waste of my time and I
should just stop worrying / caring about this long standing issue.

Thank you so much for your attention to this issue

and your long-standing contributions to the community!!:-)

Best regards,

Xianglai.


Cc: Andrea Bolognani 
Cc: maob...@loongson.cn
Cc: Philippe Mathieu-Daudé 
Cc: Song Gao 
Cc: zhaotian...@loongson.cn
Signed-off-by: Xianglai Li 
Tested-by: Andrea Bolognani 
Reviewed-by: Song Gao 
Message-Id: 
<0bd892aa9b88e0f4cc904cb70efd0251fc1cde29.1708336919.git.lixiang...@loongson.cn>
Signed-off-by: Song Gao 
---
   hw/loongarch/acpi-build.c   |  29 +--
   hw/loongarch/virt.c | 101 ++--
   include/hw/loongarch/virt.h |  10 ++--
   3 files changed, 107 insertions(+), 33 deletions(-)

Re: [PATCH v2 1/2] hw/arm/virt-acpi-build.c: Migrate SPCR creation to common location

2024-03-06 Thread Alistair Francis

On Thu, Mar 7, 2024 at 4:59 AM Daniel Henrique Barboza
 wrote:
>
> Hi,
>
> This patch breaks check-qtest, most specifically 'bios-tables-test', for 
> aarch64.
> I found this while running riscv-to-apply.next in the Gitlab pipeline.
>
>
> Here's the output:
>
> $ make -j && QTEST_QEMU_BINARY=./qemu-system-aarch64 V=1 
> ./tests/qtest/bios-tables-test
> TAP version 13
> # random seed: R02Sf0f2fa0a3fac5d540b1681c820621b7d
> # starting QEMU: exec ./qemu-system-aarch64 -qtest 
> unix:/tmp/qtest-591353.sock -qtest-log /dev/null -chardev 
> socket,path=/tmp/qtest-591353.qmp,id=char0 -mon chardev=char0,mode=control 
> -display none -audio none -machine none -accel qtest
> 1..8
> # Start of aarch64 tests
> # Start of acpi tests
> # starting QEMU: exec ./qemu-system-aarch64 -qtest 
> unix:/tmp/qtest-591353.sock -qtest-log /dev/null -chardev 
> socket,path=/tmp/qtest-591353.qmp,id=char0 -mon chardev=char0,mode=control 
> -display none -audio none -machine virt  -accel tcg -nodefaults -nographic 
> -drive if=pflash,format=raw,file=pc-bios/edk2-aarch64-code.fd,readonly=on 
> -drive if=pflash,format=raw,file=pc-bios/edk2-arm-vars.fd,snapshot=on -cdrom 
> tests/data/uefi-boot-images/bios-tables-test.aarch64.iso.qcow2 -cpu 
> cortex-a57 -smbios type=4,max-speed=2900,current-speed=2700 -accel qtest
> acpi-test: Warning! SPCR binary file mismatch. Actual [aml:/tmp/aml-9G53J2], 
> Expected [aml:tests/data/acpi/virt/SPCR].
> See source file tests/qtest/bios-tables-test.c for instructions on how to 
> update expected files.
> acpi-test: Warning! SPCR mismatch. Actual [asl:/tmp/asl-SR53J2.dsl, 
> aml:/tmp/aml-9G53J2], Expected [asl:/tmp/asl-4Z33J2.dsl, 
> aml:tests/data/acpi/virt/SPCR].
>
> The diff is here:
>
> --- /tmp/asl-4Z33J2.dsl 2024-03-06 15:40:24.879879348 -0300
> +++ /tmp/asl-SR53J2.dsl 2024-03-06 15:40:24.877879347 -0300
> @@ -1,57 +1,49 @@
>   /*
>* Intel ACPI Component Architecture
>* AML/ASL+ Disassembler version 20220331 (64-bit version)
>* Copyright (c) 2000 - 2022 Intel Corporation
>
> (...)
>
>   [000h    4]Signature : "SPCR"[Serial Port 
> Console Redirection Table]
> -[004h 0004   4] Table Length : 0050
> +[004h 0004   4] Table Length : 004F
>   [008h 0008   1] Revision : 02
> -[009h 0009   1] Checksum : B1
> +[009h 0009   1] Checksum : B2
>   [00Ah 0010   6]   Oem ID : "BOCHS "
>
> (...)
>
> -[042h 0066   2]PCI Vendor ID : 
> +[042h 0066   2]PCI Vendor ID : 00FF
>
>
> After inspecting the common helper and what the original ARM code was doing
> I found out that we're missing something down there:
>
>
> On 1/15/24 22:09, Sia Jee Heng wrote:
> > RISC-V should also generate the SPCR in a manner similar to ARM.
> > Therefore, instead of replicating the code, relocate this function
> > to the common AML build.
> >
> > Signed-off-by: Sia Jee Heng 
> > ---
> >   hw/acpi/aml-build.c | 51 
> >   hw/arm/virt-acpi-build.c| 68 +++--
> >   include/hw/acpi/acpi-defs.h | 33 ++
> >   include/hw/acpi/aml-build.h |  4 +++
> >   4 files changed, 115 insertions(+), 41 deletions(-)
> >
> > diff --git a/hw/acpi/aml-build.c b/hw/acpi/aml-build.c
> > index af66bde0f5..f3904650e4 100644
> > --- a/hw/acpi/aml-build.c
> > +++ b/hw/acpi/aml-build.c
> > @@ -1994,6 +1994,57 @@ static void build_processor_hierarchy_node(GArray 
> > *tbl, uint32_t flags,
> >   }
> >   }
> >
> > +void build_spcr(GArray *table_data, BIOSLinker *linker,
> > +const AcpiSpcrData *f, const uint8_t rev,
> > +const char *oem_id, const char *oem_table_id)
> > +{
> > +AcpiTable table = { .sig = "SPCR", .rev = rev, .oem_id = oem_id,
> > +.oem_table_id = oem_table_id };
> > +
> > +acpi_table_begin(&table, table_data);
> > +/* Interface type */
> > +build_append_int_noprefix(table_data, f->interface_type, 1);
> > +/* Reserved */
> > +build_append_int_noprefix(table_data, 0, 3);
> > +/* Base Address */
> > +build_append_gas(table_data, f->base_addr.id, f->base_addr.width,
> > + f->base_addr.offset, f->base_addr.size,
> > + f->base_addr.addr);
> > +/* Interrupt type */
> > +build_append_int_noprefix(table_data, f->interrupt_type, 1);
> > +/* IRQ */
> > +build_append_int_noprefix(table_data, f->pc_interrupt, 1);
> > +/* Global System Interrupt */
> > +build_append_int_noprefix(table_data, f->interrupt, 4);
> > +/* Baud Rate */
> > +build_append_int_noprefix(table_data, f->baud_rate, 1);
> > +/* Parity */
> > +build_append_int_noprefix(table_data, f->parity, 1);
> > +/* Stop Bits */
> > +build_append_int_noprefix(table_data, f->stop_bits, 1);
> > +/* Flow Control */
> > +build_append_int_noprefix(table_data,

Re: [PATCH v3] target/riscv: Fix shift count overflow

2024-03-06 Thread Alistair Francis

On Mon, Feb 26, 2024 at 3:42 AM demin.han  wrote:
>
> The result of (8 - 3 - vlmul) is negative when vlmul >= 6,
> and results in wrong vill.
>
> Signed-off-by: demin.han 

Thanks!

Applied to riscv-to-apply.next

Alistair

> ---
> Changes in v2:
> - Add vlen var
>
> Changes in v3:
> - Fix commit msg typo
>
>  target/riscv/vector_helper.c | 5 ++---
>  1 file changed, 2 insertions(+), 3 deletions(-)
>
> diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c
> index 84cec73eb2..fe56c007d5 100644
> --- a/target/riscv/vector_helper.c
> +++ b/target/riscv/vector_helper.c
> @@ -44,6 +44,7 @@ target_ulong HELPER(vsetvl)(CPURISCVState *env, 
> target_ulong s1,
>  target_ulong reserved = s2 &
>  MAKE_64BIT_MASK(R_VTYPE_RESERVED_SHIFT,
>  xlen - 1 - 
> R_VTYPE_RESERVED_SHIFT);
> +uint16_t vlen = cpu->cfg.vlenb << 3;
>  int8_t lmul;
>
>  if (vlmul & 4) {
> @@ -53,10 +54,8 @@ target_ulong HELPER(vsetvl)(CPURISCVState *env, 
> target_ulong s1,
>   * VLEN * LMUL >= SEW
>   * VLEN >> (8 - lmul) >= sew
>   * (vlenb << 3) >> (8 - lmul) >= sew
> - * vlenb >> (8 - 3 - lmul) >= sew
>   */
> -if (vlmul == 4 ||
> -cpu->cfg.vlenb >> (8 - 3 - vlmul) < sew) {
> +if (vlmul == 4 || (vlen >> (8 - vlmul)) < sew) {
>  vill = true;
>  }
>  }
> --
> 2.43.2
>
>

Re: QEMU Compatibility for Cortex-A55 AArch32 Firmware

2024-03-06 Thread yb liu

Thanks a lot for responding so quickly!
The exact instruction is mrc, and the exception happened early in the boot
process. I have no idea how to debug this error because it happened in
code_gen_prologue.
I need to confirm whether QEMU's A55 fully supports AArch32 mode. If I
would need to add additional instructions to support this, I will
suspend this task for now.
I hope to get your response as soon as possible.
Best regards!

Peter Maydell  于 2024年3月6日周三 23:24写道：

> On Wed, 6 Mar 2024 at 14:33, yb liu  wrote:
> >
> >
> > Dear QEMU developers
> >
> > I hope this email finds you well. We are currently facing an issue
> related to QEMU and the Cortex-A55 architecture. Specifically, we have
> compiled a firmware for Cortex-A55 and would like it to run smoothly on
> QEMU A55 in AArch32 mode.
> >
> > Despite our numerous attempts, we continue to encounter an "undefined
> instruction" error when running the firmware in QEMU. Our question is
> whether QEMU supports this particular use case—specifically, whether it can
> handle A55 running AArch32 firmware.
>
> We have Cortex-A55 emulation, but you may be running into one
> of a few problems:
>  * we might have a bug
>  * firmware sometimes does very low level stuff that no other guest
>code does, so it might run into something we didn't get round
>to implementing (eg it is trying to touch one of the implementation
>specific system registers: we tend to implement these as "does
>nothing" stubs, but it looks like we didn't bother for the A55,
>presumably because Linux didn't care)
>  * your guest code might be doing something that works on the
>real A55 hardware but which is architecturally UNPREDICTABLE:
>QEMU doesn't try to exactly match device-specific IMPDEF
>and UNPREDICTABLE things
>  * your guest code might be assuming the presence of some feature
>that your real A55 has but which QEMU doesn't implement
>(for instance we implement only the absolute minimum RAS
>support required by the architecture, not the full RAS
>that hardware implements)
>
> The thing you'd need to do is look at exactly what the UNDEF
> instruction is (and what the guest code that causes it is
> trying to do) to figure out which of these is the problem.
> Some of these might be easy to fix; some would be harder.
> If you're in a position to be able to modify the firmware
> image then that would also allow you to work around missing
> QEMU functionality if necessary.
>
> The more usual reason for guest firmware not working in QEMU is
> not the CPU emulation itself but lack of a model of the
> device/SoC/etc hardware that the firmware assumes it's
> running on.
>
> -- PMM
>

[PATCH v5 2/3] backends: Initial support for SPDM socket support

2024-03-06 Thread Alistair Francis

From: Huai-Cheng Kuo 

SPDM enables authentication, attestation and key exchange to assist in
providing infrastructure security enablement. It's a standard published
by the DMTF [1].

SPDM supports multiple transports, including PCIe DOE and MCTP.
This patch adds support to QEMU to connect to an external SPDM
instance.

SPDM support can be added to any QEMU device by exposing a
TCP socket to a SPDM server. The server can then implement the SPDM
decoding/encoding support, generally using libspdm [2].

This is similar to how the current TPM implementation works and means
that the heavy lifting of setting up certificate chains, capabilities,
measurements and complex crypto can be done outside QEMU by a well
supported and tested library.

1: https://www.dmtf.org/standards/SPDM
2: https://github.com/DMTF/libspdm

Signed-off-by: Huai-Cheng Kuo 
Signed-off-by: Chris Browy 
Co-developed-by: Jonathan Cameron 
Signed-off-by: Jonathan Cameron 
[ Changes by WM
 - Bug fixes from testing
]
Signed-off-by: Wilfred Mallawa 
[ Changes by AF:
 - Convert to be more QEMU-ified
 - Move to backends as it isn't PCIe specific
]
Signed-off-by: Alistair Francis 
---
 MAINTAINERS  |   6 +
 include/sysemu/spdm-socket.h |  44 +++
 backends/spdm-socket.c   | 216 +++
 backends/Kconfig |   4 +
 backends/meson.build |   2 +
 5 files changed, 272 insertions(+)
 create mode 100644 include/sysemu/spdm-socket.h
 create mode 100644 backends/spdm-socket.c

diff --git a/MAINTAINERS b/MAINTAINERS
index 4183f2f3ab..a07706c225 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -3395,6 +3395,12 @@ F: tests/qtest/*tpm*
 F: docs/specs/tpm.rst
 T: git https://github.com/stefanberger/qemu-tpm.git tpm-next
 
+SPDM
+M: Alistair Francis 
+S: Maintained
+F: backends/spdm-socket.c
+F: include/sysemu/spdm-socket.h
+
 Checkpatch
 S: Odd Fixes
 F: scripts/checkpatch.pl
diff --git a/include/sysemu/spdm-socket.h b/include/sysemu/spdm-socket.h
new file mode 100644
index 00..24e6fccb83
--- /dev/null
+++ b/include/sysemu/spdm-socket.h
@@ -0,0 +1,44 @@
+/*
+ * QEMU SPDM socket support
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to 
deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 
FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#ifndef SPDM_REQUESTER_H
+#define SPDM_REQUESTER_H
+
+int spdm_socket_connect(uint16_t port, Error **errp);
+uint32_t spdm_socket_rsp(const int socket, uint32_t transport_type,
+ void *req, uint32_t req_len,
+ void *rsp, uint32_t rsp_len);
+void spdm_socket_close(const int socket, uint32_t transport_type);
+
+#define SPDM_SOCKET_COMMAND_NORMAL0x0001
+#define SPDM_SOCKET_COMMAND_OOB_ENCAP_KEY_UPDATE  0x8001
+#define SPDM_SOCKET_COMMAND_CONTINUE  0xFFFD
+#define SPDM_SOCKET_COMMAND_SHUTDOWN  0xFFFE
+#define SPDM_SOCKET_COMMAND_UNKOWN0x
+#define SPDM_SOCKET_COMMAND_TEST  0xDEAD
+
+#define SPDM_SOCKET_TRANSPORT_TYPE_MCTP   0x01
+#define SPDM_SOCKET_TRANSPORT_TYPE_PCI_DOE0x02
+
+#define SPDM_SOCKET_MAX_MESSAGE_BUFFER_SIZE   0x1200
+
+#endif
diff --git a/backends/spdm-socket.c b/backends/spdm-socket.c
new file mode 100644
index 00..d0663d696c
--- /dev/null
+++ b/backends/spdm-socket.c
@@ -0,0 +1,216 @@
+/* SPDX-License-Identifier: BSD-3-Clause */
+/*
+ * QEMU SPDM socket support
+ *
+ * This is based on:
+ * 
https://github.com/DMTF/spdm-emu/blob/07c0a838bcc1c6207c656ac75885c0603e344b6f/spdm_emu/spdm_emu_common/command.c
+ * but has been re-written to match QEMU style
+ *
+ * Copyright (c) 2021, DMTF. All rights reserved.
+ * Copyright (c) 2023. Western Digital Corporation or its affiliates.
+ */
+
+#include "qemu/osdep.h"
+#include "sysemu/spdm-socket.h"
+#include "qapi/error.h"
+
+static bool read_bytes(const int socket, uint8_t *buffer,
+   size_t number_of_bytes)
+{
+ssize_t number_received = 0;
+ssize_t result;
+
+while (number_received <

[PATCH v5 3/3] hw/nvme: Add SPDM over DOE support

2024-03-06 Thread Alistair Francis

From: Wilfred Mallawa 

Set up Data Object Exchange (DOE) as an extended capability for the NVME
controller and connect SPDM (CMA) to it.

Signed-off-by: Wilfred Mallawa 
Signed-off-by: Alistair Francis 
Reviewed-by: Jonathan Cameron 
Acked-by: Klaus Jensen 
---
 docs/specs/index.rst|   1 +
 docs/specs/spdm.rst | 122 
 include/hw/pci/pci_device.h |   5 ++
 include/hw/pci/pcie_doe.h   |   3 +
 hw/nvme/ctrl.c  |  53 
 5 files changed, 184 insertions(+)
 create mode 100644 docs/specs/spdm.rst

diff --git a/docs/specs/index.rst b/docs/specs/index.rst
index 1484e3e760..e2d907959a 100644
--- a/docs/specs/index.rst
+++ b/docs/specs/index.rst
@@ -29,6 +29,7 @@ guest hardware that is specific to QEMU.
edu
ivshmem-spec
pvpanic
+   spdm
standard-vga
virt-ctlr
vmcoreinfo
diff --git a/docs/specs/spdm.rst b/docs/specs/spdm.rst
new file mode 100644
index 00..4d0942c1ad
--- /dev/null
+++ b/docs/specs/spdm.rst
@@ -0,0 +1,122 @@
+==
+QEMU Security Protocols and Data Models (SPDM) Support
+==
+
+SPDM enables authentication, attestation and key exchange to assist in
+providing infrastructure security enablement. It's a standard published
+by the `DMTF`_.
+
+QEMU supports connecting to a SPDM responder implementation. This allows an
+external application to emulate the SPDM responder logic for an SPDM device.
+
+Setting up a SPDM server
+
+
+When using QEMU with SPDM devices QEMU will connect to a server which
+implements the SPDM functionality.
+
+SPDM-Utils
+--
+
+You can use `SPDM Utils`_ to emulate a responder. This is the simplest method.
+
+SPDM-Utils is a Linux application to manage, test and develop devices
+supporting DMTF Security Protocol and Data Model (SPDM). It is written in Rust
+and utilises libspdm.
+
+To use SPDM-Utils you will need to do the following steps. Details are included
+in the SPDM-Utils README.
+
+ 1. `Build libspdm`_
+ 2. `Build SPDM Utils using Cargo`_
+ 3. `Run it as a server`_
+
+spdm-emu
+
+
+You can use `spdm emu`_ to model the
+SPDM responder.
+
+.. code-block:: shell
+
+$ cd spdm-emu
+$ git submodule init; git submodule update --recursive
+$ mkdir build; cd build
+$ cmake -DARCH=x64 -DTOOLCHAIN=GCC -DTARGET=Debug -DCRYPTO=openssl ..
+$ make -j32
+$ make copy_sample_key # Build certificates, required for SPDM 
authentication.
+
+It is worth noting that the certificates should be in compliance with
+PCIe r6.1 sec 6.31.3. This means you will need to add the following to
+openssl.cnf
+
+.. code-block::
+
+subjectAltName = 
otherName:2.23.147;UTF8:Vendor=1b36:Device=0010:CC=010802:REV=02:SSVID=1af4:SSID=1100
+2.23.147 = ASN1:OID:2.23.147
+
+and then manually regenerate some certificates with:
+
+.. code-block:: shell
+
+$ openssl req -nodes -newkey ec:param.pem -keyout end_responder.key \
+-out end_responder.req -sha384 -batch \
+-subj "/CN=DMTF libspdm ECP384 responder cert"
+
+$ openssl x509 -req -in end_responder.req -out end_responder.cert \
+-CA inter.cert -CAkey inter.key -sha384 -days 3650 -set_serial 3 \
+-extensions v3_end -extfile ../openssl.cnf
+
+$ openssl asn1parse -in end_responder.cert -out end_responder.cert.der
+
+$ cat ca.cert.der inter.cert.der end_responder.cert.der > 
bundle_responder.certchain.der
+
+You can use SPDM-Utils instead as it will generate the correct certificates
+automatically.
+
+The responder can then be launched with
+
+.. code-block:: shell
+
+$ cd bin
+$ ./spdm_responder_emu --trans PCI_DOE
+
+Connecting an SPDM NVMe device
+==
+
+Once a SPDM server is running we can start QEMU and connect to the server.
+
+For an NVMe device first let's setup a block we can use
+
+.. code-block:: shell
+
+$ cd qemu-spdm/linux/image
+$ dd if=/dev/zero of=blknvme bs=1M count=2096 # 2GB NVMe Drive
+
+Then you can add this to your QEMU command line:
+
+.. code-block:: shell
+
+-drive file=blknvme,if=none,id=mynvme,format=raw \
+-device nvme,drive=mynvme,serial=deadbeef,spdm=2323
+
+At which point QEMU will try to connect to the SPDM server.
+
+
+.. _DMTF:
+   https://www.dmtf.org/standards/SPDM
+
+.. _SPDM Utils:
+   https://github.com/westerndigitalcorporation/spdm-utils
+
+.. _spdm emu:
+   https://github.com/dmtf/spdm-emu
+
+.. _Build SPDM Utils:
+   https://github.com/westerndigitalcorporation/spdm-utils#building
+
+.. _Generate the certificates:
+   
https://github.com/westerndigitalcorporation/spdm-utils#generate-mutable-certificates
+
+.. _Run it as a server:
+   
https://github.com/westerndigitalcorporation/spdm-utils#qemu-spdm-device-emulation
diff --git a/include/hw/pci/pci_device.h b/include/hw/pci/pci_device.h
index d3dd0f64b2..b8379c78f1 100644
---

[PATCH v5 0/3] Initial support for SPDM Responders

2024-03-06 Thread Alistair Francis

The Security Protocol and Data Model (SPDM) Specification defines
messages, data objects, and sequences for performing message exchanges
over a variety of transport and physical media.
 - 
https://www.dmtf.org/sites/default/files/standards/documents/DSP0274_1.3.0.pdf

SPDM currently supports PCIe DOE and MCTP transports, but it can be
extended to support others in the future. This series adds
support to QEMU to connect to an external SPDM instance.

SPDM support can be added to any QEMU device by exposing a
TCP socket to a SPDM server. The server can then implement the SPDM
decoding/encoding support, generally using libspdm [1].

This is similar to how the current TPM implementation works and means
that the heavy lifting of setting up certificate chains, capabilities,
measurements and complex crypto can be done outside QEMU by a well
supported and tested library.

This series implements socket support and exposes SPDM for a NVMe device.

1: https://github.com/DMTF/libspdm

v5:
 - Update MAINTAINERS
v4:
 - Rebase
v3:
 - Spelling fixes
 - Support for SPDM-Utils
v2:
 - Add cover letter
 - A few code fixes based on comments
 - Document SPDM-Utils
 - A few tweaks and clarifications to the documentation

Alistair Francis (1):
  hw/pci: Add all Data Object Types defined in PCIe r6.0

Huai-Cheng Kuo (1):
  backends: Initial support for SPDM socket support

Wilfred Mallawa (1):
  hw/nvme: Add SPDM over DOE support

 MAINTAINERS  |   6 +
 docs/specs/index.rst |   1 +
 docs/specs/spdm.rst  | 122 
 include/hw/pci/pci_device.h  |   5 +
 include/hw/pci/pcie_doe.h|   5 +
 include/sysemu/spdm-socket.h |  44 +++
 backends/spdm-socket.c   | 216 +++
 hw/nvme/ctrl.c   |  53 +
 backends/Kconfig |   4 +
 backends/meson.build |   2 +
 10 files changed, 458 insertions(+)
 create mode 100644 docs/specs/spdm.rst
 create mode 100644 include/sysemu/spdm-socket.h
 create mode 100644 backends/spdm-socket.c

-- 
2.44.0

[PATCH v5 1/3] hw/pci: Add all Data Object Types defined in PCIe r6.0

2024-03-06 Thread Alistair Francis

Add all of the defined protocols/features from the PCIe-SIG r6.0
"Table 6-32 PCI-SIG defined Data Object Types (Vendor ID = 0001h)"
table.

Signed-off-by: Alistair Francis 
Reviewed-by: Jonathan Cameron 
---
 include/hw/pci/pcie_doe.h | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/include/hw/pci/pcie_doe.h b/include/hw/pci/pcie_doe.h
index 87dc17dcef..15d94661f9 100644
--- a/include/hw/pci/pcie_doe.h
+++ b/include/hw/pci/pcie_doe.h
@@ -46,6 +46,8 @@ REG32(PCI_DOE_CAP_STATUS, 0)
 
 /* PCI-SIG defined Data Object Types - r6.0 Table 6-32 */
 #define PCI_SIG_DOE_DISCOVERY   0x00
+#define PCI_SIG_DOE_CMA 0x01
+#define PCI_SIG_DOE_SECURED_CMA 0x02
 
 #define PCI_DOE_DW_SIZE_MAX (1 << 18)
 #define PCI_DOE_PROTOCOL_NUM_MAX256
-- 
2.44.0

Re: [RFC 1/2] hw/riscv: Add server platform reference machine

2024-03-06 Thread Alistair Francis

On Thu, Mar 7, 2024 at 5:13 AM Atish Kumar Patra  wrote:
>
> On Wed, Mar 6, 2024 at 4:56 AM Wu, Fei  wrote:
> >
> > On 3/6/2024 8:19 AM, Alistair Francis wrote:
> > > On Mon, Mar 4, 2024 at 8:28 PM Fei Wu  wrote:
> > >>
> > >> The RISC-V Server Platform specification[1] defines a standardized set
> > >> of hardware and software capabilities, that portable system software,
> > >> such as OS and hypervisors can rely on being present in a RISC-V server
> > >> platform.
> > >>
> > >> A corresponding Qemu RISC-V server platform reference (rvsp-ref for
> > >> short) machine type is added to provide an environment for firmware/OS
> > >> development and testing. The main features included in rvsp-ref are:
> > >>
> > >>  - Based on riscv virt machine type
> > >>  - A new memory map as close to the virt machine as possible
> > >>  - A new virt CPU type rvsp-ref-cpu for server platform compliance
> > >>  - AIA
> > >>  - PCIe AHCI
> > >>  - PCIe NIC
> > >>  - No virtio device
> > >>  - No fw_cfg device
> > >>  - No ACPI table provided
> > >>  - Only minimal device tree nodes
> > >>
> > >> [1] https://github.com/riscv-non-isa/riscv-server-platform
> > >
> > > + Atish
> > >
> > >>
> > >> Signed-off-by: Fei Wu 
> > >> ---
> > >>  configs/devices/riscv64-softmmu/default.mak |1 +
> > >>  hw/riscv/Kconfig|   13 +
> > >>  hw/riscv/meson.build|1 +
> > >>  hw/riscv/server_platform_ref.c  | 1244 +++
> > >>  4 files changed, 1259 insertions(+)
> > >>  create mode 100644 hw/riscv/server_platform_ref.c
> > >>
> > >> diff --git a/configs/devices/riscv64-softmmu/default.mak 
> > >> b/configs/devices/riscv64-softmmu/default.mak
> > >> index 3f68059448..a1d98e49ef 100644
> > >> --- a/configs/devices/riscv64-softmmu/default.mak
> > >> +++ b/configs/devices/riscv64-softmmu/default.mak
> > >> @@ -10,5 +10,6 @@ CONFIG_SPIKE=y
> > >>  CONFIG_SIFIVE_E=y
> > >>  CONFIG_SIFIVE_U=y
> > >>  CONFIG_RISCV_VIRT=y
> > >> +CONFIG_SERVER_PLATFORM_REF=y
> > >>  CONFIG_MICROCHIP_PFSOC=y
> > >>  CONFIG_SHAKTI_C=y
> > >> diff --git a/hw/riscv/Kconfig b/hw/riscv/Kconfig
> > >> index 5d644eb7b1..debac5a7f5 100644
> > >> --- a/hw/riscv/Kconfig
> > >> +++ b/hw/riscv/Kconfig
> > >> @@ -48,6 +48,19 @@ config RISCV_VIRT
> > >>  select ACPI
> > >>  select ACPI_PCI
> > >>
> > >> +config SERVER_PLATFORM_REF
> > >> +bool
> > >> +select RISCV_NUMA
> > >> +select GOLDFISH_RTC
> > >> +select PCI
> > >> +select PCI_EXPRESS_GENERIC_BRIDGE
> > >> +select PFLASH_CFI01
> > >> +select SERIAL
> > >> +select RISCV_ACLINT
> > >> +select RISCV_APLIC
> > >> +select RISCV_IMSIC
> > >> +select SIFIVE_TEST
> > >
> > > Do we really need SiFive Test in the server platform?
> > >
> > It's used to reset the system, is there any better choice?

If we add this now we are stuck with it forever (or at least a long
time). So it'd be nice to think about these and decide if these really
are the best way to do things. We don't have to just copy the existing
virt machine.

There must be a more standard way to do this than MMIO mapped SiFive hardware?

> >
> > Probably I can remove the "sifive,test1 sifive,test0" from the
> > compatible list in fdt, and only keep "syscon", I see opensbi has
> > already removed that support in commit c2e602707.
> >
> > > Same with the goldfish RTC?
> > >
> > Although the spec doesn't make RTC mandatory, it should be a common
> > practice having a RTC on server, so I add a RTC here no matter it's
> > goldfish or not.
> >
>
> The platform spec says
> HPER_070 : A battery-backed RTC or analogous timekeeping mechanism
> MUST be implemented.
>
> Can we consider goldfish RTC in this category ?

I think so, although I haven't read the spec yet :)

My point was more that if we are going to implement a new machine, we
should aim to standardise on things that other
machines/servers/platforms/architectures do. Some of the things in the
virt machine are historical because that's what worked at the time.
But with a clean slate design there might be better alternatives.
Obviously while still sticking to the spec

>
> But I want to discuss a larger point as the server platform/SoC spec
> defines a bunch of optional requirement.
> Does this platform intend to be a platform that is a superset of all
> those options or allow optionality in
> the platform as well ?

Generally I feel that QEMU has the luxury of just supporting
everything. If there is an optional component created by the machine
it generally is fine if the guest doesn't use it. While creating
complex configuration options is a pain for users

Alistair

>
> > >> +
> > >>  config SHAKTI_C
> > >>  bool
> > >>  select RISCV_ACLINT
> > >> diff --git a/hw/riscv/meson.build b/hw/riscv/meson.build
> > >> index 2f7ee81be3..bb3aff91ea 100644
> > >> --- a/hw/riscv/meson.build
> > >> +++ b/hw/riscv/meson.build
> > >> @@ -4,6 +4,7 @@ riscv_ss.add(when: 'CONFIG_RISCV_NUMA', if_true: 
> >

Re: [PATCH v7 0/9] riscv: set vstart_eq_zero on mark_vs_dirty

2024-03-06 Thread Alistair Francis

On Thu, Mar 7, 2024 at 3:20 AM Daniel Henrique Barboza
 wrote:
>
> Hi,
>
> This version is rebased on top of alistair/riscv-to-apply.next, fixing
> conflicts with the Ztso changes in ldst_us_trans().
>
> No other changes made. All patches acked.
>
> v6 link: 
> https://lore.kernel.org/qemu-riscv/20240221213140.365232-1-dbarb...@ventanamicro.com/
>
> Daniel Henrique Barboza (8):
>   trans_rvv.c.inc: mark_vs_dirty() before loads and stores
>   trans_rvv.c.inc: remove 'is_store' bool from load/store fns
>   target/riscv: remove 'over' brconds from vector trans
>   target/riscv/translate.c: remove 'cpu_vstart' global
>   target/riscv: remove 'cpu_vl' global
>   target/riscv/vector_helper.c: set vstart = 0 in GEN_VEXT_VSLIDEUP_VX()
>   trans_rvv.c.inc: remove redundant mark_vs_dirty() calls
>   target/riscv/vector_helper.c: optimize loops in ldst helpers
>
> Ivan Klokov (1):
>   target/riscv: Clear vstart_qe_zero flag

Thanks!

Applied to riscv-to-apply.next

Alistair

>
>  target/riscv/insn_trans/trans_rvbf16.c.inc |  18 +-
>  target/riscv/insn_trans/trans_rvv.c.inc| 283 ++---
>  target/riscv/insn_trans/trans_rvvk.c.inc   |  30 +--
>  target/riscv/translate.c   |  11 +-
>  target/riscv/vector_helper.c   |   7 +-
>  5 files changed, 99 insertions(+), 250 deletions(-)
>
> --
> 2.43.2
>
>

Re: [PATCH v2] migration/multifd: Don't fsync when closing QIOChannelFile

2024-03-06 Thread Peter Xu

On Tue, Mar 05, 2024 at 04:56:29PM -0300, Fabiano Rosas wrote:
> Commit bc38feddeb ("io: fsync before closing a file channel") added a
> fsync/fdatasync at the closing point of the QIOChannelFile to ensure
> integrity of the migration stream in case of QEMU crash.
> 
> The decision to do the sync at qio_channel_close() was not the best
> since that function runs in the main thread and the fsync can cause
> QEMU to hang for several minutes, depending on the migration size and
> disk speed.
> 
> To fix the hang, remove the fsync from qio_channel_file_close().
> 
> At this moment, the migration code is the only user of the fsync and
> we're taking the tradeoff of not having a sync at all, leaving the
> responsibility to the upper layers.
> 
> Fixes: bc38feddeb ("io: fsync before closing a file channel")
> Reviewed-by: Daniel P. Berrangé 
> Signed-off-by: Fabiano Rosas 

Since 9.0 is approaching and it's important that we avoid such a hang, I queued
this version.

However to make sure we can still remember why we do this after a few
years, I added a rich comment and will squash into this patch:

===

diff --git a/migration/multifd.c b/migration/multifd.c
index 0a8fef046b..bf9d483f7a 100644
--- a/migration/multifd.c
+++ b/migration/multifd.c
@@ -714,6 +714,22 @@ static bool multifd_send_cleanup_channel(MultiFDSendParams 
*p, Error **errp)
  * released because finalize() of the iochannel is only
  * triggered on the last reference and it's not guaranteed
  * that we always hold the last refcount when reaching here.
+ *
+ * Closing the fd explicitly has the benefit that if there is any
+ * registered I/O handler callbacks on such fd, that will get a
+ * POLLNVAL event and will further trigger the cleanup to finally
+ * release the IOC.
+ *
+ * FIXME: It should logically be guaranteed that all multifd
+ * channels have no I/O handler callback registered when reaching
+ * here, because migration thread will wait for all multifd channel
+ * establishments to complete during setup.  Since
+ * migrate_fd_cleanup() will be scheduled in main thread too, all
+ * previous callbacks should guarantee to be completed when
+ * reaching here.  See multifd_send_state.channels_created and its
+ * usage.  In the future, we could replace this with an assert
+ * making sure we're the last reference, or simply drop it if above
+ * is more clear to be justified.
  */
 qio_channel_close(p->c, _abort);
 object_unref(OBJECT(p->c));



Thanks,

-- 
Peter Xu

RE: Does "-object" support structured options now?

2024-03-06 Thread Chun Feng Wu

Yes, you’re right, QEMU >=6.0.0 works well; my test failed because I ran it on 
QEMU 4.2.1

From: Daniel P. Berrangé 
Date: Wednesday, March 6, 2024 at 22:43
To: Chun Feng Wu , qemu-devel@nongnu.org 

Subject: [EXTERNAL] Re: Does "-object" support structured options now?
On Wed, Mar 06, 2024 at 02:36:08PM +, Daniel P. Berrangé wrote:
> On Wed, Mar 06, 2024 at 02:33:05PM +, Chun Feng Wu wrote:
> > Thanks Daniel for your response!
> >
> > I tried it with the following cmd
> >
> > qemu-system-x86_64 [other options...] \
> >   -object 
> > '{"qom-type":"throttle-group","id":"limits0","limits":{"iops-total":200}}'
> >
> > And I got error:
> > qemu-system-x86_64: -object 
> > {"qom-type":"throttle-group","id":"limits0","limits":{"iops-total":200}}: 
> > Parameter 'id' is missing
> >
> > Do you know why such error happens?
>
> You have made a mistake somewhere in invoking it ?

Or perhaps you are using a much older QEMU release which lacks JSON
support ?  You need QEMU >= 6.0.0

With regards,
Daniel
--
|: https://berrange.com   -o-https://www.flickr.com/photos/dberrange  :|
|: https://libvirt.org  -o-https://fstop138.berrange.com  :|
|: https://entangle-photo.org -o-https://www.instagram.com/dberrange  :|

Re: [PATCH v5 09/13] hw/cxl/events: Add qmp interfaces to add/release dynamic capacity extents

2024-03-06 Thread fan

On Wed, Mar 06, 2024 at 05:48:11PM +, Jonathan Cameron wrote:
> On Mon,  4 Mar 2024 11:34:04 -0800
> nifan@gmail.com wrote:
> 
> > From: Fan Ni 
> > 
> > Since fabric manager emulation is not supported yet, the change implements
> > the functions to add/release dynamic capacity extents as QMP interfaces.
> 
> We'll need them anyway, or to implement an fm interface via QMP which is
> going to be ugly and complex.
> 
> > 
> > Note: we skip any FM issued extent release request if the exact extent
> > does not exist in the extent list of the device. We will loosen the
> > restriction later once we have partial release support in the kernel.
> 
> Maybe the kernel will treat it as a request to release the extent it
> is tracking that contains it.  So we may want to add a way to poke that.
> Not today though!
> 
> > 
> > 1. Add dynamic capacity extents:
> > 
> > For example, the command to add two continuous extents (each 128MiB long)
> > to region 0 (starting at DPA offset 0) looks like below:
> > 
> > { "execute": "qmp_capabilities" }
> > 
> > { "execute": "cxl-add-dynamic-capacity",
> >   "arguments": {
> >   "path": "/machine/peripheral/cxl-dcd0",
> >   "region-id": 0,
> >   "extents": [
> >   {
> >   "dpa": 0,
> >   "len": 134217728
> >   },
> >   {
> >   "dpa": 134217728,
> >   "len": 134217728
> >   }
> >   ]
> >   }
> > }
> > 
> > 2. Release dynamic capacity extents:
> > 
> > For example, the command to release an extent of size 128MiB from region 0
> > (DPA offset 128MiB) looks like below:
> > 
> > { "execute": "cxl-release-dynamic-capacity",
> >   "arguments": {
> >   "path": "/machine/peripheral/cxl-dcd0",
> >   "region-id": 0,
> >   "extents": [
> >   {
> >   "dpa": 134217728,
> >   "len": 134217728
> >   }
> >   ]
> >   }
> > }
> > 
> > Signed-off-by: Fan Ni 
> 
> ...
>   
> > diff --git a/hw/mem/cxl_type3.c b/hw/mem/cxl_type3.c
> > index dccfaaad3a..e9c8994cdb 100644
> > --- a/hw/mem/cxl_type3.c
> > +++ b/hw/mem/cxl_type3.c
> > @@ -674,6 +674,7 @@ static bool cxl_create_dc_regions(CXLType3Dev *ct3d, 
> > Error **errp)
> >  ct3d->dc.total_capacity += region->len;
> >  }
> >  QTAILQ_INIT(>dc.extents);
> > +QTAILQ_INIT(>dc.extents_pending_to_add);
> >  
> >  return true;
> >  }
> > @@ -686,6 +687,12 @@ static void cxl_destroy_dc_regions(CXLType3Dev *ct3d)
> >  ent = QTAILQ_FIRST(>dc.extents);
> >  cxl_remove_extent_from_extent_list(>dc.extents, ent);
> >  }
> > +
> > +while (!QTAILQ_EMPTY(>dc.extents_pending_to_add)) {
> 
> QTAILQ_FOR_EACHSAFE
> 
> > +ent = QTAILQ_FIRST(>dc.extents_pending_to_add);
> > +
> > cxl_remove_extent_from_extent_list(>dc.extents_pending_to_add,
> > +   ent);
> > +}
> >  }
> 
> > +/*
> > + * The main function to process dynamic capacity event. Currently DC 
> > extents
> > + * add/release requests are processed.
> > + */
> > +static void qmp_cxl_process_dynamic_capacity(const char *path, CxlEventLog 
> > log,
> > + CXLDCEventType type, uint16_t 
> > hid,
> > + uint8_t rid,
> > + CXLDCExtentRecordList 
> > *records,
> > + Error **errp)
> > +{
> > +Object *obj;
> > +CXLEventDynamicCapacity dCap = {};
> > +CXLEventRecordHdr *hdr = 
> > +CXLType3Dev *dcd;
> > +uint8_t flags = 1 << CXL_EVENT_TYPE_INFO;
> > +uint32_t num_extents = 0;
> > +CXLDCExtentRecordList *list;
> > +g_autofree CXLDCExtentRaw *extents = NULL;
> > +uint8_t enc_log;
> > +uint64_t offset, len, block_size;
> > +int i;
> > +int rc;
> 
> Combine the two lines above.
> 
> > +g_autofree unsigned long *blk_bitmap = NULL;
> > +
> > +obj = object_resolve_path(path, NULL);
> > +if (!obj) {
> > +error_setg(errp, "Unable to resolve path");
> > +return;
> > +}
> 
> object_resolve_path_type() and skip a step (should do this in various places
> in our existing code!)
> 
> > +if (!object_dynamic_cast(obj, TYPE_CXL_TYPE3)) {
> > +error_setg(errp, "Path not point to a valid CXL type3 device");
> > +return;
> > +}
> > +
> > +dcd = CXL_TYPE3(obj);
> > +if (!dcd->dc.num_regions) {
> > +error_setg(errp, "No dynamic capacity support from the device");
> > +return;
> > +}
> > +
> > +rc = ct3d_qmp_cxl_event_log_enc(log);
> > +if (rc < 0) {
> > +error_setg(errp, "Unhandled error log type");
> > +return;
> > +}
> > +enc_log = rc;
> > +
> > +if (rid >= dcd->dc.num_regions) {
> > +error_setg(errp, "region id is too large");
> > +return;
> > +}
> > +block_size = dcd->dc.regions[rid].block_size;
> > +
> > +/* Sanity check and count the extents */
> > +list =

Re: [PATCH v5 09/13] hw/cxl/events: Add qmp interfaces to add/release dynamic capacity extents

2024-03-06 Thread fan

On Wed, Mar 06, 2024 at 05:48:11PM +, Jonathan Cameron wrote:
> On Mon,  4 Mar 2024 11:34:04 -0800
> nifan@gmail.com wrote:
> 
> > From: Fan Ni 
> > 
> > Since fabric manager emulation is not supported yet, the change implements
> > the functions to add/release dynamic capacity extents as QMP interfaces.
> 
> We'll need them anyway, or to implement an fm interface via QMP which is
> going to be ugly and complex.
> 
> > 
> > Note: we skip any FM issued extent release request if the exact extent
> > does not exist in the extent list of the device. We will loosen the
> > restriction later once we have partial release support in the kernel.
> 
> Maybe the kernel will treat it as a request to release the extent it
> is tracking that contains it.  So we may want to add a way to poke that.
> Not today though!
> 
> > 
> > 1. Add dynamic capacity extents:
> > 
> > For example, the command to add two continuous extents (each 128MiB long)
> > to region 0 (starting at DPA offset 0) looks like below:
> > 
> > { "execute": "qmp_capabilities" }
> > 
> > { "execute": "cxl-add-dynamic-capacity",
> >   "arguments": {
> >   "path": "/machine/peripheral/cxl-dcd0",
> >   "region-id": 0,
> >   "extents": [
> >   {
> >   "dpa": 0,
> >   "len": 134217728
> >   },
> >   {
> >   "dpa": 134217728,
> >   "len": 134217728
> >   }
> >   ]
> >   }
> > }
> > 
> > 2. Release dynamic capacity extents:
> > 
> > For example, the command to release an extent of size 128MiB from region 0
> > (DPA offset 128MiB) looks like below:
> > 
> > { "execute": "cxl-release-dynamic-capacity",
> >   "arguments": {
> >   "path": "/machine/peripheral/cxl-dcd0",
> >   "region-id": 0,
> >   "extents": [
> >   {
> >   "dpa": 134217728,
> >   "len": 134217728
> >   }
> >   ]
> >   }
> > }
> > 
> > Signed-off-by: Fan Ni 
> 
> ...
>   
> > diff --git a/hw/mem/cxl_type3.c b/hw/mem/cxl_type3.c
> > index dccfaaad3a..e9c8994cdb 100644
> > --- a/hw/mem/cxl_type3.c
> > +++ b/hw/mem/cxl_type3.c
> > @@ -674,6 +674,7 @@ static bool cxl_create_dc_regions(CXLType3Dev *ct3d, 
> > Error **errp)
> >  ct3d->dc.total_capacity += region->len;
> >  }
> >  QTAILQ_INIT(>dc.extents);
> > +QTAILQ_INIT(>dc.extents_pending_to_add);
> >  
> >  return true;
> >  }
> > @@ -686,6 +687,12 @@ static void cxl_destroy_dc_regions(CXLType3Dev *ct3d)
> >  ent = QTAILQ_FIRST(>dc.extents);
> >  cxl_remove_extent_from_extent_list(>dc.extents, ent);
> >  }
> > +
> > +while (!QTAILQ_EMPTY(>dc.extents_pending_to_add)) {
> 
> QTAILQ_FOR_EACHSAFE
> 
> > +ent = QTAILQ_FIRST(>dc.extents_pending_to_add);
> > +
> > cxl_remove_extent_from_extent_list(>dc.extents_pending_to_add,
> > +   ent);
> > +}
> >  }
> 
> > +/*
> > + * The main function to process dynamic capacity event. Currently DC 
> > extents
> > + * add/release requests are processed.
> > + */
> > +static void qmp_cxl_process_dynamic_capacity(const char *path, CxlEventLog 
> > log,
> > + CXLDCEventType type, uint16_t 
> > hid,
> > + uint8_t rid,
> > + CXLDCExtentRecordList 
> > *records,
> > + Error **errp)
> > +{
> > +Object *obj;
> > +CXLEventDynamicCapacity dCap = {};
> > +CXLEventRecordHdr *hdr = 
> > +CXLType3Dev *dcd;
> > +uint8_t flags = 1 << CXL_EVENT_TYPE_INFO;
> > +uint32_t num_extents = 0;
> > +CXLDCExtentRecordList *list;
> > +g_autofree CXLDCExtentRaw *extents = NULL;
> > +uint8_t enc_log;
> > +uint64_t offset, len, block_size;
> > +int i;
> > +int rc;
> 
> Combine the two lines above.
> 
> > +g_autofree unsigned long *blk_bitmap = NULL;
> > +
> > +obj = object_resolve_path(path, NULL);
> > +if (!obj) {
> > +error_setg(errp, "Unable to resolve path");
> > +return;
> > +}
> 
> object_resolve_path_type() and skip a step (should do this in various places
> in our existing code!)
> 
> > +if (!object_dynamic_cast(obj, TYPE_CXL_TYPE3)) {
> > +error_setg(errp, "Path not point to a valid CXL type3 device");
> > +return;
> > +}
> > +
> > +dcd = CXL_TYPE3(obj);
> > +if (!dcd->dc.num_regions) {
> > +error_setg(errp, "No dynamic capacity support from the device");
> > +return;
> > +}
> > +
> > +rc = ct3d_qmp_cxl_event_log_enc(log);
> > +if (rc < 0) {
> > +error_setg(errp, "Unhandled error log type");
> > +return;
> > +}
> > +enc_log = rc;
> > +
> > +if (rid >= dcd->dc.num_regions) {
> > +error_setg(errp, "region id is too large");
> > +return;
> > +}
> > +block_size = dcd->dc.regions[rid].block_size;
> > +
> > +/* Sanity check and count the extents */
> > +list =

Re: [PATCH] vmbus: Print a warning when enabled without the recommended set of features

2024-03-06 Thread Paolo Bonzini


On 1/25/24 17:19, Maciej S. Szmigiero wrote:

From: "Maciej S. Szmigiero" 

Some Windows versions crash at boot or fail to enable the VMBus device if
they don't see the expected set of Hyper-V features (enlightenments).

Since this provides poor user experience let's warn user if the VMBus
device is enabled without the recommended set of Hyper-V features.

The recommended set is the minimum set of Hyper-V features required to make
the VMBus device work properly in Windows Server versions 2016, 2019 and
2022.

Signed-off-by: Maciej S. Szmigiero 


Acked-by: Paolo Bonzini 

Thanks,

Paolo


---
  hw/hyperv/hyperv.c| 12 
  hw/hyperv/vmbus.c |  6 ++
  include/hw/hyperv/hyperv.h|  4 
  target/i386/kvm/hyperv-stub.c |  4 
  target/i386/kvm/hyperv.c  |  5 +
  target/i386/kvm/hyperv.h  |  2 ++
  target/i386/kvm/kvm.c |  7 +++
  7 files changed, 40 insertions(+)

diff --git a/hw/hyperv/hyperv.c b/hw/hyperv/hyperv.c
index 57b402b95610..2c91de7ff4a8 100644
--- a/hw/hyperv/hyperv.c
+++ b/hw/hyperv/hyperv.c
@@ -947,3 +947,15 @@ uint64_t hyperv_syndbg_query_options(void)
  
  return msg.u.query_options.options;

  }
+
+static bool vmbus_recommended_features_enabled;
+
+bool hyperv_are_vmbus_recommended_features_enabled(void)
+{
+return vmbus_recommended_features_enabled;
+}
+
+void hyperv_set_vmbus_recommended_features_enabled(void)
+{
+vmbus_recommended_features_enabled = true;
+}
diff --git a/hw/hyperv/vmbus.c b/hw/hyperv/vmbus.c
index 380239af2c7b..f33afeeea27d 100644
--- a/hw/hyperv/vmbus.c
+++ b/hw/hyperv/vmbus.c
@@ -2631,6 +2631,12 @@ static void vmbus_bridge_realize(DeviceState *dev, Error 
**errp)
  return;
  }
  
+if (!hyperv_are_vmbus_recommended_features_enabled()) {

+warn_report("VMBus enabled without the recommended set of Hyper-V features: 
"
+"hv-stimer, hv-vapic and hv-runtime. "
+"Some Windows versions might not boot or enable the VMBus 
device");
+}
+
  bridge->bus = VMBUS(qbus_new(TYPE_VMBUS, dev, "vmbus"));
  }
  
diff --git a/include/hw/hyperv/hyperv.h b/include/hw/hyperv/hyperv.h

index 015c3524b1c2..d717b4e13d40 100644
--- a/include/hw/hyperv/hyperv.h
+++ b/include/hw/hyperv/hyperv.h
@@ -139,4 +139,8 @@ typedef struct HvSynDbgMsg {
  } HvSynDbgMsg;
  typedef uint16_t (*HvSynDbgHandler)(void *context, HvSynDbgMsg *msg);
  void hyperv_set_syndbg_handler(HvSynDbgHandler handler, void *context);
+
+bool hyperv_are_vmbus_recommended_features_enabled(void);
+void hyperv_set_vmbus_recommended_features_enabled(void);
+
  #endif
diff --git a/target/i386/kvm/hyperv-stub.c b/target/i386/kvm/hyperv-stub.c
index 778ed782e6fc..3263dcf05d31 100644
--- a/target/i386/kvm/hyperv-stub.c
+++ b/target/i386/kvm/hyperv-stub.c
@@ -52,3 +52,7 @@ void hyperv_x86_synic_reset(X86CPU *cpu)
  void hyperv_x86_synic_update(X86CPU *cpu)
  {
  }
+
+void hyperv_x86_set_vmbus_recommended_features_enabled(void)
+{
+}
diff --git a/target/i386/kvm/hyperv.c b/target/i386/kvm/hyperv.c
index 6825c89af374..f2a3fe650a18 100644
--- a/target/i386/kvm/hyperv.c
+++ b/target/i386/kvm/hyperv.c
@@ -149,3 +149,8 @@ int kvm_hv_handle_exit(X86CPU *cpu, struct kvm_hyperv_exit 
*exit)
  return -1;
  }
  }
+
+void hyperv_x86_set_vmbus_recommended_features_enabled(void)
+{
+hyperv_set_vmbus_recommended_features_enabled();
+}
diff --git a/target/i386/kvm/hyperv.h b/target/i386/kvm/hyperv.h
index 67543296c3a4..e3982c8f4dd1 100644
--- a/target/i386/kvm/hyperv.h
+++ b/target/i386/kvm/hyperv.h
@@ -26,4 +26,6 @@ int hyperv_x86_synic_add(X86CPU *cpu);
  void hyperv_x86_synic_reset(X86CPU *cpu);
  void hyperv_x86_synic_update(X86CPU *cpu);
  
+void hyperv_x86_set_vmbus_recommended_features_enabled(void);

+
  #endif
diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c
index e88e65fe014c..d3d01b3cf82d 100644
--- a/target/i386/kvm/kvm.c
+++ b/target/i386/kvm/kvm.c
@@ -1650,6 +1650,13 @@ static int hyperv_init_vcpu(X86CPU *cpu)
  }
  }
  
+/* Skip SynIC and VP_INDEX since they are hard deps already */

+if (hyperv_feat_enabled(cpu, HYPERV_FEAT_STIMER) &&
+hyperv_feat_enabled(cpu, HYPERV_FEAT_VAPIC) &&
+hyperv_feat_enabled(cpu, HYPERV_FEAT_RUNTIME)) {
+hyperv_x86_set_vmbus_recommended_features_enabled();
+}
+
  return 0;
  }

Re: [PATCH 1/1] kvm: add support for guest physical bits

2024-03-06 Thread Paolo Bonzini


On 3/4/24 02:54, Xiaoyao Li wrote:

On 3/1/2024 6:17 PM, Gerd Hoffmann wrote:

Query KVM for the supported guest physical address bits using
KVM_CAP_VM_GPA_BITS.  Expose the value to the guest via CPUID
(leaf 0x80000008, eax, bits 16-23).

Signed-off-by: Gerd Hoffmann 
---
  target/i386/cpu.h | 1 +
  target/i386/cpu.c | 1 +
  target/i386/kvm/kvm.c | 8 
  3 files changed, 10 insertions(+)

diff --git a/target/i386/cpu.h b/target/i386/cpu.h
index 952174bb6f52..d427218827f6 100644
--- a/target/i386/cpu.h
+++ b/target/i386/cpu.h
@@ -2026,6 +2026,7 @@ struct ArchCPU {
  /* Number of physical address bits supported */
  uint32_t phys_bits;
+    uint32_t guest_phys_bits;
  /* in order to simplify APIC support, we leave this pointer to the
 user */
diff --git a/target/i386/cpu.c b/target/i386/cpu.c
index 2666ef380891..1a6cfc75951e 100644
--- a/target/i386/cpu.c
+++ b/target/i386/cpu.c
@@ -6570,6 +6570,7 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t 
index, uint32_t count,

  if (env->features[FEAT_8000_0001_EDX] & CPUID_EXT2_LM) {
  /* 64 bit processor */
   *eax |= (cpu_x86_virtual_addr_width(env) << 8);
+ *eax |= (cpu->guest_phys_bits << 16);


I think you misunderstand this field.

If you expose this field to the guest, it is information about the nested 
guest, i.e., a guest that itself runs as a hypervisor will know that its 
nested guest can have guest_phys_bits bits of physical address.


It's one possible interpretation of AMD's definition. However there's no 
processor that has different MAXPHYADDR with/without nested paging, so 
there's no real benefit in adopting that interpretation.


The only architectural case in which you have two conflicting values for 
the guest MAXPHYADDR is hCR4.LA57=0 (and likewise for Intel 4-level EPT) 
with MAXPHYADDR=52, so it's useful to treat GuestPhysAddrSize as a way 
to communicate this situation to the guest.


Paolo

Re: [PATCH v5 08/13] hw/cxl/cxl-mailbox-utils: Add mailbox commands to support add/release dynamic capacity response

2024-03-06 Thread fan

On Wed, Mar 06, 2024 at 05:28:27PM +0000, Jonathan Cameron wrote:
> On Mon,  4 Mar 2024 11:34:03 -0800
> nifan@gmail.com wrote:
> 
> > From: Fan Ni 
> > 
> > Per CXL spec 3.1, two mailbox commands are implemented:
> > Add Dynamic Capacity Response (Opcode 4802h) 8.2.9.9.9.3, and
> > Release Dynamic Capacity (Opcode 4803h) 8.2.9.9.9.4.
> > 
> > Signed-off-by: Fan Ni 
> 
> Hmm. So I had a thought which would work for what you
> have here. See include/qemu/range.h
> I like the region merging stuff that is also in the list operators
> but we shouldn't use that because we have other reasons not to
> fuse ranges (sequence numbering etc)
> 
> We could make an extent a wrapper around a struct Range though
> so that we can use the comparison stuff directly.
> + we can use the list manipulation in there as the basis for a future
> extent merging infrastructure that is tag and sequence number (if
> provided - so shared capacity or pmem) aware.
> 
> Jonathan
> 
> 
> > ---
> > +
> > +/*
> > + * CXL r3.1 Table 8-168: Add Dynamic Capacity Response Input Payload
> > + * CXL r3.1 Table 8-170: Release Dynamic Capacity Input Payload
> > + */
> > +typedef struct CXLUpdateDCExtentListInPl {
> > +uint32_t num_entries_updated;
> > +uint8_t flags;
> > +uint8_t rsvd[3];
> > +/* CXL r3.1 Table 8-169: Updated Extent */
> > +struct {
> > +uint64_t start_dpa;
> > +uint64_t len;
> > +uint8_t rsvd[8];
> > +} QEMU_PACKED updated_entries[];
> > +} QEMU_PACKED CXLUpdateDCExtentListInPl;
> > +
> > +/*
> > + * For the extents in the extent list to operate, check whether they are 
> > valid
> > + * 1. The extent should be in the range of a valid DC region;
> > + * 2. The extent should not cross multiple regions;
> > + * 3. The start DPA and the length of the extent should align with the 
> > block
> > + * size of the region;
> > + * 4. The address range of multiple extents in the list should not overlap.
> 
> Hmm. Interesting.  I was thinking a given add / remove command rather than
> just the extents can't overlap a region.  However I can't find text on that
> so I believe your interpretation is correct. It is only specified for the
> event records, but that is good enough I think.  We might want to propose
> tightening the spec on this to allow devices to say no to such complex
> extent lists. Maybe a nice friendly Memory vendor should query this one if
> it's a potential problem for real devices.  Might not be!
> 
> > + */
> > +static CXLRetCode cxl_detect_malformed_extent_list(CXLType3Dev *ct3d,
> > +const CXLUpdateDCExtentListInPl *in)
> > +{
> > +uint64_t min_block_size = UINT64_MAX;
> > +CXLDCRegion *region = >dc.regions[0];
> > +CXLDCRegion *lastregion = >dc.regions[ct3d->dc.num_regions - 1];
> > +g_autofree unsigned long *blk_bitmap = NULL;
> > +uint64_t dpa, len;
> > +uint32_t i;
> > +
> > +for (i = 0; i < ct3d->dc.num_regions; i++) {
> > +region = >dc.regions[i];
> > +min_block_size = MIN(min_block_size, region->block_size);
> > +}
> > +
> > +blk_bitmap = bitmap_new((lastregion->base + lastregion->len -
> > + ct3d->dc.regions[0].base) / min_block_size);
> > +
> > +for (i = 0; i < in->num_entries_updated; i++) {
> > +dpa = in->updated_entries[i].start_dpa;
> > +len = in->updated_entries[i].len;
> > +
> > +region = cxl_find_dc_region(ct3d, dpa, len);
> > +if (!region) {
> > +return CXL_MBOX_INVALID_PA;
> > +}
> > +
> > +dpa -= ct3d->dc.regions[0].base;
> > +if (dpa % region->block_size || len % region->block_size) {
> > +return CXL_MBOX_INVALID_EXTENT_LIST;
> > +}
> > +/* the dpa range already covered by some other extents in the list 
> > */
> > +if (test_any_bits_set(blk_bitmap, dpa / min_block_size,
> > +len / min_block_size)) {
> > +return CXL_MBOX_INVALID_EXTENT_LIST;
> > +}
> > +bitmap_set(blk_bitmap, dpa / min_block_size, len / min_block_size);
> > +   }
> > +
> > +return CXL_MBOX_SUCCESS;
> > +}
> > +
> > +/*
> > + * CXL r3.1 section 8.2.9.9.9.3: Add Dynamic Capacity Response (Opcode 
> > 4802h)
> > + * An extent is added to the extent list and becomes usable only after the
> > + * response is processed successfully
> > + */
> > +static CXLRetCode cmd_dcd_add_dyn_cap_rsp(const struct cxl_cmd *cmd,
> > +  uint8_t *payload_in,
> > +  size_t len_in,
> > +  uint8_t *payload_out,
> > +  size_t *len_out,
> > +  CXLCCI *cci)
> > +{
> > +CXLUpdateDCExtentListInPl *in = (void *)payload_in;
> > +CXLType3Dev *ct3d = CXL_TYPE3(cci->d);
> > +CXLDCExtentList *extent_list = >dc.extents;
> > +CXLDCExtent *ent;
> > +uint32_t i;
> > +

[PATCH v2] virtio-gpu: first surface update with blob scanout after resumed

2024-03-06 Thread dongwon . kim

From: Dongwon Kim 

The guest surface needs to be updated with a blob scanout after the guest
is resumed from saved VM state, if blob is enabled.

v2: Rebased

Cc: Marc-André Lureau 
Cc: Vivek Kasireddy 
Signed-off-by: Dongwon Kim 
---
 hw/display/virtio-gpu.c | 21 ++---
 1 file changed, 14 insertions(+), 7 deletions(-)

diff --git a/hw/display/virtio-gpu.c b/hw/display/virtio-gpu.c
index 1c1ee230b3..01bc4f9565 100644
--- a/hw/display/virtio-gpu.c
+++ b/hw/display/virtio-gpu.c
@@ -1422,16 +1422,23 @@ static int virtio_gpu_post_load(void *opaque, int 
version_id)
 if (!res) {
 return -EINVAL;
 }
-scanout->ds = qemu_create_displaysurface_pixman(res->image);
-if (!scanout->ds) {
-return -EINVAL;
-}
+
+if (res->blob_size) {
+assert(g->dmabuf.primary[i] != NULL);
+g->dmabuf.primary[i]->buf.fd = res->dmabuf_fd;
+dpy_gl_scanout_dmabuf(scanout->con, >dmabuf.primary[i]->buf);
+} else {
+scanout->ds = qemu_create_displaysurface_pixman(res->image);
+if (!scanout->ds) {
+return -EINVAL;
+}
 #ifdef WIN32
-qemu_displaysurface_win32_set_handle(scanout->ds, res->handle, 0);
+qemu_displaysurface_win32_set_handle(scanout->ds, res->handle, 0);
 #endif
+dpy_gfx_replace_surface(scanout->con, scanout->ds);
+dpy_gfx_update_full(scanout->con);
+}
 
-dpy_gfx_replace_surface(scanout->con, scanout->ds);
-dpy_gfx_update_full(scanout->con);
 if (scanout->cursor.resource_id) {
 update_cursor(g, >cursor);
 }
-- 
2.34.1

[PATCH v3] ui/gtk: flush display pipeline before saving vmstate when blob=true

2024-03-06 Thread dongwon . kim

From: Dongwon Kim 

If the guest state is paused before it gets a response for the current
scanout frame submission (resource-flush), it won't flush new frames
after being restored as it still waits for the old response, which is
accepted as a scanout render done signal. So it's needed to unblock
the current scanout render pipeline before the run state is changed
to make sure the guest receives the response for the current frame
submission.

v2: Giving some time for the fence to be signaled before flushing
the pipeline

v3: Prevent redundant call of gd_hw_gl_flushed by checking dmabuf
and fence_fd >= 0 in it (e.g. during and after eglClientWaitSync
in gd_change_runstate).

Destroy sync object later in gd_hw_gl_flushed

Cc: Marc-André Lureau 
Cc: Vivek Kasireddy 
Signed-off-by: Dongwon Kim 
---
 ui/egl-helpers.c |  2 --
 ui/gtk.c | 31 +++
 2 files changed, 27 insertions(+), 6 deletions(-)

diff --git a/ui/egl-helpers.c b/ui/egl-helpers.c
index 3d19dbe382..a77f9e57d9 100644
--- a/ui/egl-helpers.c
+++ b/ui/egl-helpers.c
@@ -385,8 +385,6 @@ void egl_dmabuf_create_fence(QemuDmaBuf *dmabuf)
 if (dmabuf->sync) {
 dmabuf->fence_fd = eglDupNativeFenceFDANDROID(qemu_egl_display,
   dmabuf->sync);
-eglDestroySyncKHR(qemu_egl_display, dmabuf->sync);
-dmabuf->sync = NULL;
 }
 }
 
diff --git a/ui/gtk.c b/ui/gtk.c
index 810d7fc796..eaca890cba 100644
--- a/ui/gtk.c
+++ b/ui/gtk.c
@@ -597,10 +597,14 @@ void gd_hw_gl_flushed(void *vcon)
 VirtualConsole *vc = vcon;
 QemuDmaBuf *dmabuf = vc->gfx.guest_fb.dmabuf;
 
-qemu_set_fd_handler(dmabuf->fence_fd, NULL, NULL, NULL);
-close(dmabuf->fence_fd);
-dmabuf->fence_fd = -1;
-graphic_hw_gl_block(vc->gfx.dcl.con, false);
+if (dmabuf && dmabuf->fence_fd >= 0) {
+qemu_set_fd_handler(dmabuf->fence_fd, NULL, NULL, NULL);
+close(dmabuf->fence_fd);
+dmabuf->fence_fd = -1;
+eglDestroySyncKHR(qemu_egl_display, dmabuf->sync);
+dmabuf->sync = NULL;
+graphic_hw_gl_block(vc->gfx.dcl.con, false);
+}
 }
 
 /** DisplayState Callbacks (opengl version) **/
@@ -678,6 +682,25 @@ static const DisplayGLCtxOps egl_ctx_ops = {
 static void gd_change_runstate(void *opaque, bool running, RunState state)
 {
 GtkDisplayState *s = opaque;
+int i;
+
+if (state == RUN_STATE_SAVE_VM) {
+for (i = 0; i < s->nb_vcs; i++) {
+VirtualConsole *vc = >vc[i];
+
+if (vc->gfx.guest_fb.dmabuf &&
+vc->gfx.guest_fb.dmabuf->fence_fd >= 0) {
+eglClientWaitSync(qemu_egl_display,
+  vc->gfx.guest_fb.dmabuf->sync,
+  EGL_SYNC_FLUSH_COMMANDS_BIT_KHR,
+  1);
+
+/* force flushing current scanout blob rendering process
+ * just in case the fence is still not signaled */
+gd_hw_gl_flushed(vc);
+}
+}
+}
 
 gd_update_caption(s);
 }
-- 
2.34.1

Re: [PATCH v5 08/13] hw/cxl/cxl-mailbox-utils: Add mailbox commands to support add/release dynamic capacity response

2024-03-06 Thread fan

On Wed, Mar 06, 2024 at 05:28:27PM +0000, Jonathan Cameron wrote:
> On Mon,  4 Mar 2024 11:34:03 -0800
> nifan@gmail.com wrote:
> 
> > From: Fan Ni 
> > 
> > Per CXL spec 3.1, two mailbox commands are implemented:
> > Add Dynamic Capacity Response (Opcode 4802h) 8.2.9.9.9.3, and
> > Release Dynamic Capacity (Opcode 4803h) 8.2.9.9.9.4.
> > 
> > Signed-off-by: Fan Ni 
> 
> Hmm. So I had a thought which would work for what you
> have here. See include/qemu/range.h
> I like the region merging stuff that is also in the list operators
> but we shouldn't use that because we have other reasons not to
> fuse ranges (sequence numbering etc)
> 
> We could make an extent a wrapper around a struct Range though
> so that we can use the comparison stuff directly.
> + we can use the list manipulation in there as the basis for a future
> extent merging infrastructure that is tag and sequence number (if
> provided - so shared capacity or pmem) aware.
> 
> Jonathan
> 
> 
> > ---
> > +
> > +/*
> > + * CXL r3.1 Table 8-168: Add Dynamic Capacity Response Input Payload
> > + * CXL r3.1 Table 8-170: Release Dynamic Capacity Input Payload
> > + */
> > +typedef struct CXLUpdateDCExtentListInPl {
> > +uint32_t num_entries_updated;
> > +uint8_t flags;
> > +uint8_t rsvd[3];
> > +/* CXL r3.1 Table 8-169: Updated Extent */
> > +struct {
> > +uint64_t start_dpa;
> > +uint64_t len;
> > +uint8_t rsvd[8];
> > +} QEMU_PACKED updated_entries[];
> > +} QEMU_PACKED CXLUpdateDCExtentListInPl;
> > +
> > +/*
> > + * For the extents in the extent list to operate, check whether they are 
> > valid
> > + * 1. The extent should be in the range of a valid DC region;
> > + * 2. The extent should not cross multiple regions;
> > + * 3. The start DPA and the length of the extent should align with the 
> > block
> > + * size of the region;
> > + * 4. The address range of multiple extents in the list should not overlap.
> 
> Hmm. Interesting.  I was thinking a given add / remove command rather than
> just the extents can't overlap a region.  However I can't find text on that
> so I believe your interpretation is correct. It is only specified for the
> event records, but that is good enough I think.  We might want to propose
> tightening the spec on this to allow devices to say no to such complex
> extent lists. Maybe a nice friendly Memory vendor should query this one if
> it's a potential problem for real devices.  Might not be!
> 
> > + */
> > +static CXLRetCode cxl_detect_malformed_extent_list(CXLType3Dev *ct3d,
> > +const CXLUpdateDCExtentListInPl *in)
> > +{
> > +uint64_t min_block_size = UINT64_MAX;
> > +CXLDCRegion *region = >dc.regions[0];
> > +CXLDCRegion *lastregion = >dc.regions[ct3d->dc.num_regions - 1];
> > +g_autofree unsigned long *blk_bitmap = NULL;
> > +uint64_t dpa, len;
> > +uint32_t i;
> > +
> > +for (i = 0; i < ct3d->dc.num_regions; i++) {
> > +region = >dc.regions[i];
> > +min_block_size = MIN(min_block_size, region->block_size);
> > +}
> > +
> > +blk_bitmap = bitmap_new((lastregion->base + lastregion->len -
> > + ct3d->dc.regions[0].base) / min_block_size);
> > +
> > +for (i = 0; i < in->num_entries_updated; i++) {
> > +dpa = in->updated_entries[i].start_dpa;
> > +len = in->updated_entries[i].len;
> > +
> > +region = cxl_find_dc_region(ct3d, dpa, len);
> > +if (!region) {
> > +return CXL_MBOX_INVALID_PA;
> > +}
> > +
> > +dpa -= ct3d->dc.regions[0].base;
> > +if (dpa % region->block_size || len % region->block_size) {
> > +return CXL_MBOX_INVALID_EXTENT_LIST;
> > +}
> > +/* the dpa range already covered by some other extents in the list 
> > */
> > +if (test_any_bits_set(blk_bitmap, dpa / min_block_size,
> > +len / min_block_size)) {
> > +return CXL_MBOX_INVALID_EXTENT_LIST;
> > +}
> > +bitmap_set(blk_bitmap, dpa / min_block_size, len / min_block_size);
> > +   }
> > +
> > +return CXL_MBOX_SUCCESS;
> > +}
> > +
> > +/*
> > + * CXL r3.1 section 8.2.9.9.9.3: Add Dynamic Capacity Response (Opcode 
> > 4802h)
> > + * An extent is added to the extent list and becomes usable only after the
> > + * response is processed successfully
> > + */
> > +static CXLRetCode cmd_dcd_add_dyn_cap_rsp(const struct cxl_cmd *cmd,
> > +  uint8_t *payload_in,
> > +  size_t len_in,
> > +  uint8_t *payload_out,
> > +  size_t *len_out,
> > +  CXLCCI *cci)
> > +{
> > +CXLUpdateDCExtentListInPl *in = (void *)payload_in;
> > +CXLType3Dev *ct3d = CXL_TYPE3(cci->d);
> > +CXLDCExtentList *extent_list = >dc.extents;
> > +CXLDCExtent *ent;
> > +uint32_t i;
> > +

Re: [PATCH v3 4/7] Add migrate_set_ports into migrate_qmp to change migration port number

2024-03-06 Thread Het Gala



On 06/03/24 9:31 pm, Fabiano Rosas wrote:

Het Gala  writes:


On 06/03/24 8:06 pm, Fabiano Rosas wrote:

Het Gala   writes:


Add a migrate_set_ports() function that from each QDict, fills in
the port in case it was 0 in the test.
Handle a list of channels so we can add a negative test that
passes more than one channel.

Signed-off-by: Het Gala
Suggested-by: Fabiano Rosas
---
   tests/qtest/migration-helpers.c | 26 ++
   1 file changed, 26 insertions(+)

diff --git a/tests/qtest/migration-helpers.c b/tests/qtest/migration-helpers.c
index 478c1f259b..df4978bf17 100644
--- a/tests/qtest/migration-helpers.c
+++ b/tests/qtest/migration-helpers.c
@@ -17,6 +17,8 @@
   #include "qapi/qapi-visit-sockets.h"
   #include "qapi/qobject-input-visitor.h"
   #include "qapi/error.h"
+#include "qapi/qmp/qlist.h"
+

Extra line here. This is unwanted because it sometimes trips git into
thinking there's a conflict here when another patch changes the
surrounding lines.

Ack, that makes sense
   
   #include "migration-helpers.h"
   
@@ -73,6 +75,29 @@ migrate_get_socket_address(QTestState *who, const char *parameter)

   return result;
   }
   
+static void migrate_set_ports(QTestState *to, QList *channelList)

+{
+g_autofree char *addr = NULL;
+g_autofree char *addr_port = NULL;
+QListEntry *entry;
+
+addr = migrate_get_socket_address(to, "socket-address");
+addr_port = g_strsplit(addr, ":", 3)[2];

Will this always do the right thing when the src/dst use different types
of channels? If there is some kind of mismatch (say one side uses vsock
and the other inet), it's better that this function doesn't touch the
channels dict instead of putting garbage in the port field.

Yes you are right. This will fail if there is a mismatch in type of
channels.

Better idea would be to check if 'port' key is present in both, i.e. in
'addr'
as well as 'addrdict' and only then change the port ?


Yep, either parse the type from string or add a version of
migrate_get_socket_address that returns a dict. Then check if type
matches and port exists.


One silly question here: why don't we have tests for exec and rdma 
specifically?


Another suggestion needed: parsing the URI into a QDict is easy to implement 
but a (little) messy code-wise. On the other hand, migrate_get_qdict looks 
clean, but under the hood we would convert it to a SocketAddress and then 
call SocketAddress_to_qdict. Which one should we prefer here?


Regards,

Het Gala

no target for a link in the post "How to access libvirt domains in KubeVirt"

2024-03-06 Thread info

Hello, thank you for the post.

The href in the link
<a href="https://kubevirt.io/user-guide/debug_virt_stack/">Virtualization 
Debugging</a>

should be replaced by
https://kubevirt.io/user-guide/debug_virt_stack/logging/

Best,
Alexei

Re: no target for a link in the post "How to access libvirt domains in KubeVirt"

2024-03-06 Thread info

Hi Stefan, you made the update swiftly;-)
BTW thank you for your meaningful posts.
Best,
Alexei
  
> On 06/03/2024 21:04 CET Stefan Hajnoczi  wrote:
> 
>  
> On Wed, 6 Mar 2024 at 14:52,  wrote:
> >
> > Hello, thank you for the post.
> >
> > The href in the link
> > <a href="https://kubevirt.io/user-guide/debug_virt_stack/">Virtualization 
> > Debugging</a>
> >
> > should be replaced by
> > https://kubevirt.io/user-guide/debug_virt_stack/logging/
> 
> Hi Alexei,
> Thanks for pointing out the broken link! The blog post has been updated.
> 
> Stefan

Re: [PATCH 00/14] Cleanup on SMP and its test

2024-03-06 Thread Philippe Mathieu-Daudé


Hi,

On 6/3/24 10:53, Zhao Liu wrote:

From: Zhao Liu 

Hi all,

To make review easier, I've merged my previous single SMP patch [1] and
SMP test series [2] into this series as well.

So this series includes:
  * [Patch 1] Remove deprecated "parameter=0" SMP configurations, which
is marked as deprecated in v6.2.
  * [Patch 2] Deprecate unsupported "parameter=1" SMP configurations.
  * [Patch 3 & 4] Minor code cleanup for machine_parse_smp_config().
  * [Patch 5 ~ 14] Test case enhancements to cover more SMP API changes.

[1]: 
https://lore.kernel.org/qemu-devel/20240304044510.2305849-1-zhao1@linux.intel.com/
[2]: 
https://lore.kernel.org/qemu-devel/20240118144857.2124034-1-zhao1@linux.intel.com/

Thanks and Best Regards,
Zhao


In a previous community call, Zhao asked us how his work will scale
in the heterogeneous context.

My first idea is CPUs must belong to a cluster. For machines without
explicit cluster, we could always create the first one. Then -smp
would become a sugar property of the first cluster. Next -smp could
also be sugar property of the next cluster.

Regards,

Phil.

Re: [QEMU][PATCH v3 3/7] softmmu: let qemu_map_ram_ptr() use qemu_ram_ptr_length()

2024-03-06 Thread Vikram Garhwal

On Fri, Mar 01, 2024 at 05:04:54PM +0000, Alex Bennée wrote:
> Vikram Garhwal  writes:
> 
> > From: Juergen Gross 
> >
> > qemu_map_ram_ptr() and qemu_ram_ptr_length() share quite some code, so
> > modify qemu_ram_ptr_length() a little bit and use it for
> > qemu_map_ram_ptr(), too.
> >
> > Signed-off-by: Juergen Gross 
> > Signed-off-by: Vikram Garhwal 
> > Reviewed-by: Stefano Stabellini 
> > ---
> >  system/physmem.c | 56 
> >  1 file changed, 23 insertions(+), 33 deletions(-)
> >
> 
> > -
> > -/* Return a host pointer to guest's ram. Similar to qemu_map_ram_ptr
> > - * but takes a size argument.
> > +/*
> > + * Return a host pointer to guest's ram.
> >   *
> >   * Called within RCU critical section.
> >   */
> 
> If you end up re-spinning it would be nice to kdoc this function and at
> least call out size as a return by ref and optional. 
Will do if re-spinning is needed.
> 
> >  static void *qemu_ram_ptr_length(RAMBlock *block, ram_addr_t addr,
> >   hwaddr *size, bool lock)
> >  {
> > -if (*size == 0) {
> > +hwaddr len = 0;
> > +
> > +if (size && *size == 0) {
> >  return NULL;
> >  }
> >  
> > @@ -2207,7 +2181,10 @@ static void *qemu_ram_ptr_length(RAMBlock *block, 
> > ram_addr_t addr,
> >  block = qemu_get_ram_block(addr);
> >  addr -= block->offset;
> >  }
> > -*size = MIN(*size, block->max_length - addr);
> > +if (size) {
> > +*size = MIN(*size, block->max_length - addr);
> > +len = *size;
> > +}
> >  
> >  if (xen_enabled() && block->host == NULL) {
> >  /* We need to check if the requested address is in the RAM
> > @@ -2215,7 +2192,7 @@ static void *qemu_ram_ptr_length(RAMBlock *block, 
> > ram_addr_t addr,
> >   * In that case just map the requested area.
> >   */
> >  if (block->offset == 0) {
> > -return xen_map_cache(addr, *size, lock, lock);
> > +return xen_map_cache(addr, len, lock, lock);
> 
> I did wonder if len == 0 will confuse things but it seems xen_map_cache
> will default to XC_PAGE_SIZE in that case.
> 
> Anyway:
> 
> Reviewed-by: Alex Bennée 
> 
> -- 
> Alex Bennée
> Virtualisation Tech Lead @ Linaro

Re: [QEMU][PATCH v3 7/7] hw: arm: Add grant mapping.

2024-03-06 Thread Vikram Garhwal

Hi Alex,
On Fri, Mar 01, 2024 at 05:10:28PM +0000, Alex Bennée wrote:
> Vikram Garhwal  writes:
> 
> > Enable grant ram mapping support for Xenpvh machine on ARM.
> >
> > Signed-off-by: Vikram Garhwal 
> > Reviewed-by: Stefano Stabellini 
> > ---
> >  hw/arm/xen_arm.c | 3 +++
> >  1 file changed, 3 insertions(+)
> >
> > diff --git a/hw/arm/xen_arm.c b/hw/arm/xen_arm.c
> > index 32776d94df..b5993ef2a6 100644
> > --- a/hw/arm/xen_arm.c
> > +++ b/hw/arm/xen_arm.c
> > @@ -125,6 +125,9 @@ static void xen_init_ram(MachineState *machine)
> >   GUEST_RAM1_BASE, ram_size[1]);
> >  memory_region_add_subregion(sysmem, GUEST_RAM1_BASE, _hi);
> >  }
> > +
> > +DPRINTF("init grant ram mapping for XEN\n");
> 
> I don't think we need the DPRINTF here (the others were recently
> converted to trace-points, although I suspect a memory_region tracepoint
> would be a better place to capture this).
Maybe drop the print? It's not providing much information anyway.
> 
> > +ram_grants = *xen_init_grant_ram();
> >  }
> >  
> >  void arch_handle_ioreq(XenIOState *state, ioreq_t *req)
> 
> -- 
> Alex Bennée
> Virtualisation Tech Lead @ Linaro

Re: [PATCH 04/14] hw/core/machine-smp: Calculate total CPUs once in machine_parse_smp_config()

2024-03-06 Thread Philippe Mathieu-Daudé


On 6/3/24 10:53, Zhao Liu wrote:

From: Zhao Liu 

In machine_parse_smp_config(), the number of total CPUs is calculated
by:

 drawers * books * sockets * dies * clusters * cores * threads

To avoid missing the future new topology level, use a local variable to
cache the calculation result so that total CPUs are only calculated
once.

Signed-off-by: Zhao Liu 
---
  hw/core/machine-smp.c | 8 
  1 file changed, 4 insertions(+), 4 deletions(-)


Reviewed-by: Philippe Mathieu-Daudé

Re: [PATCH v4 21/25] vfio: Reverse test on vfio_get_dirty_bitmap()

2024-03-06 Thread Philippe Mathieu-Daudé


On 6/3/24 14:34, Cédric Le Goater wrote:

It will simplify the changes coming after.

Signed-off-by: Cédric Le Goater 
---
  hw/vfio/common.c | 22 +-
  1 file changed, 13 insertions(+), 9 deletions(-)

diff --git a/hw/vfio/common.c b/hw/vfio/common.c
index 
5b2e6a179cdd5f8ca5be84b7097661e96b391456..6820d2efe4923d5043da7eb8deecb6ff20e1fd16
 100644
--- a/hw/vfio/common.c
+++ b/hw/vfio/common.c
@@ -1241,16 +1241,20 @@ static void vfio_iommu_map_dirty_notify(IOMMUNotifier 
*n, IOMMUTLBEntry *iotlb)
  }
  
  rcu_read_lock();

-if (vfio_get_xlat_addr(iotlb, NULL, _addr, NULL)) {
-ret = vfio_get_dirty_bitmap(bcontainer, iova, iotlb->addr_mask + 1,
-translated_addr);
-if (ret) {
-error_report("vfio_iommu_map_dirty_notify(%p, 0x%"HWADDR_PRIx", "
- "0x%"HWADDR_PRIx") = %d (%s)",
- bcontainer, iova, iotlb->addr_mask + 1, ret,
- strerror(-ret));
-}
+if (!vfio_get_xlat_addr(iotlb, NULL, _addr, NULL)) {
+goto out_lock;
  }
+
+ret = vfio_get_dirty_bitmap(bcontainer, iova, iotlb->addr_mask + 1,
+translated_addr);
+if (ret) {
+error_report("vfio_iommu_map_dirty_notify(%p, 0x%"HWADDR_PRIx", "
+ "0x%"HWADDR_PRIx") = %d (%s)",
+ bcontainer, iova, iotlb->addr_mask + 1, ret,
+ strerror(-ret));
+}
+
+out_lock:


Alternatively use WITH_RCU_READ_LOCK_GUARD() to avoid label.


  rcu_read_unlock();
  
  out:

Re: [PATCH v7 9/9] qemu-options.hx: Document the virtio-iommu-pci aw-bits option

2024-03-06 Thread Philippe Mathieu-Daudé


On 6/3/24 21:32, Eric Auger wrote:

Document the new aw-bits option.

Signed-off-by: Eric Auger 
Reviewed-by: Cédric Le Goater 

---

v4 -> v5
- tweak the aw-bits option description according to Cédric's
   suggestion
---
  qemu-options.hx | 3 +++
  1 file changed, 3 insertions(+)

diff --git a/qemu-options.hx b/qemu-options.hx
index 757df3eac0..87959ede08 100644
--- a/qemu-options.hx
+++ b/qemu-options.hx
@@ -1180,6 +1180,9 @@ SRST
  This decides the default granule to be be exposed by the
  virtio-iommu. If host, the granule matches the host page size.
  
+``aw-bits=val`` (val between 32 and 64, default depends on machine)

+This decides the address width of IOVA address space. It defaults


"It defaults"?

IMO this should be squashed in patch 6/9 "virtio-iommu: Add an option
to define the input range width".


  ERST
  
  DEF("name", HAS_ARG, QEMU_OPTION_name,

Re: [PATCH v7 6/9] virtio-iommu: Add an option to define the input range width

2024-03-06 Thread Philippe Mathieu-Daudé


On 6/3/24 21:32, Eric Auger wrote:

aw-bits is a new option that allows setting the bit width of
the input address range. This value will be used as a default for
the device config input_range.end. By default it is set to 64 bits
which is the current value.

Signed-off-by: Eric Auger 
Reviewed-by: Zhenzhong Duan 
Reviewed-by: Cédric Le Goater 

---

v1 -> v2:
- Check the aw-bits value is within [32,64]
---
  include/hw/virtio/virtio-iommu.h | 1 +
  hw/virtio/virtio-iommu.c | 7 ++-
  2 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/include/hw/virtio/virtio-iommu.h b/include/hw/virtio/virtio-iommu.h
index 67ea5022af..83a52cc446 100644
--- a/include/hw/virtio/virtio-iommu.h
+++ b/include/hw/virtio/virtio-iommu.h
@@ -68,6 +68,7 @@ struct VirtIOIOMMU {
  Notifier machine_done;
  bool granule_frozen;
  GranuleMode granule_mode;
+uint8_t aw_bits;
  };
  
  #endif

diff --git a/hw/virtio/virtio-iommu.c b/hw/virtio/virtio-iommu.c
index aab97e1527..9b2813188b 100644
--- a/hw/virtio/virtio-iommu.c
+++ b/hw/virtio/virtio-iommu.c
@@ -1314,7 +1314,11 @@ static void virtio_iommu_device_realize(DeviceState 
*dev, Error **errp)
   * in vfio realize
   */
  s->config.bypass = s->boot_bypass;
-s->config.input_range.end = UINT64_MAX;
+if (s->aw_bits < 32 || s->aw_bits > 64) {
+error_setg(errp, "aw-bits must be within [32,64]");


Don't we need to return?


+}
+s->config.input_range.end =
+s->aw_bits == 64 ? UINT64_MAX : BIT_ULL(s->aw_bits) - 1;
  
  switch (s->granule_mode) {

  case GRANULE_MODE_4K:
@@ -1544,6 +1548,7 @@ static Property virtio_iommu_properties[] = {
  DEFINE_PROP_BOOL("boot-bypass", VirtIOIOMMU, boot_bypass, true),
  DEFINE_PROP_GRANULE_MODE("granule", VirtIOIOMMU, granule_mode,
   GRANULE_MODE_HOST),
+DEFINE_PROP_UINT8("aw-bits", VirtIOIOMMU, aw_bits, 64),
  DEFINE_PROP_END_OF_LIST(),
  };

Re: [PATCH v7 4/9] qemu-options.hx: Document the virtio-iommu-pci granule option

2024-03-06 Thread Philippe Mathieu-Daudé


On 6/3/24 21:32, Eric Auger wrote:

We are missing an entry for the virtio-iommu-pci device. Add the
information on which machine it is currently supported and document
the new granule option.

Signed-off-by: Eric Auger 
---
  qemu-options.hx | 8 
  1 file changed, 8 insertions(+)

diff --git a/qemu-options.hx b/qemu-options.hx
index 9a47385c15..757df3eac0 100644
--- a/qemu-options.hx
+++ b/qemu-options.hx
@@ -1172,6 +1172,14 @@ SRST
  Please also refer to the wiki page for general scenarios of VT-d
  emulation in QEMU: https://wiki.qemu.org/Features/VT-d.
  
+``-device virtio-iommu-pci[,option=...]``

+This is only supported by ``-machine q35`` and ``-machine virt``.


Don't we need to specify x86 and arm here?


+It supports below options:
+
+``granule=val`` (possible values are 4k, 8k, 16k, 64k and host; default: 
host)
+This decides the default granule to be exposed by the
+virtio-iommu. If host, the granule matches the host page size.
+
  ERST
  
  DEF("name", HAS_ARG, QEMU_OPTION_name,

Re: [PATCH v7 7/9] trans_rvv.c.inc: remove redundant mark_vs_dirty() calls

2024-03-06 Thread Philippe Mathieu-Daudé


On 6/3/24 18:19, Daniel Henrique Barboza wrote:

trans_vmv_v_i , trans_vfmv_v_f and the trans_##NAME macro from
GEN_VMV_WHOLE_TRANS() are calling mark_vs_dirty() in both branches of
their 'if' conditionals.

Call it just once in the end like other functions are doing.

Signed-off-by: Daniel Henrique Barboza 
Reviewed-by: Richard Henderson 
Reviewed-by: Alistair Francis 
---
  target/riscv/insn_trans/trans_rvv.c.inc | 11 +++
  1 file changed, 3 insertions(+), 8 deletions(-)


Reviewed-by: Philippe Mathieu-Daudé

[PATCH v7 0/9] VIRTIO-IOMMU: Introduce aw-bits and granule options

2024-03-06 Thread Eric Auger

This is a respin of
[1] [PATCH v5 0/4] VIRTIO-IOMMU: Introduce an aw-bits option
(https://lore.kernel.org/all/20240215084315.863897-1-eric.au...@redhat.com/)

which now also integrates

[PATCH v6 0/3] VIRTIO-IOMMU: Set default granule to host page size
(https://lore.kernel.org/all/20240227165730.14099-1-eric.au...@redhat.com/)

The introduction of those 2 new options and their new default values
fix bugs when assigning VFIO devices protected by a virtio-iommu.

patches 1 - 4: intro of the granule property, collected reviews
- we used to set the default granule to 4k. This causes failures
  when hotplugging a VFIO device on a 64kB/64kB host/guest config:
  "vfio: DMA mapping failed, unable to continue". When the device
  is hotplugged the granule is already frozen to 4k whereas 64k is
  needed. This series introduces a new granule option which is set
  by default to the host page size.

patches 5 - 9: intro of the aw-bits property, needs further review
- we used to set the input address width to 64b. This causes
  failures with some assigned devices where the guest driver
  tries to use the full 64b input range whereas the physical IOMMU
  supports fewer bits (39/48 gaw for instance on VTD). The new defaults
  usually match the host HW capability.

For more details please see the cover letter of [1] and [2].

This series can be found at:
https://github.com/eauger/qemu/tree/granule_aw_bits_v7

History:
v6 -> v7:
- Made property static in virt and pc_q35. Fix qtest 32 limit.

Eric Auger (9):
  qdev: Add a granule_mode property
  virtio-iommu: Add a granule property
  virtio-iommu: Change the default granule to the host page size
  qemu-options.hx: Document the virtio-iommu-pci granule option
  virtio-iommu: Trace domain range limits as unsigned int
  virtio-iommu: Add an option to define the input range width
  hw/i386/q35: Set virtio-iommu aw-bits default value to 39
  hw/arm/virt: Set virtio-iommu aw-bits default value to 48
  qemu-options.hx: Document the virtio-iommu-pci aw-bits option

 qapi/virtio.json| 18 +++
 include/hw/qdev-properties-system.h |  3 +++
 include/hw/virtio/virtio-iommu.h|  3 +++
 hw/arm/virt.c   | 17 ++
 hw/core/machine.c   |  6 -
 hw/core/qdev-properties-system.c| 14 
 hw/i386/pc_q35.c|  9 
 hw/virtio/virtio-iommu.c| 35 +
 tests/qtest/virtio-iommu-test.c |  2 +-
 hw/virtio/trace-events  |  2 +-
 qemu-options.hx | 11 +
 11 files changed, 113 insertions(+), 7 deletions(-)

-- 
2.41.0

[PATCH v7 9/9] qemu-options.hx: Document the virtio-iommu-pci aw-bits option

2024-03-06 Thread Eric Auger

Document the new aw-bits option.

Signed-off-by: Eric Auger 
Reviewed-by: Cédric Le Goater 

---

v4 -> v5
- tweak the aw-bits option description according to Cédric's
  suggestion
---
 qemu-options.hx | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/qemu-options.hx b/qemu-options.hx
index 757df3eac0..87959ede08 100644
--- a/qemu-options.hx
+++ b/qemu-options.hx
@@ -1180,6 +1180,9 @@ SRST
 This decides the default granule to be exposed by the
 virtio-iommu. If host, the granule matches the host page size.
 
+``aw-bits=val`` (val between 32 and 64, default depends on machine)
+This decides the address width of the IOVA address space.
+
 ERST
 
 DEF("name", HAS_ARG, QEMU_OPTION_name,
-- 
2.41.0

[PATCH v7 3/9] virtio-iommu: Change the default granule to the host page size

2024-03-06 Thread Eric Auger

We used to set the default granule to 4KB but with VFIO assignment
it makes more sense to use the actual host page size.

Indeed when hotplugging a VFIO device protected by a virtio-iommu
on a 64kB/64kB host/guest config, we currently get a QEMU crash:

"vfio: DMA mapping failed, unable to continue"

This is due to the hot-attached VFIO device calling
memory_region_iommu_set_page_size_mask() with 64kB granule
whereas the virtio-iommu granule was already frozen to 4KB on
machine init done.

Set the granule property to "host" and introduce a new compat.
The page size mask used before 9.0 was qemu_target_page_mask().
Since the virtio-iommu currently only supports x86_64 and aarch64,
this matched a 4KB granule.

Note that the new default will prevent 4kB guest on 64kB host
because the granule will be set to 64kB which would be larger
than the guest page size. In that situation, the virtio-iommu
driver fails on viommu_domain_finalise() with
"granule 0x10000 larger than system page size 0x1000".

In that case the workaround is to request 4K granule.

The current limitation of global granule in the virtio-iommu
should be removed and turned into per domain granule. But
until we get this upgraded, this new default is probably
better because I don't think anyone is currently interested in
running a 4KB page size guest with virtio-iommu on a 64KB host.
However supporting 64kB guest on 64kB host with virtio-iommu and
VFIO looks a more important feature.

Signed-off-by: Eric Auger 
Reviewed-by: Philippe Mathieu-Daudé 
Reviewed-by: Zhenzhong Duan 

---

v4 -> v5
- use lower case, mandated by the JSON QAPI
---
 hw/core/machine.c| 5 -
 hw/virtio/virtio-iommu.c | 2 +-
 2 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/hw/core/machine.c b/hw/core/machine.c
index 9ac5d5389a..6bd09d4592 100644
--- a/hw/core/machine.c
+++ b/hw/core/machine.c
@@ -30,9 +30,12 @@
 #include "exec/confidential-guest-support.h"
 #include "hw/virtio/virtio-pci.h"
 #include "hw/virtio/virtio-net.h"
+#include "hw/virtio/virtio-iommu.h"
 #include "audio/audio.h"
 
-GlobalProperty hw_compat_8_2[] = {};
+GlobalProperty hw_compat_8_2[] = {
+{ TYPE_VIRTIO_IOMMU_PCI, "granule", "4k" },
+};
 const size_t hw_compat_8_2_len = G_N_ELEMENTS(hw_compat_8_2);
 
 GlobalProperty hw_compat_8_1[] = {
diff --git a/hw/virtio/virtio-iommu.c b/hw/virtio/virtio-iommu.c
index 84d6819d3b..aab97e1527 100644
--- a/hw/virtio/virtio-iommu.c
+++ b/hw/virtio/virtio-iommu.c
@@ -1543,7 +1543,7 @@ static Property virtio_iommu_properties[] = {
  TYPE_PCI_BUS, PCIBus *),
 DEFINE_PROP_BOOL("boot-bypass", VirtIOIOMMU, boot_bypass, true),
 DEFINE_PROP_GRANULE_MODE("granule", VirtIOIOMMU, granule_mode,
- GRANULE_MODE_4K),
+ GRANULE_MODE_HOST),
 DEFINE_PROP_END_OF_LIST(),
 };
 
-- 
2.41.0

[PATCH v7 6/9] virtio-iommu: Add an option to define the input range width

2024-03-06 Thread Eric Auger

aw-bits is a new option that allows to set the bit width of
the input address range. This value will be used as a default for
the device config input_range.end. By default it is set to 64 bits
which is the current value.

Signed-off-by: Eric Auger 
Reviewed-by: Zhenzhong Duan 
Reviewed-by: Cédric Le Goater 

---

v1 -> v2:
- Check the aw-bits value is within [32,64]
---
 include/hw/virtio/virtio-iommu.h | 1 +
 hw/virtio/virtio-iommu.c | 7 ++-
 2 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/include/hw/virtio/virtio-iommu.h b/include/hw/virtio/virtio-iommu.h
index 67ea5022af..83a52cc446 100644
--- a/include/hw/virtio/virtio-iommu.h
+++ b/include/hw/virtio/virtio-iommu.h
@@ -68,6 +68,7 @@ struct VirtIOIOMMU {
 Notifier machine_done;
 bool granule_frozen;
 GranuleMode granule_mode;
+uint8_t aw_bits;
 };
 
 #endif
diff --git a/hw/virtio/virtio-iommu.c b/hw/virtio/virtio-iommu.c
index aab97e1527..9b2813188b 100644
--- a/hw/virtio/virtio-iommu.c
+++ b/hw/virtio/virtio-iommu.c
@@ -1314,7 +1314,11 @@ static void virtio_iommu_device_realize(DeviceState 
*dev, Error **errp)
  * in vfio realize
  */
 s->config.bypass = s->boot_bypass;
-s->config.input_range.end = UINT64_MAX;
+if (s->aw_bits < 32 || s->aw_bits > 64) {
+error_setg(errp, "aw-bits must be within [32,64]");
+}
+s->config.input_range.end =
+s->aw_bits == 64 ? UINT64_MAX : BIT_ULL(s->aw_bits) - 1;
 
 switch (s->granule_mode) {
 case GRANULE_MODE_4K:
@@ -1544,6 +1548,7 @@ static Property virtio_iommu_properties[] = {
 DEFINE_PROP_BOOL("boot-bypass", VirtIOIOMMU, boot_bypass, true),
 DEFINE_PROP_GRANULE_MODE("granule", VirtIOIOMMU, granule_mode,
  GRANULE_MODE_HOST),
+DEFINE_PROP_UINT8("aw-bits", VirtIOIOMMU, aw_bits, 64),
 DEFINE_PROP_END_OF_LIST(),
 };
 
-- 
2.41.0

[PATCH v7 7/9] hw/i386/q35: Set virtio-iommu aw-bits default value to 39

2024-03-06 Thread Eric Auger

Currently the default input range can extend to 64 bits. On x86,
when the virtio-iommu protects vfio devices, the physical iommu
may support only 39 bits. Let's set the default to 39, as done
for the intel-iommu.

We use hw_compat_8_2 to handle the compatibility for machines
before 9.0 which used to have a virtio-iommu default input range
of 64 bits.

Of course if aw-bits is set from the command line, the default
is overridden.

Signed-off-by: Eric Auger 

---

v6 -> v7:
- use static pc_q35_compat_defaults
- remove spurious header addition
- s/32/UINT32_MAX in the qtest

v5 -> v6:
- split pc/arm settings

v3 -> v4:
- update the qos test to relax the check on the max input IOVA

v2 -> v3:
- collected Zhenzhong's R-b
- use &error_abort instead of NULL error handle
  on object_property_get_uint() call (Cédric)
- use VTD_HOST_AW_39BIT (Cédric)

v1 -> v2:
- set aw-bits to 48b on ARM
- use hw_compat_8_2 to handle the compat for older machines
  which used 64b as a default
---
 hw/core/machine.c   | 1 +
 hw/i386/pc_q35.c| 9 +
 tests/qtest/virtio-iommu-test.c | 2 +-
 3 files changed, 11 insertions(+), 1 deletion(-)

diff --git a/hw/core/machine.c b/hw/core/machine.c
index 6bd09d4592..4b89172d1c 100644
--- a/hw/core/machine.c
+++ b/hw/core/machine.c
@@ -35,6 +35,7 @@
 
 GlobalProperty hw_compat_8_2[] = {
 { TYPE_VIRTIO_IOMMU_PCI, "granule", "4k" },
+{ TYPE_VIRTIO_IOMMU_PCI, "aw-bits", "64" },
 };
 const size_t hw_compat_8_2_len = G_N_ELEMENTS(hw_compat_8_2);
 
diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c
index 45a4102e75..1e7464d39a 100644
--- a/hw/i386/pc_q35.c
+++ b/hw/i386/pc_q35.c
@@ -45,6 +45,7 @@
 #include "hw/i386/pc.h"
 #include "hw/i386/amd_iommu.h"
 #include "hw/i386/intel_iommu.h"
+#include "hw/virtio/virtio-iommu.h"
 #include "hw/display/ramfb.h"
 #include "hw/ide/pci.h"
 #include "hw/ide/ahci-pci.h"
@@ -63,6 +64,12 @@
 /* ICH9 AHCI has 6 ports */
 #define MAX_SATA_PORTS 6
 
+static GlobalProperty pc_q35_compat_defaults[] = {
+{ TYPE_VIRTIO_IOMMU_PCI, "aw-bits", "39" },
+};
+static const size_t pc_q35_compat_defaults_len =
+G_N_ELEMENTS(pc_q35_compat_defaults);
+
 struct ehci_companions {
 const char *name;
 int func;
@@ -356,6 +363,8 @@ static void pc_q35_machine_options(MachineClass *m)
 machine_class_allow_dynamic_sysbus_dev(m, TYPE_INTEL_IOMMU_DEVICE);
 machine_class_allow_dynamic_sysbus_dev(m, TYPE_RAMFB_DEVICE);
 machine_class_allow_dynamic_sysbus_dev(m, TYPE_VMBUS_BRIDGE);
+compat_props_add(m->compat_props,
+ pc_q35_compat_defaults, pc_q35_compat_defaults_len);
 }
 
 static void pc_q35_9_0_machine_options(MachineClass *m)
diff --git a/tests/qtest/virtio-iommu-test.c b/tests/qtest/virtio-iommu-test.c
index 068e7a9e6c..afb225971d 100644
--- a/tests/qtest/virtio-iommu-test.c
+++ b/tests/qtest/virtio-iommu-test.c
@@ -34,7 +34,7 @@ static void pci_config(void *obj, void *data, QGuestAllocator 
*t_alloc)
 uint8_t bypass = qvirtio_config_readb(dev, 36);
 
 g_assert_cmpint(input_range_start, ==, 0);
-g_assert_cmphex(input_range_end, ==, UINT64_MAX);
+g_assert_cmphex(input_range_end, >=, UINT32_MAX);
 g_assert_cmpint(domain_range_start, ==, 0);
 g_assert_cmpint(domain_range_end, ==, UINT32_MAX);
 g_assert_cmpint(bypass, ==, 1);
-- 
2.41.0

[PATCH v7 4/9] qemu-options.hx: Document the virtio-iommu-pci granule option

2024-03-06 Thread Eric Auger

We are missing an entry for the virtio-iommu-pci device. Add the
information on which machine it is currently supported and document
the new granule option.

Signed-off-by: Eric Auger 
---
 qemu-options.hx | 8 
 1 file changed, 8 insertions(+)

diff --git a/qemu-options.hx b/qemu-options.hx
index 9a47385c15..757df3eac0 100644
--- a/qemu-options.hx
+++ b/qemu-options.hx
@@ -1172,6 +1172,14 @@ SRST
 Please also refer to the wiki page for general scenarios of VT-d
 emulation in QEMU: https://wiki.qemu.org/Features/VT-d.
 
+``-device virtio-iommu-pci[,option=...]``
+This is only supported by ``-machine q35`` and ``-machine virt``.
+It supports below options:
+
+``granule=val`` (possible values are 4k, 8k, 16k, 64k and host; default: 
host)
+This decides the default granule to be exposed by the
+virtio-iommu. If host, the granule matches the host page size.
+
 ERST
 
 DEF("name", HAS_ARG, QEMU_OPTION_name,
-- 
2.41.0

[PATCH v7 2/9] virtio-iommu: Add a granule property

2024-03-06 Thread Eric Auger

This allows to choose which granule will be used by
default by the virtio-iommu. Current page size mask
default is qemu_target_page_mask so this translates
into a 4k granule on ARM and x86_64 where virtio-iommu
is supported.

Signed-off-by: Eric Auger 
Reviewed-by: Philippe Mathieu-Daudé 
Reviewed-by: Zhenzhong Duan 

---
v4 -> v5:
- use -(n * KiB) (Phild)

v3 -> v4:
- granule_mode introduction moved to that patch
---
 include/hw/virtio/virtio-iommu.h |  2 ++
 hw/virtio/virtio-iommu.c | 28 +---
 2 files changed, 27 insertions(+), 3 deletions(-)

diff --git a/include/hw/virtio/virtio-iommu.h b/include/hw/virtio/virtio-iommu.h
index 781ebaea8f..67ea5022af 100644
--- a/include/hw/virtio/virtio-iommu.h
+++ b/include/hw/virtio/virtio-iommu.h
@@ -24,6 +24,7 @@
 #include "hw/virtio/virtio.h"
 #include "hw/pci/pci.h"
 #include "qom/object.h"
+#include "qapi/qapi-types-virtio.h"
 
 #define TYPE_VIRTIO_IOMMU "virtio-iommu-device"
 #define TYPE_VIRTIO_IOMMU_PCI "virtio-iommu-pci"
@@ -66,6 +67,7 @@ struct VirtIOIOMMU {
 bool boot_bypass;
 Notifier machine_done;
 bool granule_frozen;
+GranuleMode granule_mode;
 };
 
 #endif
diff --git a/hw/virtio/virtio-iommu.c b/hw/virtio/virtio-iommu.c
index 86623d55a5..84d6819d3b 100644
--- a/hw/virtio/virtio-iommu.c
+++ b/hw/virtio/virtio-iommu.c
@@ -29,6 +29,7 @@
 #include "sysemu/reset.h"
 #include "sysemu/sysemu.h"
 #include "qemu/reserved-region.h"
+#include "qemu/units.h"
 #include "qapi/error.h"
 #include "qemu/error-report.h"
 #include "trace.h"
@@ -1115,8 +1116,8 @@ static int 
virtio_iommu_notify_flag_changed(IOMMUMemoryRegion *iommu_mr,
 }
 
 /*
- * The default mask (TARGET_PAGE_MASK) is the smallest supported guest granule,
- * for example 0xf000. When an assigned device has page size
+ * The default mask depends on the "granule" property. For example, with
+ * 4k granule, it is -(4 * KiB). When an assigned device has page size
  * restrictions due to the hardware IOMMU configuration, apply this restriction
  * to the mask.
  */
@@ -1313,8 +1314,27 @@ static void virtio_iommu_device_realize(DeviceState 
*dev, Error **errp)
  * in vfio realize
  */
 s->config.bypass = s->boot_bypass;
-s->config.page_size_mask = qemu_target_page_mask();
 s->config.input_range.end = UINT64_MAX;
+
+switch (s->granule_mode) {
+case GRANULE_MODE_4K:
+s->config.page_size_mask = -(4 * KiB);
+break;
+case GRANULE_MODE_8K:
+s->config.page_size_mask = -(8 * KiB);
+break;
+case GRANULE_MODE_16K:
+s->config.page_size_mask = -(16 * KiB);
+break;
+case GRANULE_MODE_64K:
+s->config.page_size_mask = -(64 * KiB);
+break;
+case GRANULE_MODE_HOST:
+s->config.page_size_mask = qemu_real_host_page_mask();
+break;
+default:
+error_setg(errp, "Unsupported granule mode");
+}
 s->config.domain_range.end = UINT32_MAX;
 s->config.probe_size = VIOMMU_PROBE_SIZE;
 
@@ -1522,6 +1542,8 @@ static Property virtio_iommu_properties[] = {
 DEFINE_PROP_LINK("primary-bus", VirtIOIOMMU, primary_bus,
  TYPE_PCI_BUS, PCIBus *),
 DEFINE_PROP_BOOL("boot-bypass", VirtIOIOMMU, boot_bypass, true),
+DEFINE_PROP_GRANULE_MODE("granule", VirtIOIOMMU, granule_mode,
+ GRANULE_MODE_4K),
 DEFINE_PROP_END_OF_LIST(),
 };
 
-- 
2.41.0

[PATCH v7 1/9] qdev: Add a granule_mode property

2024-03-06 Thread Eric Auger

Introduce a new enum type property allowing to set an
IOMMU granule. Values are 4k, 8k, 16k, 64k and host.
This latter indicates the vIOMMU granule will match
the host page size.

A subsequent patch will add such a property to the
virtio-iommu device.

Signed-off-by: Eric Auger 
Signed-off-by: Philippe Mathieu-Daudé 
Reviewed-by: Zhenzhong Duan 

---
v5 -> v6
- remove #include "hw/virtio/virtio-iommu.h" (Zhenzhong)

v4 -> v5
- remove code that can be automatically generated
  and add the new enum in qapi/virtio.json (Philippe).
  Added Phild's SOB. Lower case needs to be used due to
  the JSON generation.

v3 -> v4:
- Add 8K
---
 qapi/virtio.json| 18 ++
 include/hw/qdev-properties-system.h |  3 +++
 hw/core/qdev-properties-system.c| 14 ++
 3 files changed, 35 insertions(+)

diff --git a/qapi/virtio.json b/qapi/virtio.json
index a79013fe89..95745fdfd7 100644
--- a/qapi/virtio.json
+++ b/qapi/virtio.json
@@ -957,3 +957,21 @@
 
 { 'struct': 'DummyVirtioForceArrays',
   'data': { 'unused-iothread-vq-mapping': ['IOThreadVirtQueueMapping'] } }
+
+##
+# @GranuleMode:
+#
+# @4k: granule page size of 4KiB
+#
+# @8k: granule page size of 8KiB
+#
+# @16k: granule page size of 16KiB
+#
+# @64k: granule page size of 64KiB
+#
+# @host: granule matches the host page size
+#
+# Since: 9.0
+##
+{ 'enum': 'GranuleMode',
+  'data': [ '4k', '8k', '16k', '64k', 'host' ] }
diff --git a/include/hw/qdev-properties-system.h 
b/include/hw/qdev-properties-system.h
index 06c359c190..626be87dd3 100644
--- a/include/hw/qdev-properties-system.h
+++ b/include/hw/qdev-properties-system.h
@@ -8,6 +8,7 @@ extern const PropertyInfo qdev_prop_macaddr;
 extern const PropertyInfo qdev_prop_reserved_region;
 extern const PropertyInfo qdev_prop_multifd_compression;
 extern const PropertyInfo qdev_prop_mig_mode;
+extern const PropertyInfo qdev_prop_granule_mode;
 extern const PropertyInfo qdev_prop_losttickpolicy;
 extern const PropertyInfo qdev_prop_blockdev_on_error;
 extern const PropertyInfo qdev_prop_bios_chs_trans;
@@ -47,6 +48,8 @@ extern const PropertyInfo qdev_prop_iothread_vq_mapping_list;
 #define DEFINE_PROP_MIG_MODE(_n, _s, _f, _d) \
 DEFINE_PROP_SIGNED(_n, _s, _f, _d, qdev_prop_mig_mode, \
MigMode)
+#define DEFINE_PROP_GRANULE_MODE(_n, _s, _f, _d) \
+DEFINE_PROP_SIGNED(_n, _s, _f, _d, qdev_prop_granule_mode, GranuleMode)
 #define DEFINE_PROP_LOSTTICKPOLICY(_n, _s, _f, _d) \
 DEFINE_PROP_SIGNED(_n, _s, _f, _d, qdev_prop_losttickpolicy, \
 LostTickPolicy)
diff --git a/hw/core/qdev-properties-system.c b/hw/core/qdev-properties-system.c
index 1a396521d5..b45e90edb2 100644
--- a/hw/core/qdev-properties-system.c
+++ b/hw/core/qdev-properties-system.c
@@ -679,6 +679,20 @@ const PropertyInfo qdev_prop_mig_mode = {
 .set_default_value = qdev_propinfo_set_default_value_enum,
 };
 
+/* --- GranuleMode --- */
+
+QEMU_BUILD_BUG_ON(sizeof(GranuleMode) != sizeof(int));
+
+const PropertyInfo qdev_prop_granule_mode = {
+.name = "GranuleMode",
+.description = "granule_mode values, "
+   "4k, 8k, 16k, 64k, host",
+.enum_table = _lookup,
+.get = qdev_propinfo_get_enum,
+.set = qdev_propinfo_set_enum,
+.set_default_value = qdev_propinfo_set_default_value_enum,
+};
+
 /* --- Reserved Region --- */
 
 /*
-- 
2.41.0

[PATCH v7 5/9] virtio-iommu: Trace domain range limits as unsigned int

2024-03-06 Thread Eric Auger

Use %u format to trace domain_range limits.

Signed-off-by: Eric Auger 
Reviewed-by: Zhenzhong Duan 
Reviewed-by: Cédric Le Goater 
---
 hw/virtio/trace-events | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/hw/virtio/trace-events b/hw/virtio/trace-events
index 77905d1994..2350849fbd 100644
--- a/hw/virtio/trace-events
+++ b/hw/virtio/trace-events
@@ -111,7 +111,7 @@ virtio_iommu_device_reset(void) "reset!"
 virtio_iommu_system_reset(void) "system reset!"
 virtio_iommu_get_features(uint64_t features) "device supports 
features=0x%"PRIx64
 virtio_iommu_device_status(uint8_t status) "driver status = %d"
-virtio_iommu_get_config(uint64_t page_size_mask, uint64_t start, uint64_t end, 
uint32_t domain_start, uint32_t domain_end, uint32_t probe_size, uint8_t 
bypass) "page_size_mask=0x%"PRIx64" input range start=0x%"PRIx64" input range 
end=0x%"PRIx64" domain range start=%d domain range end=%d probe_size=0x%x 
bypass=0x%x"
+virtio_iommu_get_config(uint64_t page_size_mask, uint64_t start, uint64_t end, 
uint32_t domain_start, uint32_t domain_end, uint32_t probe_size, uint8_t 
bypass) "page_size_mask=0x%"PRIx64" input range start=0x%"PRIx64" input range 
end=0x%"PRIx64" domain range start=%u domain range end=%u probe_size=0x%x 
bypass=0x%x"
 virtio_iommu_set_config(uint8_t bypass) "bypass=0x%x"
 virtio_iommu_attach(uint32_t domain_id, uint32_t ep_id) "domain=%d endpoint=%d"
 virtio_iommu_detach(uint32_t domain_id, uint32_t ep_id) "domain=%d endpoint=%d"
-- 
2.41.0

[PATCH v7 8/9] hw/arm/virt: Set virtio-iommu aw-bits default value to 48

2024-03-06 Thread Eric Auger

On ARM we set 48b as a default (matching SMMUv3 SMMU_IDR5.VAX == 0).

hw_compat_8_2 is used to handle the compatibility for machine types
before 9.0 (default was 64 bits).

Signed-off-by: Eric Auger 
Reviewed-by: Zhenzhong Duan 

---

v6 -> v7
turn arm_virt_compat and arm_virt_compat_len static
---
 hw/arm/virt.c | 17 +
 1 file changed, 17 insertions(+)

diff --git a/hw/arm/virt.c b/hw/arm/virt.c
index 0af1943697..e5cd935232 100644
--- a/hw/arm/virt.c
+++ b/hw/arm/virt.c
@@ -85,11 +85,28 @@
 #include "hw/char/pl011.h"
 #include "qemu/guest-random.h"
 
+static GlobalProperty arm_virt_compat[] = {
+{ TYPE_VIRTIO_IOMMU_PCI, "aw-bits", "48" },
+};
+static const size_t arm_virt_compat_len = G_N_ELEMENTS(arm_virt_compat);
+
+/*
+ * This cannot be called from the virt_machine_class_init() because
+ * TYPE_VIRT_MACHINE is abstract and mc->compat_props g_ptr_array_new()
+ * only is called on virt non abstract class init.
+ */
+static void arm_virt_compat_set(MachineClass *mc)
+{
+compat_props_add(mc->compat_props, arm_virt_compat,
+ arm_virt_compat_len);
+}
+
 #define DEFINE_VIRT_MACHINE_LATEST(major, minor, latest) \
 static void virt_##major##_##minor##_class_init(ObjectClass *oc, \
 void *data) \
 { \
 MachineClass *mc = MACHINE_CLASS(oc); \
+arm_virt_compat_set(mc); \
 virt_machine_##major##_##minor##_options(mc); \
 mc->desc = "QEMU " # major "." # minor " ARM Virtual Machine"; \
 if (latest) { \
-- 
2.41.0

Re: no target for a link in the post "How to access libvirt domains in KubeVirt"

2024-03-06 Thread Stefan Hajnoczi

On Wed, 6 Mar 2024 at 15:09,  wrote:
> BTW thank you for your meaningful posts.

Thanks for the kind words!

Stefan

Re: no target for a link in the post "How to access libvirt domains in KubeVirt"

2024-03-06 Thread Stefan Hajnoczi

On Wed, 6 Mar 2024 at 14:52,  wrote:
>
> Hello, thank you for the post.
>
> The href in the link
> https://kubevirt.io/user-guide/debug_virt_stack/;>Virtualization 
> Debugging
>
> should be replaced by
> https://kubevirt.io/user-guide/debug_virt_stack/logging/

Hi Alexei,
Thanks for pointing out the broken link! The blog post has been updated.

Stefan

Re: [PATCH v2 1/2] hw/arm/virt-acpi-build.c: Migrate SPCR creation to common location

2024-03-06 Thread Daniel Henrique Barboza





On 3/6/24 15:57, Daniel Henrique Barboza wrote:

As a side note, it seems like 'bios-table-test' isn't being run for RISC-V. Not 
sure if this
is intentional or a foresight.


s/foresight/hindsight

There's no 'make check' for what we want to say in the ML but hopefully there's
a way to enable 'bios-table-test' for RISC-V :D


Daniel

Re: [PATCH v3 4/7] Add migrate_set_ports into migrate_qmp to change migration port number

2024-03-06 Thread Het Gala




On 06/03/24 9:31 pm, Fabiano Rosas wrote:

Het Gala  writes:


On 06/03/24 8:06 pm, Fabiano Rosas wrote:

Het Gala  writes:


Add a migrate_set_ports() function that from each QDict, fills in
the port in case it was 0 in the test.
Handle a list of channels so we can add a negative test that
passes more than one channel.

Signed-off-by: Het Gala
Suggested-by: Fabiano Rosas
---
   tests/qtest/migration-helpers.c | 26 ++
   1 file changed, 26 insertions(+)

diff --git a/tests/qtest/migration-helpers.c b/tests/qtest/migration-helpers.c
index 478c1f259b..df4978bf17 100644
--- a/tests/qtest/migration-helpers.c
+++ b/tests/qtest/migration-helpers.c
@@ -17,6 +17,8 @@
   #include "qapi/qapi-visit-sockets.h"
   #include "qapi/qobject-input-visitor.h"
   #include "qapi/error.h"
+#include "qapi/qmp/qlist.h"
+

Extra line here. This is unwanted because it sometimes trips git into
thinking there's a conflict here when another patch changes the
surrounding lines.

Ack, that makes sense
   
   #include "migration-helpers.h"
   
@@ -73,6 +75,29 @@ migrate_get_socket_address(QTestState *who, const char *parameter)

   return result;
   }
   
+static void migrate_set_ports(QTestState *to, QList *channelList)

+{
+g_autofree char *addr = NULL;
+g_autofree char *addr_port = NULL;
+QListEntry *entry;
+
+addr = migrate_get_socket_address(to, "socket-address");
+addr_port = g_strsplit(addr, ":", 3)[2];

Will this always do the right thing when the src/dst use different types
of channels? If there is some kind of mismatch (say one side uses vsock
and the other inet), it's better that this function doesn't touch the
channels dict instead of putting garbage in the port field.

Yes you are right. This will fail if there is a mismatch in type of
channels.

Better idea would be to check if 'port' key is present in both, i.e. in
'addr'
as well as 'addrdict' and only then change the port ?


Yep, either parse the type from string or add a version of
migrate_get_socket_address that returns a dict. Then check if type
matches and port exists.


Also what happens if the dst is using unix: or fd:?

yes in that case - how should the migration behaviour be ? src and dst
should be of the same type right

Remember this is test code. If the test was written incorrectly either
by developer mistake or on purpose to test some condition, then it's not
this function's responsibility to fix that.

Replace the port only if the transport type allows for a port, there is
a port in both addr and addrdict and port is 0. Anything else, leave the
channelList untouched and let QEMU deal with the bad input.


+
+QLIST_FOREACH_ENTRY(channelList, entry) {
+QDict *channel = qobject_to(QDict, qlist_entry_obj(entry));
+QObject *addr_obj = qdict_get(channel, "addr");
+
+if (qobject_type(addr_obj) == QTYPE_QDICT) {
+QDict *addrdict = qobject_to(QDict, addr_obj);

You might not need these two lines if at the start you use:

QDict *addr = qdict_get_dict(channel, "addr");

If you are commenting regarding this two lines:

+if (qobject_type(addr_obj) == QTYPE_QDICT) {
+QDict *addrdict = qobject_to(QDict, addr_obj);

then, I am not sure, because addrdict and addr is different right? Also 
addrdict can also
be a QList, like in the case of exec migration, it can be a list instead of dict
ex:
# -> { "execute": "migrate",
#  "arguments": {
#  "channels": [ { "channel-type": "main",
#  "addr": { "transport": "exec",
#"args": [ "/bin/nc", "-p", "6000",
#  "/some/sock" ] } } ] } }

"addr" is always a dict, no? Even in the example you just gave.


Sorry, my apologies. I had args <-> addrdict in back of my mind.
You are correct. Will make the changes in next patchset.


+if (qdict_haskey(addrdict, "port") &&
+(strcmp(qdict_get_str(addrdict, "port"), "0") == 0)) {
+qdict_put_str(addrdict, "port", addr_port);
+}
+}
+}
+}
+
   bool migrate_watch_for_events(QTestState *who, const char *name,
 QDict *event, void *opaque)
   {
@@ -143,6 +168,7 @@ void migrate_qmp(QTestState *who, QTestState *to, const 
char *uri,
   if (!uri) {
   connect_uri = migrate_get_socket_address(to, "socket-address");
   }
+migrate_set_ports(to, NULL);

migrate_set_ports is not prepared to take NULL. This breaks the tests in
this commit. All individual commits should work, otherwise it will break
bisecting.

Okay, so in that case, is it better to merge this with the next patch ?
because if I just
define the migrate_set_ports function and not use it anywhere, it gives
a warning/error
(depending upon what compiler is used)

You can return early at migrate_set_ports if channelList is NULL.

Also, I just noticed: s/channelList/channel_list/

Ack. Thanks


Regards,

Het Gala

Re: [PATCH v5 06/13] hw/mem/cxl_type3: Add host backend and address space handling for DC regions

2024-03-06 Thread fan

On Wed, Mar 06, 2024 at 04:28:16PM +, Jonathan Cameron wrote:
> On Mon,  4 Mar 2024 11:34:01 -0800
> nifan@gmail.com wrote:
> 
> > From: Fan Ni 
> > 
> > Add (file/memory backed) host backend, all the dynamic capacity regions
> > will share a single, large enough host backend. Set up address space for
> > DC regions to support read/write operations to dynamic capacity for DCD.
> > 
> > With the change, following supports are added:
> > 1. Add a new property to type3 device "volatile-dc-memdev" to point to host
> >memory backend for dynamic capacity. Currently, all dc regions share one
> >host backend.
> > 2. Add namespace for dynamic capacity for read/write support;
> > 3. Create cdat entries for each dynamic capacity region;
> > 4. Fix dvsec range registers to include DC regions.
> > 
> > Signed-off-by: Fan Ni 
> Hi Fan, 
> 
> This one has a few more significant comments inline.
> 
> thanks,
> 
> Jonathan
> 
> > ---
Hi Jonathan,

Thanks for the review. See below,

> 
> > diff --git a/hw/mem/cxl_type3.c b/hw/mem/cxl_type3.c
> > index c045fee32d..2b380a260b 100644
> > --- a/hw/mem/cxl_type3.c
> > +++ b/hw/mem/cxl_type3.c
> > @@ -45,7 +45,8 @@ enum {
> >  
> >  static void ct3_build_cdat_entries_for_mr(CDATSubHeader **cdat_table,
> >int dsmad_handle, uint64_t size,
> > -  bool is_pmem, uint64_t dpa_base)
> > +  bool is_pmem, bool is_dynamic,
> > +  uint64_t dpa_base)
> >  {
> >  g_autofree CDATDsmas *dsmas = NULL;
> >  g_autofree CDATDslbis *dslbis0 = NULL;
> 
> There is a fixlet going through for these as the autofree doesn't do anything.
> Will require a rebase.  I'll do it on my tree, but might not push that out 
> for a
> few days so this is just a heads up for anyone using these.
> 
> https://lore.kernel.org/qemu-devel/20240304104406.59855-1-th...@redhat.com/
> 
> It went in clean for me, so may not even be something anyone notices!
> 

OK. So I will not rebase for v6 until there is a break.

> > @@ -61,7 +62,8 @@ static void ct3_build_cdat_entries_for_mr(CDATSubHeader 
> > **cdat_table,
> >  .length = sizeof(*dsmas),
> >  },
> >  .DSMADhandle = dsmad_handle,
> > -.flags = is_pmem ? CDAT_DSMAS_FLAG_NV : 0,
> > +.flags = (is_pmem ? CDAT_DSMAS_FLAG_NV : 0) |
> > + (is_dynamic ? CDAT_DSMAS_FLAG_DYNAMIC_CAP : 0),
> >  .DPA_base = dpa_base,
> >  .DPA_length = size,
> >  };
> > @@ -149,12 +151,13 @@ static int ct3_build_cdat_table(CDATSubHeader 
> > ***cdat_table, void *priv)
> >  g_autofree CDATSubHeader **table = NULL;
> >  
> >  
> > @@ -176,21 +179,55 @@ static int ct3_build_cdat_table(CDATSubHeader 
> > ***cdat_table, void *priv)
> >  pmr_size = memory_region_size(nonvolatile_mr);
> >  }
> >  
> > +if (ct3d->dc.num_regions) {
> > +if (ct3d->dc.host_dc) {
> > +dc_mr = host_memory_backend_get_memory(ct3d->dc.host_dc);
> > +if (!dc_mr) {
> > +return -EINVAL;
> > +}
> > +len += CT3_CDAT_NUM_ENTRIES * ct3d->dc.num_regions;
> > +} else {
> > +return -EINVAL;
> 
> Flip logic to get the error out the way first and reduce indent.
> 
>  if (ct3d->dc.num_regions) {
> if (!ct3d->dc.host_dc) {
> return -EINVAL;
> }
> dc_mr = host_memory_backend_get_memory(ct3d->dc.host_dc);
> if (!dc_mr) {
> return -EINVAL;
> }
> len += CT3...
>  }
Will do.
> 
> > +}
> > +}
> > +
> 
> >  
> >  *cdat_table = g_steal_pointer();
> > @@ -300,11 +337,24 @@ static void build_dvsecs(CXLType3Dev *ct3d)
> >  range2_size_hi = ct3d->hostpmem->size >> 32;
> >  range2_size_lo = (2 << 5) | (2 << 2) | 0x3 |
> >   (ct3d->hostpmem->size & 0xF000);
> > +} else if (ct3d->dc.host_dc) {
> > +range2_size_hi = ct3d->dc.host_dc->size >> 32;
> > +range2_size_lo = (2 << 5) | (2 << 2) | 0x3 |
> > + (ct3d->dc.host_dc->size & 0xF000);
> >  }
> > -} else {
> > +} else if (ct3d->hostpmem) {
> >  range1_size_hi = ct3d->hostpmem->size >> 32;
> >  range1_size_lo = (2 << 5) | (2 << 2) | 0x3 |
> >   (ct3d->hostpmem->size & 0xF000);
> > +if (ct3d->dc.host_dc) {
> > +range2_size_hi = ct3d->dc.host_dc->size >> 32;
> > +range2_size_lo = (2 << 5) | (2 << 2) | 0x3 |
> > + (ct3d->dc.host_dc->size & 0xF000);
> > +}
> > +} else {
> > +range1_size_hi = ct3d->dc.host_dc->size >> 32;
> > +range1_size_lo = (2 << 5) | (2 << 2) | 0x3 |
> > + (ct3d->dc.host_dc->size & 0xF000);
> 
> I've forgotten if we ever closed out on

Re: [RFC 1/2] hw/riscv: Add server platform reference machine

2024-03-06 Thread Atish Kumar Patra

On Wed, Mar 6, 2024 at 4:56 AM Wu, Fei  wrote:
>
> On 3/6/2024 8:19 AM, Alistair Francis wrote:
> > On Mon, Mar 4, 2024 at 8:28 PM Fei Wu  wrote:
> >>
> >> The RISC-V Server Platform specification[1] defines a standardized set
> >> of hardware and software capabilities, that portable system software,
> >> such as OS and hypervisors can rely on being present in a RISC-V server
> >> platform.
> >>
> >> A corresponding Qemu RISC-V server platform reference (rvsp-ref for
> >> short) machine type is added to provide an environment for firmware/OS
> >> development and testing. The main features included in rvsp-ref are:
> >>
> >>  - Based on riscv virt machine type
> >>  - A new memory map as close as virt machine as possible
> >>  - A new virt CPU type rvsp-ref-cpu for server platform compliance
> >>  - AIA
> >>  - PCIe AHCI
> >>  - PCIe NIC
> >>  - No virtio device
> >>  - No fw_cfg device
> >>  - No ACPI table provided
> >>  - Only minimal device tree nodes
> >>
> >> [1] https://github.com/riscv-non-isa/riscv-server-platform
> >
> > + Atish
> >
> >>
> >> Signed-off-by: Fei Wu 
> >> ---
> >>  configs/devices/riscv64-softmmu/default.mak |1 +
> >>  hw/riscv/Kconfig|   13 +
> >>  hw/riscv/meson.build|1 +
> >>  hw/riscv/server_platform_ref.c  | 1244 +++
> >>  4 files changed, 1259 insertions(+)
> >>  create mode 100644 hw/riscv/server_platform_ref.c
> >>
> >> diff --git a/configs/devices/riscv64-softmmu/default.mak 
> >> b/configs/devices/riscv64-softmmu/default.mak
> >> index 3f68059448..a1d98e49ef 100644
> >> --- a/configs/devices/riscv64-softmmu/default.mak
> >> +++ b/configs/devices/riscv64-softmmu/default.mak
> >> @@ -10,5 +10,6 @@ CONFIG_SPIKE=y
> >>  CONFIG_SIFIVE_E=y
> >>  CONFIG_SIFIVE_U=y
> >>  CONFIG_RISCV_VIRT=y
> >> +CONFIG_SERVER_PLATFORM_REF=y
> >>  CONFIG_MICROCHIP_PFSOC=y
> >>  CONFIG_SHAKTI_C=y
> >> diff --git a/hw/riscv/Kconfig b/hw/riscv/Kconfig
> >> index 5d644eb7b1..debac5a7f5 100644
> >> --- a/hw/riscv/Kconfig
> >> +++ b/hw/riscv/Kconfig
> >> @@ -48,6 +48,19 @@ config RISCV_VIRT
> >>  select ACPI
> >>  select ACPI_PCI
> >>
> >> +config SERVER_PLATFORM_REF
> >> +bool
> >> +select RISCV_NUMA
> >> +select GOLDFISH_RTC
> >> +select PCI
> >> +select PCI_EXPRESS_GENERIC_BRIDGE
> >> +select PFLASH_CFI01
> >> +select SERIAL
> >> +select RISCV_ACLINT
> >> +select RISCV_APLIC
> >> +select RISCV_IMSIC
> >> +select SIFIVE_TEST
> >
> > Do we really need SiFive Test in the server platform?
> >
> It's used to reset the system, is there any better choice?
>
> Probably I can remove the "sifive,test1 sifive,test0" from the
> compatible list in fdt, and only keep "syscon", I see opensbi has
> already removed that support in commit c2e602707.
>
> > Same with the goldfish RTC?
> >
> Although the spec doesn't make RTC mandatory, it should be a common
> practice having a RTC on server, so I add a RTC here no matter it's
> goldfish or not.
>

The platform spec says
HPER_070 : A battery-backed RTC or analogous timekeeping mechanism
MUST be implemented.

Can we consider goldfish RTC in this category ?

But I want to discuss a larger point as the server platform/SoC spec
defines a bunch of optional requirements.
Does this platform intend to be a platform that is a superset of all
those options or allow optionality in
the platform as well ?

> >> +
> >>  config SHAKTI_C
> >>  bool
> >>  select RISCV_ACLINT
> >> diff --git a/hw/riscv/meson.build b/hw/riscv/meson.build
> >> index 2f7ee81be3..bb3aff91ea 100644
> >> --- a/hw/riscv/meson.build
> >> +++ b/hw/riscv/meson.build
> >> @@ -4,6 +4,7 @@ riscv_ss.add(when: 'CONFIG_RISCV_NUMA', if_true: 
> >> files('numa.c'))
> >>  riscv_ss.add(files('riscv_hart.c'))
> >>  riscv_ss.add(when: 'CONFIG_OPENTITAN', if_true: files('opentitan.c'))
> >>  riscv_ss.add(when: 'CONFIG_RISCV_VIRT', if_true: files('virt.c'))
> >> +riscv_ss.add(when: 'CONFIG_SERVER_PLATFORM_REF', if_true: 
> >> files('server_platform_ref.c'))
> >>  riscv_ss.add(when: 'CONFIG_SHAKTI_C', if_true: files('shakti_c.c'))
> >>  riscv_ss.add(when: 'CONFIG_SIFIVE_E', if_true: files('sifive_e.c'))
> >>  riscv_ss.add(when: 'CONFIG_SIFIVE_U', if_true: files('sifive_u.c'))
> >> diff --git a/hw/riscv/server_platform_ref.c 
> >> b/hw/riscv/server_platform_ref.c
> >> new file mode 100644
> >> index 00..ae90c4b27a
> >> --- /dev/null
> >> +++ b/hw/riscv/server_platform_ref.c
> >> @@ -0,0 +1,1244 @@
> >> +/*
> >> + * QEMU RISC-V Server Platfrom (RVSP) Reference Board
> >
> > Platform
> >
> OK.
>
> >> +static inline DeviceState *gpex_pcie_init(MemoryRegion *sys_mem,
> >> +  DeviceState *irqchip,
> >> +  RVSPMachineState *s)
> >> +{
> >> +DeviceState *dev;
> >> +PCIHostState *pci;
> >> +PCIDevice *pdev_ahci;
> >> +AHCIPCIState *ich9;
> >> +DriveInfo *hd[NUM_SATA_PORTS];
> >> +

Re: Enabling internal errors for VH CXL devices: [was: Re: Questions about CXL RAS injection test in qemu]

2024-03-06 Thread Terry Bowman

HI Yuquan,

For your test, the first logging will come from the AER driver if 
everything is working correctly.

You may want to check if the upstream pci bridge's AER UIE/CIE 
masks are set. This could prevent the error from being handled by the OS's
aer driver.

Regards,
Terry

On 3/6/24 11:12, Terry Bowman wrote:
> Hi Yuquan and Jon,
> 
> I added responses inline below.
> 
> On 3/6/24 07:23, Jonathan Cameron wrote:
>> On Wed, 6 Mar 2024 19:27:07 +0800
>> Yuquan Wang  wrote:
>>
>>> Hello, Jonathan
>>>
>>> Recently I met some problems on CXL RAS tests. 
>>>
>>> I tried to use "cxl-inject-uncorrectable-errors" and 
>>> "cxl-inject-correctable-error"
>>> qmp to inject CXL errors, however, there was no any kernel printing 
>>> information in 
>>> my qemu machine. And the qmp connection was unstable that made the machine 
>>> always "terminating on signal 2".
>>
>> The qmp connection being unstable is odd - might be related to the CXL code, 
>> but
>> I'm not sure how..
>>
>>>
>>> In addition, I successfully used the hmp "pcie_aer_inject_error" in the 
>>> same conditions.
>>> The kernel showed relevant print information.
>>
>> IIRC the AER paths print under all circumstances whereas CXL errors do not, 
>> they simply
>> trigger tracepoints - but you should have seen device resets.
>>
>> However I spun up a test and I think the issue is more straightforward.
>> The uncorrectable internal error and correctable internal errors are masked 
>> on the device.
>> I thought we changed the default on this in linux but maybe not :(
>>
> 
> Device AER UIE/CIE mask can be set and still expect to handle device AER 
> errors. The device reports 
> AER UIE/CIE to the root port/RCEC on behalf of device AER CRC, TLP, etc 
> errors. 
> 
> In earlier changes we added logic to clear the RCEC UIE/CIE mask in order to 
> properly receive 
> AER UIE/CIE notifications from devices and RCH dports.
> 
> "CXL Protocol and Link errors detected by components that are part of a CXL 
> VH are
> escalated and reported using standard PCIe error reporting mechanisms over 
> CXL.io as
> UIEs and/or CIEs. See PCIe Base Specification for details."[1]
> 
> [1] CXL3.1 12.2.1 - Protocol and Link Layer Error Reporting
> 
>> Hack is to find the relevant device with lspci -tv and then use
>> setpci -s 0d:00.0 0x208.l=0
>> to clear all the mask bits for uncorrectable errors.
>>
>> Note I tested this on a convenient arm64 setup so always possible there is 
>> yet
>> another problem on x86.
>>
>> Robert / Terry, I tracked down the patch where you enabled this for RCHs and 
>> there was
>> some discussion on walking out on VH as well to enable this, but seems it
>> never happened. Can you remember why?  Just kicked back for a future 
>> occasion?
>>
>> Jonathan
>>
>>
> 
> I tested (qemu x86) using the aer-inject tool and found it to work. Below 
> shows the 
> endpoint CIE is masked (0xe000 @ AER+0x14) and the injected error is properly 
> handled
> with root port logging and cxl_pci handler trace logs.
> 
>  # lspci | grep -i cxl
>  
> 0d:00.0 CXL: Intel Corporation Device 0d93 (rev 01)   
>   
>   
>   
>   
>   
> # lspci -s 0d:00.0 -vvv | grep Advanced   
>   
>   
> Capabilities: [200 v2] Advanced Error Reporting   
>   
>   
>   
>   
>   
> # setpci -s 0d:00.0 0x208.l   
>   
>   
> 0240  
>   
>   
>   
>   
>   
> # setpci -s 0d:00.0 0x214.l   
>   
>   
> e000  
>   
>   
>   
>   
>

Re: [PATCH v2 1/2] hw/arm/virt-acpi-build.c: Migrate SPCR creation to common location

2024-03-06 Thread Daniel Henrique Barboza


Hi,

This patch breaks check-qtest, more specifically 'bios-tables-test', for aarch64.
I found this while running riscv-to-apply.next in the Gitlab pipeline.


Here's the output:

$ make -j && QTEST_QEMU_BINARY=./qemu-system-aarch64 V=1 
./tests/qtest/bios-tables-test
TAP version 13
# random seed: R02Sf0f2fa0a3fac5d540b1681c820621b7d
# starting QEMU: exec ./qemu-system-aarch64 -qtest unix:/tmp/qtest-591353.sock 
-qtest-log /dev/null -chardev socket,path=/tmp/qtest-591353.qmp,id=char0 -mon 
chardev=char0,mode=control -display none -audio none -machine none -accel qtest
1..8
# Start of aarch64 tests
# Start of acpi tests
# starting QEMU: exec ./qemu-system-aarch64 -qtest unix:/tmp/qtest-591353.sock 
-qtest-log /dev/null -chardev socket,path=/tmp/qtest-591353.qmp,id=char0 -mon 
chardev=char0,mode=control -display none -audio none -machine virt  -accel tcg 
-nodefaults -nographic -drive 
if=pflash,format=raw,file=pc-bios/edk2-aarch64-code.fd,readonly=on -drive 
if=pflash,format=raw,file=pc-bios/edk2-arm-vars.fd,snapshot=on -cdrom 
tests/data/uefi-boot-images/bios-tables-test.aarch64.iso.qcow2 -cpu cortex-a57 
-smbios type=4,max-speed=2900,current-speed=2700 -accel qtest
acpi-test: Warning! SPCR binary file mismatch. Actual [aml:/tmp/aml-9G53J2], 
Expected [aml:tests/data/acpi/virt/SPCR].
See source file tests/qtest/bios-tables-test.c for instructions on how to 
update expected files.
acpi-test: Warning! SPCR mismatch. Actual [asl:/tmp/asl-SR53J2.dsl, 
aml:/tmp/aml-9G53J2], Expected [asl:/tmp/asl-4Z33J2.dsl, 
aml:tests/data/acpi/virt/SPCR].

The diff is here:

--- /tmp/asl-4Z33J2.dsl 2024-03-06 15:40:24.879879348 -0300
+++ /tmp/asl-SR53J2.dsl 2024-03-06 15:40:24.877879347 -0300
@@ -1,57 +1,49 @@
 /*
  * Intel ACPI Component Architecture
  * AML/ASL+ Disassembler version 20220331 (64-bit version)
  * Copyright (c) 2000 - 2022 Intel Corporation

(...)

 [000h    4]Signature : "SPCR"[Serial Port Console 
Redirection Table]
-[004h 0004   4] Table Length : 0050
+[004h 0004   4] Table Length : 004F
 [008h 0008   1] Revision : 02
-[009h 0009   1] Checksum : B1
+[009h 0009   1] Checksum : B2
 [00Ah 0010   6]   Oem ID : "BOCHS "

(...)

-[042h 0066   2]PCI Vendor ID : 
+[042h 0066   2]PCI Vendor ID : 00FF


After inspecting the common helper and what the original ARM code was doing
I found out that we're missing something down there:


On 1/15/24 22:09, Sia Jee Heng wrote:

RISC-V should also generate the SPCR in a manner similar to ARM.
Therefore, instead of replicating the code, relocate this function
to the common AML build.

Signed-off-by: Sia Jee Heng 
---
  hw/acpi/aml-build.c | 51 
  hw/arm/virt-acpi-build.c| 68 +++--
  include/hw/acpi/acpi-defs.h | 33 ++
  include/hw/acpi/aml-build.h |  4 +++
  4 files changed, 115 insertions(+), 41 deletions(-)

diff --git a/hw/acpi/aml-build.c b/hw/acpi/aml-build.c
index af66bde0f5..f3904650e4 100644
--- a/hw/acpi/aml-build.c
+++ b/hw/acpi/aml-build.c
@@ -1994,6 +1994,57 @@ static void build_processor_hierarchy_node(GArray *tbl, 
uint32_t flags,
  }
  }
  
+void build_spcr(GArray *table_data, BIOSLinker *linker,

+const AcpiSpcrData *f, const uint8_t rev,
+const char *oem_id, const char *oem_table_id)
+{
+AcpiTable table = { .sig = "SPCR", .rev = rev, .oem_id = oem_id,
+.oem_table_id = oem_table_id };
+
+acpi_table_begin(, table_data);
+/* Interface type */
+build_append_int_noprefix(table_data, f->interface_type, 1);
+/* Reserved */
+build_append_int_noprefix(table_data, 0, 3);
+/* Base Address */
+build_append_gas(table_data, f->base_addr.id, f->base_addr.width,
+ f->base_addr.offset, f->base_addr.size,
+ f->base_addr.addr);
+/* Interrupt type */
+build_append_int_noprefix(table_data, f->interrupt_type, 1);
+/* IRQ */
+build_append_int_noprefix(table_data, f->pc_interrupt, 1);
+/* Global System Interrupt */
+build_append_int_noprefix(table_data, f->interrupt, 4);
+/* Baud Rate */
+build_append_int_noprefix(table_data, f->baud_rate, 1);
+/* Parity */
+build_append_int_noprefix(table_data, f->parity, 1);
+/* Stop Bits */
+build_append_int_noprefix(table_data, f->stop_bits, 1);
+/* Flow Control */
+build_append_int_noprefix(table_data, f->flow_control, 1);


Here. We're missing the "Language" entry.


This diff fixes the broken test:


$ git diff
diff --git a/hw/acpi/aml-build.c b/hw/acpi/aml-build.c
index f3904650e4..6d4517cfbe 100644
--- a/hw/acpi/aml-build.c
+++ b/hw/acpi/aml-build.c
@@ -2024,6 +2024,8 @@ void build_spcr(GArray *table_data, BIOSLinker *linker,
 build_append_int_noprefix(table_data, f->stop_bits, 1);

Re: [PATCH v2 2/2] vhost: Perform memory section dirty scans once per iteration

2024-03-06 Thread Eugenio Perez Martin

On Wed, Feb 14, 2024 at 2:02 PM Si-Wei Liu  wrote:
>
> On setups with one or more virtio-net devices with vhost on,
> dirty tracking iteration increases cost the bigger the number
> amount of queues are set up e.g. on idle guests migration the
> following is observed with virtio-net with vhost=on:
>
> 48 queues -> 78.11%  [.] vhost_dev_sync_region.isra.13
> 8 queues -> 40.50%   [.] vhost_dev_sync_region.isra.13
> 1 queue -> 6.89% [.] vhost_dev_sync_region.isra.13
> 2 devices, 1 queue -> 18.60%  [.] vhost_dev_sync_region.isra.14
>

I think the after benchmark should also be included.

> With high memory rates the symptom is lack of convergence as soon
> as it has a vhost device with a sufficiently high number of queues,
> the sufficient number of vhost devices.
>
> On every migration iteration (every 100msecs) it will redundantly
> query the *shared log* the number of queues configured with vhost
> that exist in the guest. For the virtqueue data, this is necessary,
> but not for the memory sections which are the same. So
> essentially we end up scanning the dirty log too often.
>
> To fix that, select a vhost device responsible for scanning the
> log with regards to memory sections dirty tracking. It is selected
> when we enable the logger (during migration) and cleared when we
> disable the logger. If the vhost logger device goes away for some
> reason, the logger will be re-selected from the rest of vhost
> devices.
>
> Co-developed-by: Joao Martins 
> Signed-off-by: Joao Martins 
> Signed-off-by: Si-Wei Liu 
> ---
>  hw/virtio/vhost.c | 75 
> +++
>  include/hw/virtio/vhost.h |  1 +
>  2 files changed, 70 insertions(+), 6 deletions(-)
>
> diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c
> index ef6d9b5..997d560 100644
> --- a/hw/virtio/vhost.c
> +++ b/hw/virtio/vhost.c
> @@ -45,6 +45,9 @@
>
>  static struct vhost_log *vhost_log[VHOST_BACKEND_TYPE_MAX];
>  static struct vhost_log *vhost_log_shm[VHOST_BACKEND_TYPE_MAX];
> +static struct vhost_dev *vhost_mem_logger[VHOST_BACKEND_TYPE_MAX];
> +static QLIST_HEAD(, vhost_dev) vhost_mlog_devices =
> +QLIST_HEAD_INITIALIZER(vhost_mlog_devices);
>
>  /* Memslots used by backends that support private memslots (without an fd). 
> */
>  static unsigned int used_memslots;
> @@ -149,6 +152,53 @@ bool vhost_dev_has_iommu(struct vhost_dev *dev)
>  }
>  }
>
> +static bool vhost_log_dev_enabled(struct vhost_dev *dev)

"Enabled" sounds misleading to me. Maybe vhost_dev_should_log? More
suggestions below.

> +{
> +assert(dev->vhost_ops);
> +assert(dev->vhost_ops->backend_type > VHOST_BACKEND_TYPE_NONE);
> +assert(dev->vhost_ops->backend_type < VHOST_BACKEND_TYPE_MAX);
> +
> +return dev == vhost_mem_logger[dev->vhost_ops->backend_type];
> +}
> +
> +static void vhost_mlog_set_dev(struct vhost_dev *hdev, bool enable)
> +{
> +struct vhost_dev *logdev = NULL;
> +VhostBackendType backend_type;
> +bool reelect = false;
> +
> +assert(hdev->vhost_ops);
> +assert(hdev->vhost_ops->backend_type > VHOST_BACKEND_TYPE_NONE);
> +assert(hdev->vhost_ops->backend_type < VHOST_BACKEND_TYPE_MAX);
> +
> +backend_type = hdev->vhost_ops->backend_type;
> +
> +if (enable && !QLIST_IS_INSERTED(hdev, logdev_entry)) {
> +reelect = !vhost_mem_logger[backend_type];
> +QLIST_INSERT_HEAD(_mlog_devices, hdev, logdev_entry);
> +} else if (!enable && QLIST_IS_INSERTED(hdev, logdev_entry)) {
> +reelect = vhost_mem_logger[backend_type] == hdev;
> +QLIST_REMOVE(hdev, logdev_entry);
> +}
> +
> +if (!reelect)
> +return;
> +
> +QLIST_FOREACH(hdev, _mlog_devices, logdev_entry) {
> +if (!hdev->vhost_ops ||
> +hdev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_NONE ||
> +hdev->vhost_ops->backend_type >= VHOST_BACKEND_TYPE_MAX)

Aren't comparisons with ops->backend_type already contained in the
following "hdev->vhost_ops->backend_type == backend_type" ?

> +continue;
> +
> +if (hdev->vhost_ops->backend_type == backend_type) {
> +logdev = hdev;
> +break;
> +}

Why not use VHOST_BACKEND_TYPE_MAX QLISTs, and then simply check if
*dev is the head at vhost_log_dev_enabled?

That way we can remove this foreach and vhost_log_dev_enabled
entirely, as the check is simpler. I think it could even remove this
function entirely and inline QLIST_INSERT / QLIST_REMOVE at callers.
What do you think?

Thanks!

> +}
> +
> +vhost_mem_logger[backend_type] = logdev;
> +}
> +
>  static int vhost_sync_dirty_bitmap(struct vhost_dev *dev,
> MemoryRegionSection *section,
> hwaddr first,
> @@ -166,12 +216,14 @@ static int vhost_sync_dirty_bitmap(struct vhost_dev 
> *dev,
>  start_addr = MAX(first, start_addr);
>  end_addr = MIN(last, end_addr);
>
> -for (i = 0; i < dev->mem->nregions; ++i) {
> -

Re: [PATCH v3 6/7] Add multifd_tcp_plain test using list of channels instead of uri

2024-03-06 Thread Het Gala



On 06/03/24 8:37 pm, Fabiano Rosas wrote:

Het Gala  writes:


Add a positive test to check multifd live migration but this time
using list of channels (restricted to 1) as the starting point
instead of simple uri string.

Signed-off-by: Het Gala
Suggested-by: Fabiano Rosas
---
  tests/qtest/migration-test.c | 29 ++---
  1 file changed, 26 insertions(+), 3 deletions(-)

diff --git a/tests/qtest/migration-test.c b/tests/qtest/migration-test.c
index f94fe713b2..05e5f3ebe5 100644
--- a/tests/qtest/migration-test.c
+++ b/tests/qtest/migration-test.c
@@ -659,6 +659,12 @@ typedef struct {
   */
  const char *connect_uri;
  
+/*

+ * Optional: the JSON formatted list of URIs for the src
+ * QEMU to connect to
+ */

You could add some words here mentioning that a port of '0' will be
automatically converted to the correct port that the destination is
using.

Ack, will add these while defining connect_channels.

+const char *connect_channels;
+
  /* Optional: callback to run at start to set migration parameters */
  TestMigrateStartHook start_hook;
  /* Optional: callback to run at finish to cleanup */
@@ -2623,7 +2629,7 @@ test_migrate_precopy_tcp_multifd_zstd_start(QTestState 
*from,
  }
  #endif /* CONFIG_ZSTD */
  
-static void test_multifd_tcp_none(void)

+static void test_multifd_tcp_uri_none(void)
  {
  MigrateCommon args = {
  .listen_uri = "defer", @@ -2638,6 +2644,21 @@ static void test_multifd_tcp_none(void) 
test_precopy_common(); } +static void 
test_multifd_tcp_channels_none(void) +{ + MigrateCommon args = { + 
.listen_uri = "defer",

+.start_hook = test_migrate_precopy_tcp_multifd_start,
+.live = true,
+.connect_channels = "[ { 'channel-type': 'main',"
+"'addr': { 'transport': 'socket',"
+"  'type': 'inet',"
+"  'host': '127.0.0.1',"
+"  'port': '0' } } ]",
+};
+test_precopy_common();
+}
+
  static void test_multifd_tcp_zlib(void)
  {
  MigrateCommon args = {
@@ -3531,8 +3552,10 @@ int main(int argc, char **argv)
 test_migrate_dirty_limit);
  }
  }
-migration_test_add("/migration/multifd/tcp/plain/none",
-   test_multifd_tcp_none);
+migration_test_add("/migration/multifd/tcp/uri/plain/none",
+   test_multifd_tcp_uri_none);
+migration_test_add("/migration/multifd/tcp/channels/plain/none",
+   test_multifd_tcp_channels_none);

We should eventually make a pass to standardize/simplify these
strings. We could have a little "grammar" defined on how to construct
them.

//

test-type  :: migrate | validate
migration-mode :: multifd | precopy | postcopy
transport  :: tcp | fd | unix | file
invocation :: uri | channels
compression:: zlib | zstd | none
encryption :: tls | plain

Anyway, work for the future.


Yes, completely agree with you. It makes it much easier for people to 
identify and define every test.


I can take this up as a separate patchset after this one gets merged maybe


Regards,

Het Gala

Re: [PATCH-for-9.1 18/18] hw/i386/pc: Replace PCMachineClass::acpi_data_size by PC_ACPI_DATA_SIZE

2024-03-06 Thread Thomas Huth


On 05/03/2024 14.42, Philippe Mathieu-Daudé wrote:

PCMachineClass::acpi_data_size was only used by the pc-i440fx-2.0
machine, which got removed. Since it is constant, replace the class
field by a definition.

Signed-off-by: Philippe Mathieu-Daudé 
---
  include/hw/i386/pc.h |  4 
  hw/i386/pc.c | 19 ---
  2 files changed, 12 insertions(+), 11 deletions(-)


Reviewed-by: Thomas Huth

Re: [PATCH-for-9.1 17/18] target/i386: Remove X86CPU::kvm_no_smi_migration field

2024-03-06 Thread Thomas Huth


On 05/03/2024 14.42, Philippe Mathieu-Daudé wrote:

X86CPU::kvm_no_smi_migration was only used by the
pc-i440fx-2.3 machine, which got removed. Remove it
and simplify kvm_put_vcpu_events().

Signed-off-by: Philippe Mathieu-Daudé 
---
  target/i386/cpu.h | 3 ---
  target/i386/cpu.c | 2 --
  target/i386/kvm/kvm.c | 6 --
  3 files changed, 11 deletions(-)

diff --git a/target/i386/cpu.h b/target/i386/cpu.h
index 952174bb6f..bdc640e844 100644
--- a/target/i386/cpu.h
+++ b/target/i386/cpu.h
@@ -2018,9 +2018,6 @@ struct ArchCPU {
  /* if set, limit maximum value for phys_bits when host_phys_bits is true 
*/
  uint8_t host_phys_bits_limit;
  
-/* Stop SMI delivery for migration compatibility with old machines */

-bool kvm_no_smi_migration;
-
  /* Forcefully disable KVM PV features not exposed in guest CPUIDs */
  bool kvm_pv_enforce_cpuid;
  
diff --git a/target/i386/cpu.c b/target/i386/cpu.c

index 2666ef3808..0e3ad8db2b 100644
--- a/target/i386/cpu.c
+++ b/target/i386/cpu.c
@@ -7905,8 +7905,6 @@ static Property x86_cpu_properties[] = {
  DEFINE_PROP_BOOL("x-vendor-cpuid-only", X86CPU, vendor_cpuid_only, true),
  DEFINE_PROP_BOOL("lmce", X86CPU, enable_lmce, false),
  DEFINE_PROP_BOOL("l3-cache", X86CPU, enable_l3_cache, true),
-DEFINE_PROP_BOOL("kvm-no-smi-migration", X86CPU, kvm_no_smi_migration,
- false),
  DEFINE_PROP_BOOL("kvm-pv-enforce-cpuid", X86CPU, kvm_pv_enforce_cpuid,
   false),
  DEFINE_PROP_BOOL("vmware-cpuid-freq", X86CPU, vmware_cpuid_freq, true),
diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c
index 42970ab046..571cbbf1fc 100644
--- a/target/i386/kvm/kvm.c
+++ b/target/i386/kvm/kvm.c
@@ -4344,12 +4344,6 @@ static int kvm_put_vcpu_events(X86CPU *cpu, int level)
  events.smi.pending = 0;
  events.smi.latched_init = 0;
  }
-/* Stop SMI delivery on old machine types to avoid a reboot
- * on an inward migration of an old VM.
- */
-if (!cpu->kvm_no_smi_migration) {
-events.flags |= KVM_VCPUEVENT_VALID_SMM;
-}


Shouldn't it be the other way round, i.e. that the flag is now always set?

pc_compat_2_3[] had:

{ TYPE_X86_CPU, "kvm-no-smi-migration", "on" },

... so I think kvm_no_smi_migration was set to true for the old machines?

 Thomas

Re: [PATCH-for-9.1 12/18] hw/i386/pc: Remove deprecated pc-i440fx-2.2 machine

2024-03-06 Thread Thomas Huth


On 05/03/2024 14.42, Philippe Mathieu-Daudé wrote:

The pc-i440fx-2.2 machine was deprecated for the 8.2
release (see commit c7437f0ddb "docs/about: Mark the
old pc-i440fx-2.0 - 2.3 machine types as deprecated"),
time to remove it.

Signed-off-by: Philippe Mathieu-Daudé 
---
  docs/about/deprecated.rst   |  6 +++---
  docs/about/removed-features.rst |  2 +-
  include/hw/i386/pc.h|  3 ---
  hw/i386/pc.c| 23 ---
  hw/i386/pc_piix.c   | 21 -
  5 files changed, 4 insertions(+), 51 deletions(-)


Reviewed-by: Thomas Huth

Re: [PATCH v5 12/13] hw/mem/cxl_type3: Allow to release partial extent and extent superset in QMP interface

2024-03-06 Thread Jonathan Cameron via

On Mon,  4 Mar 2024 11:34:07 -0800
nifan@gmail.com wrote:

> From: Fan Ni 
> 
> Before the change, the QMP interface used for add/release DC extents
> only allows to release extents that exist in either pending-to-add list
> or accepted list in the device, which means the DPA range of the extent must
> match exactly that of an extent in either list. Otherwise, the release
> request will be ignored.
> 
> With the change, we relax the constraints. As long as the DPA range of the
> extent to release is covered by extents in one of the two lists
> mentioned above, we allow the release.
> 
> Signed-off-by: Fan Ni 
Run out of time today,  so just took a very quick look at this.

Seemed fine but similar comments on exit conditions and retry gotos as
earlier patches.

> +/*
> + * Remove all extents whose DPA range has overlaps with  the DPA range
> + * [dpa, dpa + len) from the list, and delete the overlapped portion.
> + * Note:
> + * 1. If the removed extents is fully within the DPA range, delete the 
> extent;
> + * 2. Otherwise, keep the portion that does not overlap, insert new extents 
> to
> + * the list if needed for the un-coverlapped part.
> + */
> +static void cxl_delist_extent_by_dpa_range(CXLDCExtentList *list,
> +   uint64_t dpa, uint64_t len)
> +{
> +CXLDCExtent *ent;
>  
> -return NULL;
> +process_leftover:

As before can we turn this into a while loop so the exit conditions are 
more obvious?  Based on len I think.


> +QTAILQ_FOREACH(ent, list, node) {
> +if (ent->start_dpa <= dpa && dpa < ent->start_dpa + ent->len) {
> +uint64_t ent_start_dpa = ent->start_dpa;
> +uint64_t ent_len = ent->len;
> +uint64_t len1 = dpa - ent_start_dpa;
> +
> +cxl_remove_extent_from_extent_list(list, ent);
> +if (len1) {
> +cxl_insert_extent_to_extent_list(list, ent_start_dpa,
> + len1, NULL, 0);
> +}
> +
> +if (dpa + len <= ent_start_dpa + ent_len) {
> +uint64_t len2 = ent_start_dpa + ent_len - dpa - len;
> +if (len2) {
> +cxl_insert_extent_to_extent_list(list, dpa + len,
> + len2, NULL, 0);
> +}
> +} else {
> +len = dpa + len - ent_start_dpa - ent_len;
> +dpa = ent_start_dpa + ent_len;
> +goto process_leftover;
> +}
> +}
> +}
>  }
>  
>  /*
> @@ -1915,8 +1966,8 @@ static void qmp_cxl_process_dynamic_capacity(const char 
> *path, CxlEventLog log,
>  list = records;
>  extents = g_new0(CXLDCExtentRaw, num_extents);
>  while (list) {
> -CXLDCExtent *ent;
>  bool skip_extent = false;
> +CXLDCExtentList *extent_list;
>  
>  offset = list->value->offset;
>  len = list->value->len;
> @@ -1933,15 +1984,32 @@ static void qmp_cxl_process_dynamic_capacity(const 
> char *path, CxlEventLog log,
>   * remove it from the pending extent list, so later when the add
>   * response for the extent arrives, the device can reject the
>   * extent as it is not in the pending list.
> + * Now, we can handle the case where the extent covers the DPA

No need for "Now". Anyone reading it is looking at the code here.

> + * range of multiple extents in the pending_to_add list.
> + * TODO: we do not allow the extent covers range of extents in
> + * pending_to_add list and accepted list at the same time for 
> now.
>   */
> -ent = cxl_dc_extent_exists(>dc.extents_pending_to_add,
> -[i]);
> -if (ent) {
> -QTAILQ_REMOVE(>dc.extents_pending_to_add, ent, node);
> -g_free(ent);
> +extent_list = >dc.extents_pending_to_add;
> +if (cxl_test_dpa_range_covered_by_extents(extent_list,
> +  extents[i].start_dpa,
> +  extents[i].len)) {
> +cxl_delist_extent_by_dpa_range(extent_list,
> +   extents[i].start_dpa,
> +   extents[i].len);
> +} else if (!ct3_test_region_block_backed(dcd, 
> extents[i].start_dpa,
> + extents[i].len)) {
> +/*
> + * If the DPA range of the extent is not covered by extents
> + * in the accepted list, skip
> + */
>  skip_extent = true;
> -} else if (!cxl_dc_extent_exists(>dc.extents, [i])) 
> {
> -/* If the exact extent is not in the accepted list, skip */
> +}
> +} else if (type ==

Re: [PATCH v5 11/13] hw/cxl/cxl-mailbox-utils: Add partial and superset extent release mailbox support

2024-03-06 Thread Jonathan Cameron via

On Mon,  4 Mar 2024 11:34:06 -0800
nifan@gmail.com wrote:

> From: Fan Ni 
> 
> With the change, we extend the extent release mailbox command processing
> to allow more flexible release. As long as the DPA range of the extent to
> release is covered by valid extent(s) in the device, the release can be
> performed.
> 
> Signed-off-by: Fan Ni 

Ouch this is more complex than I was thinking, but seems correct to me.

A few minor comments inline

Jonathan

> +/*
> + * Detect potential extent overflow caused by extent split during processing
> + * extent release requests, also allow releasing superset of extents where 
> the
> + * extent to release covers the range of multiple extents in the device.
> + * Note:
> + * 1.we will reject releasing an extent if some portion of its rang is

range

> + * not covered by valid extents.
> + * 2.This function is called after cxl_detect_malformed_extent_list so checks
> + * already performed there will be skipped.
> + */
> +static CXLRetCode cxl_detect_extent_overflow(const CXLType3Dev *ct3d,
> +const CXLUpdateDCExtentListInPl *in)

This code is basically dry running the actual removal.  Can we just
make the core code the same for both cases?  The bit where you update bitmaps
and extent lists at least.

> +{
> +uint64_t nbits, offset;
> +const CXLDCRegion *region;
> +unsigned long **bitmaps_copied;
> +uint64_t dpa, len;
> +int i, rid;
> +CXLRetCode ret = CXL_MBOX_SUCCESS;
> +long extent_cnt_delta = 0;
> +CXLDCExtentList tmp_list;
> +CXLDCExtent *ent;
> +
> +QTAILQ_INIT(_list);
> +copy_extent_list(_list, >dc.extents);
> +
> +bitmaps_copied = g_new0(unsigned long *, ct3d->dc.num_regions);
> +for (i = 0; i < ct3d->dc.num_regions; i++) {
> +region = >dc.regions[i];
> +nbits = region->len / region->block_size;
> +bitmaps_copied[i] = bitmap_new(nbits);
> +bitmap_copy(bitmaps_copied[i], region->blk_bitmap, nbits);
> +}
> +
> +for (i = 0; i < in->num_entries_updated; i++) {
> +dpa = in->updated_entries[i].start_dpa;
> +len = in->updated_entries[i].len;
> +
> +rid = cxl_find_dc_region_id(ct3d, dpa, len);
> +region = >dc.regions[rid];
> +offset = (dpa - region->base) / region->block_size;
> +nbits = len / region->block_size;
> +
> +/* Check whether range [dpa, dpa + len) is covered by valid range */
> +if (find_next_zero_bit(bitmaps_copied[rid], offset + nbits, offset) <
> +   offset + nbits) {
> +ret = CXL_MBOX_INVALID_PA;
> +goto free_and_exit;
> +}
> +
> +QTAILQ_FOREACH(ent, _list, node) {
> +/* Only split within an extent can cause extent count increase */
> +if (ent->start_dpa <= dpa &&
> +dpa + len <= ent->start_dpa + ent->len) {
> +uint64_t ent_start_dpa = ent->start_dpa;
> +uint64_t ent_len = ent->len;
> +uint64_t len1 = dpa - ent_start_dpa;
> +uint64_t len2 = ent_start_dpa + ent_len - dpa - len;
> +
> +extent_cnt_delta += len1 && len2 ? 2 : (len1 || len2 ? 1 : 
> 0);
I think this is the same as

if (len1)
extent_cnt_delta++;
if (len2)
extent_cnt_delta++;
extent_cnt_delta--;



> +extent_cnt_delta -= 1;
> +if (ct3d->dc.total_extent_count + extent_cnt_delta >
> +CXL_NUM_EXTENTS_SUPPORTED) {

This early overflow detection seems valid to me because a device might run
out of resources while processing the list even if it would fit at the end.
Good.
> +ret = CXL_MBOX_RESOURCES_EXHAUSTED;
> +goto free_and_exit;
> +}
> +
> +offset = (ent->start_dpa - region->base) / 
> region->block_size;
> +nbits = ent->len / region->block_size;
> +bitmap_clear(bitmaps_copied[rid], offset, nbits);
> +cxl_remove_extent_from_extent_list(_list, ent);
> +
> + if (len1) {
> +offset = (dpa - region->base) / region->block_size;
> +nbits = len1 / region->block_size;
> +bitmap_set(bitmaps_copied[rid], offset, nbits);
> +cxl_insert_extent_to_extent_list(_list,
> + ent_start_dpa, len1,
> + NULL, 0);
> + }
> +
> + if (len2) {
> +offset = (dpa + len - region->base) / region->block_size;
> +nbits = len2 / region->block_size;
> +bitmap_set(bitmaps_copied[rid], offset, nbits);
> +cxl_insert_extent_to_extent_list(_list, dpa + len,
> + len2, NULL, 0);

Re: [PATCH v5 10/13] hw/mem/cxl_type3: Add dpa range validation for accesses to DC regions

2024-03-06 Thread Jonathan Cameron via

On Mon,  4 Mar 2024 11:34:05 -0800
nifan@gmail.com wrote:

> From: Fan Ni 
> 
> Not all dpa range in the DC regions is valid to access until an extent

All DPA ranges in the DC regions are invalid to access until an extent
covering the range has been added.

> covering the range has been added. Add a bitmap for each region to
> record whether a DC block in the region has been backed by DC extent.
> For the bitmap, a bit in the bitmap represents a DC block. When a DC
> extent is added, all the bits of the blocks in the extent will be set,
> which will be cleared when the extent is released.
> 
> Signed-off-by: Fan Ni 
Reviewed-by: Jonathan Cameron

Re: [PATCH v5 09/13] hw/cxl/events: Add qmp interfaces to add/release dynamic capacity extents

2024-03-06 Thread Jonathan Cameron via

On Mon,  4 Mar 2024 11:34:04 -0800
nifan@gmail.com wrote:

> From: Fan Ni 
> 
> Since fabric manager emulation is not supported yet, the change implements
> the functions to add/release dynamic capacity extents as QMP interfaces.

We'll need them anyway, or to implement an fm interface via QMP which is
going to be ugly and complex.

> 
> Note: we skip any FM issued extent release request if the exact extent
> does not exist in the extent list of the device. We will loosen the
> restriction later once we have partial release support in the kernel.

Maybe the kernel will treat it as a request to release the extent it
is tracking that contains it.  So we may want to add a way to poke that.
Not today though!

> 
> 1. Add dynamic capacity extents:
> 
> For example, the command to add two continuous extents (each 128MiB long)
> to region 0 (starting at DPA offset 0) looks like below:
> 
> { "execute": "qmp_capabilities" }
> 
> { "execute": "cxl-add-dynamic-capacity",
>   "arguments": {
>   "path": "/machine/peripheral/cxl-dcd0",
>   "region-id": 0,
>   "extents": [
>   {
>   "dpa": 0,
>   "len": 134217728
>   },
>   {
>   "dpa": 134217728,
>   "len": 134217728
>   }
>   ]
>   }
> }
> 
> 2. Release dynamic capacity extents:
> 
> For example, the command to release an extent of size 128MiB from region 0
> (DPA offset 128MiB) look like below:
> 
> { "execute": "cxl-release-dynamic-capacity",
>   "arguments": {
>   "path": "/machine/peripheral/cxl-dcd0",
>   "region-id": 0,
>   "extents": [
>   {
>   "dpa": 134217728,
>   "len": 134217728
>   }
>   ]
>   }
> }
> 
> Signed-off-by: Fan Ni 

...
  
> diff --git a/hw/mem/cxl_type3.c b/hw/mem/cxl_type3.c
> index dccfaaad3a..e9c8994cdb 100644
> --- a/hw/mem/cxl_type3.c
> +++ b/hw/mem/cxl_type3.c
> @@ -674,6 +674,7 @@ static bool cxl_create_dc_regions(CXLType3Dev *ct3d, 
> Error **errp)
>  ct3d->dc.total_capacity += region->len;
>  }
>  QTAILQ_INIT(>dc.extents);
> +QTAILQ_INIT(>dc.extents_pending_to_add);
>  
>  return true;
>  }
> @@ -686,6 +687,12 @@ static void cxl_destroy_dc_regions(CXLType3Dev *ct3d)
>  ent = QTAILQ_FIRST(>dc.extents);
>  cxl_remove_extent_from_extent_list(>dc.extents, ent);
>  }
> +
> +while (!QTAILQ_EMPTY(>dc.extents_pending_to_add)) {

QTAILQ_FOREACH_SAFE

> +ent = QTAILQ_FIRST(>dc.extents_pending_to_add);
> +cxl_remove_extent_from_extent_list(>dc.extents_pending_to_add,
> +   ent);
> +}
>  }

> +/*
> + * The main function to process dynamic capacity event. Currently DC extents
> + * add/release requests are processed.
> + */
> +static void qmp_cxl_process_dynamic_capacity(const char *path, CxlEventLog 
> log,
> + CXLDCEventType type, uint16_t 
> hid,
> + uint8_t rid,
> + CXLDCExtentRecordList *records,
> + Error **errp)
> +{
> +Object *obj;
> +CXLEventDynamicCapacity dCap = {};
> +CXLEventRecordHdr *hdr = 
> +CXLType3Dev *dcd;
> +uint8_t flags = 1 << CXL_EVENT_TYPE_INFO;
> +uint32_t num_extents = 0;
> +CXLDCExtentRecordList *list;
> +g_autofree CXLDCExtentRaw *extents = NULL;
> +uint8_t enc_log;
> +uint64_t offset, len, block_size;
> +int i;
> +int rc;

Combine the two lines above.

> +g_autofree unsigned long *blk_bitmap = NULL;
> +
> +obj = object_resolve_path(path, NULL);
> +if (!obj) {
> +error_setg(errp, "Unable to resolve path");
> +return;
> +}

object_resolve_path_type() and skip a step (should do this in various places
in our existing code!)

> +if (!object_dynamic_cast(obj, TYPE_CXL_TYPE3)) {
> +error_setg(errp, "Path not point to a valid CXL type3 device");
> +return;
> +}
> +
> +dcd = CXL_TYPE3(obj);
> +if (!dcd->dc.num_regions) {
> +error_setg(errp, "No dynamic capacity support from the device");
> +return;
> +}
> +
> +rc = ct3d_qmp_cxl_event_log_enc(log);
> +if (rc < 0) {
> +error_setg(errp, "Unhandled error log type");
> +return;
> +}
> +enc_log = rc;
> +
> +if (rid >= dcd->dc.num_regions) {
> +error_setg(errp, "region id is too large");
> +return;
> +}
> +block_size = dcd->dc.regions[rid].block_size;
> +
> +/* Sanity check and count the extents */
> +list = records;
> +while (list) {
> +offset = list->value->offset;
> +len = list->value->len;
> +
> +if (len == 0) {
> +error_setg(errp, "extent with 0 length is not allowed");
> +return;
> +}
> +
> +if (offset % block_size || len % block_size) {
> +error_setg(errp, "dpa or len is not aligned to region block 
> size");
> +

Re: [PATCH V2 1/1] target/loongarch: Fixed tlb huge page loading issue

2024-03-06 Thread Richard Henderson


On 3/5/24 21:38, maobibo wrote:

Sorry, manual is updated already and we do not notice that still.

https://www.loongson.cn/uploads/images/2023102309132647981.%E9%BE%99%E8%8A%AF%E6%9E%B6%E6%9E%84%E5%8F%82%E8%80%83%E6%89%8B%E5%86%8C%E5%8D%B7%E4%B8%80_r1p10.pdf

It is Chinese web link, English manual is not updated. Here is English translation by 
manual with instruction  "lddir rd, rj, level"


If the bit[14:13] of the register rj is not equal to 0 and its bit[6] is 1, the value of 
the register rj is a marked as HugePage page entries. In this case, the value from 
register rj is written directly to register rd.


If the bit[14:13] of the register rj is equal to 0 and its bit[6] is 1, the value of the 
register rj is an Hugepage table entry. In this case, replace the bit[14:13] of the 
register RJ value with level[1:0], the val is written to the register rd.


If the bit[6] bit of register rj is 0, the value of the universal register rj is the page 
table entry, it is  physical address base page table. In this case, if the LDDIR command 
is executed, the address will be refilled according to the TLB currently processed. 
Retrieve the base address of the next-level page table and write it to the common register 
rd.


We will remove temporary lddir_ps, and record page size with bit[14:13] in next 
version.


Excellent, thank you for that translation.


r~

Re: Enabling internal errors for VH CXL devices: [was: Re: Questions about CXL RAS injection test in qemu]

2024-03-06 Thread Terry Bowman

Hi Jon,

This appears to partially address the same problem Robert and I are
working on. We
are working to add support for CXL port devices, including root ports, RCECs,
USPs,
and DSPs. This was covered in an LPC presentation and discussion.

We did not originally include RCEC error handling support because the same is 
needed 
for all CXL port devices. Also, we wanted to avoid adding more CXL specifics to 
aer.c and 
were looking for a more general solution. This led to the discussion about 
changes to 
the PCIe port bus driver.

Regards,
Terry

On 3/6/24 11:16, Dan Williams wrote:
> [ add Li Ming ]
> 
> Jonathan Cameron wrote:
> [..]
>> Robert / Terry, I tracked down the patch where you enabled this for RCHs and 
>> there was
>> some discussion on walking out on VH as well to enable this, but seems it
>> never happened. Can you remember why?  Just kicked back for a future 
>> occasion?
>>
> 
> Li Ming has this patch below waiting in wings. Li Ming, this patch is
> timely for this discussion, care to send out the full series? I expect it
> needs to be an RFC given concerns with integrating with the pending port
> switch error handling work.
> 
> -- 8< --
> From: Li Ming 
> Subject: [PATCH RFC v3 3/6] PCI/AER: Enable RCEC to report internal error for 
> CXL root port
> Date: Thu, 1 Feb 2024 05:58:08 +
> 
> Per CXL r3.1 section 12.2.2, RCEC is possible to log the CXL.cachemem
> protocol errors detected by CXL root port as PCI_ERR_UNC_INTN or
> PCI_ERR_COR_INTERNAL in AER Capability. So unmask PCI_ERR_UNC_INTN and
> PCI_ERR_COR_INTERNAL for that case.
> 
> Signed-off-by: Li Ming 
> ---
>  drivers/pci/pcie/aer.c | 25 ++---
>  1 file changed, 18 insertions(+), 7 deletions(-)
> 
> diff --git a/drivers/pci/pcie/aer.c b/drivers/pci/pcie/aer.c
> index 42a3bd35a3e1..ef8fd77cb920 100644
> --- a/drivers/pci/pcie/aer.c
> +++ b/drivers/pci/pcie/aer.c
> @@ -985,7 +985,7 @@ static bool cxl_error_is_native(struct pci_dev *dev)
>  {
>   struct pci_host_bridge *host = pci_find_host_bridge(dev->bus);
>  
> - return (pcie_ports_native || host->native_aer);
> + return (pcie_ports_native || host->native_aer) && host->is_cxl;
>  }
>  
>  static bool is_internal_error(struct aer_err_info *info)
> @@ -1041,8 +1041,14 @@ static int handles_cxl_error_iter(struct pci_dev *dev, 
> void *data)
>  {
>   bool *handles_cxl = data;
>  
> - if (!*handles_cxl)
> - *handles_cxl = is_cxl_mem_dev(dev) && cxl_error_is_native(dev);
> + if (!*handles_cxl) {
> + if (pci_pcie_type(dev) == PCI_EXP_TYPE_RC_END &&
> + is_cxl_mem_dev(dev) && cxl_error_is_native(dev))
> + *handles_cxl = true;
> + if (pci_pcie_type(dev) == PCI_EXP_TYPE_ROOT_PORT &&
> + cxl_error_is_native(dev))
> + *handles_cxl = true;
> + }
>  
>   /* Non-zero terminates iteration */
>   return *handles_cxl;
> @@ -1054,13 +1060,18 @@ static bool handles_cxl_errors(struct pci_dev *rcec)
>  
>   if (pci_pcie_type(rcec) == PCI_EXP_TYPE_RC_EC &&
>   pcie_aer_is_native(rcec))
> - pcie_walk_rcec(rcec, handles_cxl_error_iter, _cxl);
> + pcie_walk_rcec_all(rcec, handles_cxl_error_iter, _cxl);
>  
>   return handles_cxl;
>  }
>  
> -static void cxl_rch_enable_rcec(struct pci_dev *rcec)
> +static void cxl_enable_rcec(struct pci_dev *rcec)
>  {
> + /*
> +  * Enable RCEC's internal error report for two cases:
> +  * 1. RCiEP detected CXL.cachemem protocol errors
> +  * 2. CXL root port detected CXL.cachemem protocol errors.
> +  */
>   if (!handles_cxl_errors(rcec))
>   return;
>  
> @@ -1069,7 +1080,7 @@ static void cxl_rch_enable_rcec(struct pci_dev *rcec)
>  }
>  
>  #else
> -static inline void cxl_rch_enable_rcec(struct pci_dev *dev) { }
> +static inline void cxl_enable_rcec(struct pci_dev *dev) { }
>  static inline void cxl_rch_handle_error(struct pci_dev *dev,
>   struct aer_err_info *info) { }
>  #endif
> @@ -1494,7 +1505,7 @@ static int aer_probe(struct pcie_device *dev)
>   return status;
>   }
>  
> - cxl_rch_enable_rcec(port);
> + cxl_enable_rcec(port);
>   aer_enable_rootport(rpc);
>   pci_info(port, "enabled with IRQ %d\n", dev->irq);
>   return 0;

Re: Enabling internal errors for VH CXL devices: [was: Re: Questions about CXL RAS injection test in qemu]

2024-03-06 Thread Terry Bowman

Hi Yuquan and Jon,

I added responses inline below.

On 3/6/24 07:23, Jonathan Cameron wrote:
> On Wed, 6 Mar 2024 19:27:07 +0800
> Yuquan Wang  wrote:
> 
>> Hello, Jonathan
>>
>> Recently I met some problems on CXL RAS tests. 
>>
>> I tried to use "cxl-inject-uncorrectable-errors" and 
>> "cxl-inject-correctable-error"
>> qmp to inject CXL errors, however, there was no any kernel printing 
>> information in 
>> my qemu machine. And the qmp connection was unstable that made the machine 
>> always "terminating on signal 2".
> 
> The qmp connection being unstable is odd - might be related to the CXL code, 
> but
> I'm not sure how..
> 
>>
>> In addition, I successfully used the hmp "pcie_aer_inject_error" in the same 
>> conditions.
>> The kernel showed relevant print information.
> 
> IIRC the AER paths print under all circumstances whereas CXL errors do not, 
> they simply
> trigger tracepoints - but you should have seen device resets.
> 
> However I spun up a test and I think the issue is more straightforward.
> The uncorrectable internal error and correctable internal errors are masked 
> on the device.
> I thought we changed the default on this in linux but maybe not :(
> 

Device AER UIE/CIE mask can be set and still expect to handle device AER 
errors. The device reports 
AER UIE/CIE to the root port/RCEC on behalf of device AER CRC, TLP, etc errors. 

In earlier changes we added logic to clear the RCEC UIE/CIE mask in order to
properly receive
AER UIE/CIE notifications from devices and RCH dports.

"CXL Protocol and Link errors detected by components that are part of a CXL VH 
are
escalated and reported using standard PCIe error reporting mechanisms over 
CXL.io as
UIEs and/or CIEs. See PCIe Base Specification for details."[1]

[1] CXL3.1 12.2.1 - Protocol and Link Layer Error Reporting

> Hack is to find the relevant device with lspci -tv and then use
> setpci -s 0d:00.0 0x208.l=0
> to clear all the mask bits for uncorrectable errors.
> 
> Note I tested this on a convenient arm64 setup so always possible there is yet
> another problem on x86.
> 
> Robert / Terry, I tracked down the patch where you enabled this for RCHs and 
> there was
> some discussion on walking out on VH as well to enable this, but seems it
> never happened. Can you remember why?  Just kicked back for a future occasion?
> 
> Jonathan
> 
> 

I tested (qemu x86) using the aer-inject tool and found it to work. Below shows 
the 
endpoint CIE is masked (0xe000 @ AER+0x14) and the injected error is properly 
handled
with root port logging and cxl_pci handler trace logs.

 # lspci | grep -i cxl  
   
0d:00.0 CXL: Intel Corporation Device 0d93 (rev 01) 
  

  
# lspci -s 0d:00.0 -vvv | grep Advanced 
  
Capabilities: [200 v2] Advanced Error Reporting 
  

  
# setpci -s 0d:00.0 0x208.l 
  
0240
  

  
# setpci -s 0d:00.0 0x214.l 
  
e000
  

  
# cat aer-input.txt 
  
# Inject a correctable bad TLP error into the device with header log
  
# words 0 1 2 3.
  
#

1 2 3 4 >

1 - 100 of 319 matches

Mail list logo