RE: [PATCH v1 05/11] vfio: Introduce host_iommu_device_create callback

2024-03-18 Thread Duan, Zhenzhong


>-Original Message-
>From: Eric Auger 
>Subject: Re: [PATCH v1 05/11] vfio: Introduce host_iommu_device_create
>callback
>
>
>
>On 2/28/24 04:58, Zhenzhong Duan wrote:
>> Introduce host_iommu_device_create callback and a wrapper for it.
>>
>> This callback is used to allocate a host iommu device instance and
>> initialize it based on type.
>>
>> Signed-off-by: Zhenzhong Duan 
>> ---
>>  include/hw/vfio/vfio-common.h | 1 +
>>  include/hw/vfio/vfio-container-base.h | 1 +
>>  hw/vfio/common.c  | 8 
>>  3 files changed, 10 insertions(+)
>>
>> diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-
>common.h
>> index b6676c9f79..9fefea4b89 100644
>> --- a/include/hw/vfio/vfio-common.h
>> +++ b/include/hw/vfio/vfio-common.h
>> @@ -208,6 +208,7 @@ struct vfio_device_info *vfio_get_device_info(int
>fd);
>>  int vfio_attach_device(char *name, VFIODevice *vbasedev,
>> AddressSpace *as, Error **errp);
>>  void vfio_detach_device(VFIODevice *vbasedev);
>> +void host_iommu_device_create(VFIODevice *vbasedev);
>>
>>  int vfio_kvm_device_add_fd(int fd, Error **errp);
>>  int vfio_kvm_device_del_fd(int fd, Error **errp);
>> diff --git a/include/hw/vfio/vfio-container-base.h b/include/hw/vfio/vfio-
>container-base.h
>> index b2813b0c11..dc003f6eb2 100644
>> --- a/include/hw/vfio/vfio-container-base.h
>> +++ b/include/hw/vfio/vfio-container-base.h
>> @@ -120,6 +120,7 @@ struct VFIOIOMMUClass {
>>  int (*attach_device)(const char *name, VFIODevice *vbasedev,
>>   AddressSpace *as, Error **errp);
>>  void (*detach_device)(VFIODevice *vbasedev);
>> +void (*host_iommu_device_create)(VFIODevice *vbasedev);
>Maybe return an int instead. It is common the allocation can fail and
>the deallocation cannot. While at it I would also pass an errp in case
>it fails

Currently host_iommu_device_create implementation only calls g_malloc0,
so never fails, so I returned void.

I'm fine to return an int, will be like below, take iommufd for example:

--- a/hw/vfio/iommufd.c
+++ b/hw/vfio/iommufd.c
@@ -651,7 +651,7 @@ static IOMMUFDDeviceOps vfio_iommufd_device_ops = {
 .detach_hwpt = vfio_iommufd_device_detach_hwpt,
 };

-static void vfio_cdev_host_iommu_device_create(VFIODevice *vbasedev)
+static int vfio_cdev_host_iommu_device_create(VFIODevice *vbasedev, Error 
**errp)
 {
 IOMMUFDDevice *idev = g_malloc0(sizeof(IOMMUFDDevice));
 VFIOIOMMUFDContainer *container = container_of(vbasedev->bcontainer,
@@ -661,6 +661,8 @@ static void vfio_cdev_host_iommu_device_create(VFIODevice 
*vbasedev)

 iommufd_device_init(idev, vbasedev->iommufd, vbasedev->devid,
 container->ioas_id, vbasedev, 
_iommufd_device_ops);
+
+return 0;
 }

Thanks
Zhenzhong

>
>Eric
>>  /* migration feature */
>>  int (*set_dirty_page_tracking)(const VFIOContainerBase *bcontainer,
>> bool start);
>> diff --git a/hw/vfio/common.c b/hw/vfio/common.c
>> index 059bfdc07a..41e9031c59 100644
>> --- a/hw/vfio/common.c
>> +++ b/hw/vfio/common.c
>> @@ -1521,3 +1521,11 @@ void vfio_detach_device(VFIODevice
>*vbasedev)
>>  }
>>  vbasedev->bcontainer->ops->detach_device(vbasedev);
>>  }
>> +
>> +void host_iommu_device_create(VFIODevice *vbasedev)
>> +{
>> +const VFIOIOMMUClass *ops = vbasedev->bcontainer->ops;
>> +
>> +assert(ops->host_iommu_device_create);
>> +ops->host_iommu_device_create(vbasedev);
>> +}



[PATCH] target/tricore/helper: Use correct string format in cpu_tlb_fill()

2024-03-18 Thread Philippe Mathieu-Daudé
'address' got converted from target_ulong to vaddr in commit
68d6eee73c ("target/tricore: Convert to CPUClass::tlb_fill").
Use the corresponding format string to avoid casting.

Signed-off-by: Philippe Mathieu-Daudé 
---
 target/tricore/helper.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/target/tricore/helper.c b/target/tricore/helper.c
index 6d9e80cc0c..76bd226370 100644
--- a/target/tricore/helper.c
+++ b/target/tricore/helper.c
@@ -76,9 +76,9 @@ bool tricore_cpu_tlb_fill(CPUState *cs, vaddr address, int 
size,
 ret = get_physical_address(env, , ,
address, rw, mmu_idx);
 
-qemu_log_mask(CPU_LOG_MMU, "%s address=" TARGET_FMT_lx " ret %d physical "
+qemu_log_mask(CPU_LOG_MMU, "%s address=0x%" VADDR_PRIx " ret %d physical "
   HWADDR_FMT_plx " prot %d\n",
-  __func__, (target_ulong)address, ret, physical, prot);
+  __func__, address, ret, physical, prot);
 
 if (ret == TLBRET_MATCH) {
 tlb_set_page(cs, address & TARGET_PAGE_MASK,
-- 
2.41.0




答复: [PATCH v1 2/2] system/cpus: Fix resume_all_vcpus() under vCPU hotplug condition

2024-03-18 Thread zhukeqian via
Hi David,

On 17.03.24 09:37, Keqian Zhu via wrote:
>> For vCPU being hotplugged, qemu_init_vcpu() is called. In this 
>> function, we set vcpu state as stopped, and then wait vcpu thread to 
>> be created.
>> 
>> As the vcpu state is stopped, it will inform us it has been created 
>> and then wait on halt_cond. After we has realized vcpu object, we will 
>> resume the vcpu thread.
>> 
>> However, during we wait vcpu thread to be created, the bql is 
>> unlocked, and other thread is allowed to call resume_all_vcpus(), 
>> which will resume the un-realized vcpu.
>> 
>> This fixes the issue by filter out un-realized vcpu during 
>> resume_all_vcpus().
>
>Similar question: is there a reproducer? 
>
>How could we currently hotplug a VCPU, and while it is being created, see 
>pause_all_vcpus()/resume_all_vcpus() getting claled. 
>
I described the reason for this at patch 1.

>If I am not getting this wrong, there seems to be some other mechanism missing 
>that makes sure that this cannot happen. Dropping the BQL half-way through 
>creating a VCPU might be the problem.
>
When we add retry mechanism in pause_all_vcpus(), we can solve this problem. 
With the sematic unchanged for user, which means:
With bql, we can make sure all vcpus are paused after pause_all_vcpus() finish, 
 and all vcpus are resumed after resume_all_vcpus() finish.

Thanks,
Keqian

>
>
--
Cheers,

David / dhildenb



[PATCH] target/ppc/mmu-radix64: Use correct string format in walk_tree()

2024-03-18 Thread Philippe Mathieu-Daudé
'mask', 'nlb' and 'base_addr' are all uin64_t types.
Use the corresponding PRIx64 format.

Fixes: d2066bc50d ("target/ppc: Check page dir/table base alignment")
Signed-off-by: Philippe Mathieu-Daudé 
---
 target/ppc/mmu-radix64.c | 8 
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/target/ppc/mmu-radix64.c b/target/ppc/mmu-radix64.c
index 5823e039e6..690dff7a49 100644
--- a/target/ppc/mmu-radix64.c
+++ b/target/ppc/mmu-radix64.c
@@ -300,8 +300,8 @@ static int ppc_radix64_next_level(AddressSpace *as, vaddr 
eaddr,
 
 if (nlb & mask) {
 qemu_log_mask(LOG_GUEST_ERROR,
-"%s: misaligned page dir/table base: 0x"TARGET_FMT_lx
-" page dir size: 0x"TARGET_FMT_lx"\n",
+"%s: misaligned page dir/table base: 0x%" PRIx64
+" page dir size: 0x%" PRIx64 "\n",
 __func__, nlb, mask + 1);
 nlb &= ~mask;
 }
@@ -324,8 +324,8 @@ static int ppc_radix64_walk_tree(AddressSpace *as, vaddr 
eaddr,
 
 if (base_addr & mask) {
 qemu_log_mask(LOG_GUEST_ERROR,
-"%s: misaligned page dir base: 0x"TARGET_FMT_lx
-" page dir size: 0x"TARGET_FMT_lx"\n",
+"%s: misaligned page dir base: 0x%" PRIx64
+" page dir size: 0x%" PRIx64 "\n",
 __func__, base_addr, mask + 1);
 base_addr &= ~mask;
 }
-- 
2.41.0




答复: [PATCH v1 1/2] system/cpus: Fix pause_all_vcpus() under concurrent environment

2024-03-18 Thread zhukeqian via
Hi David,

Thanks for reviewing.

On 17.03.24 09:37, Keqian Zhu via wrote:
>> Both main loop thread and vCPU thread are allowed to call 
>> pause_all_vcpus(), and in general resume_all_vcpus() is called after 
>> it. Two issues live in pause_all_vcpus():
>
>In general, calling pause_all_vcpus() from VCPU threads is quite dangerous.
>
>Do we have reproducers for the cases below? 
>

I produce the issues by testing ARM vCPU hotplug feature:
QEMU changes for vCPU hotplug could be cloned from below site,
 https://github.com/salil-mehta/qemu.git virt-cpuhp-armv8/rfc-v2
Guest Kernel changes (by James Morse, ARM) are available here:
 https://git.kernel.org/pub/scm/linux/kernel/git/morse/linux.git 
virtual_cpu_hotplug/rfc/v2

The procedure to produce problems:
1. Startup a Linux VM (e.g., called OS-vcpuhotplug) with 32 possible vCPUs and 
16 current vCPUs.
2. Log in guestOS and run script[1] to continuously online/offline CPU.
3. At host side, run script[2] to continuously hotplug/unhotplug vCPU.
After several minutes, we can hit these problems.

Script[1] to online/offline CPU:
for ((time=1;time<1000;time++));
do
for ((cpu=16;cpu<32;cpu++));
do
echo 1 > /sys/devices/system/cpu/cpu$cpu/online
done

for ((cpu=16;cpu<32;cpu++));
do
echo 0 > /sys/devices/system/cpu/cpu$cpu/online
done
done

Script[2] to hotplug/unhotplug vCPU:
for ((time=1;time<100;time++));
do
echo $time
for ((cpu=16;cpu<=32;cpu++));
do
echo "virsh setvcpus OS-vcpuhotplug --count  $cpu --live"
virsh setvcpus OS-vcpuhotplug --count  $cpu --live
sleep 2
done

for ((cpu=32;cpu>=16;cpu--));
do
echo "virsh setvcpus OS-vcpuhotplug --count  $cpu --live"
virsh setvcpus OS-vcpuhotplug --count  $cpu --live
sleep 2
done

for ((cpu=16;cpu<=32;cpu+=2));
do
echo "virsh setvcpus OS-vcpuhotplug --count  $cpu --live"
virsh setvcpus OS-vcpuhotplug --count  $cpu --live
sleep 2
done

for ((cpu=32;cpu>=16;cpu-=2));
do
echo "virsh setvcpus OS-vcpuhotplug --count  $cpu --live"
virsh setvcpus OS-vcpuhotplug --count  $cpu --live
sleep 2
done
done

The script[1] will call PSCI CPU_ON which emulated by QEMU, which result in 
calling cpu_reset() on vCPU thread.
For ARM architecture, it needs to reset GICC registers, which is only possible 
when all vcpus paused. So script[1]
will call pause_all_vcpus() in vCPU thread.
The script[2] also calls cpu_reset() for newly hotplugged vCPU, which is done 
in main loop thread.
So this scenario causes problems as I state in commit message.

>> 
>> 1. There is possibility that during thread T1 waits on qemu_pause_cond 
>> with bql unlocked, other thread has called
>> pause_all_vcpus() and resume_all_vcpus(), then thread T1 will stuck, 
>> because the condition all_vcpus_paused() is always false.
>
>How can this happen?
>
>Two threads calling pause_all_vcpus() is borderline broken, as you note. 
>
>IIRC, we should call pause_all_vcpus() only if some other mechanism prevents 
>these races. For example, based on runstate changes.
>

We already has bql to prevent concurrent calling of pause_all_vcpus() and 
resume_all_vcpus(). But pause_all_vcpus() will
unlock bql in the half way, which gives change for other thread to call pause 
and resume. In the  past, code does not consider
this problem, now I add retry mechanism to fix it.

>
>Just imagine one thread calling pause_all_vcpus() while another one 
>calls resume_all_vcpus(). It cannot possibly work.

With bql, we can make sure all vcpus are paused after pause_all_vcpus() finish, 
 and all vcpus are resumed after resume_all_vcpus() finish.

For example, the following situation may occur:
Thread T1: lock bql  ->pause_all_vcpus ->   wait on cond and unlock bql 
 ->   wait T2 unlock bql to lock bql
-> lock bql  &&  all_vcpu_paused ->   success and do other work -> unlock bql
Thread T2: wait T1 unlock bql to lock bql   
 ->   lock bql->  resume_all_vcpus   ->   success  and do other work   
-> unlock bql

Thanks,
Keqian

>
>
>> 
>> 2. After all_vcpus_paused() has been checked as true, we will
>> unlock bql to relock replay_mutex. During the bql was unlocked,
>> the vcpu's state may has been changed by other thread, so we
>> must retry.
>> 
>> Signed-off-by: Keqian Zhu 
>> ---
>>   system/cpus.c | 29 -
>>   1 file changed, 24 insertions(+), 5 deletions(-)
>> 
> diff --git a/system/cpus.c b/system/cpus.c
> index 68d161d96b..4e41abe23e 100644
> --- a/system/cpus.c
> +++ b/system/cpus.c
> @@ -571,12 +571,14 @@ static bool all_vcpus_paused(void)
>   return true;
>   }
>   
> -void 

Re: [PATCH] target/i386: Export RFDS bit to guests

2024-03-18 Thread Xiaoyao Li

On 3/13/2024 10:53 PM, Pawan Gupta wrote:

Register File Data Sampling (RFDS) is a CPU side-channel vulnerability
that may expose stale register value. CPUs that set RFDS_NO bit in MSR
IA32_ARCH_CAPABILITIES indicate that they are not vulnerable to RFDS.
Similarly, RFDS_CLEAR indicates that CPU is affected by RFDS, and has
the microcode to help mitigate RFDS.

Make RFDS_CLEAR and RFDS_NO bits available to guests.


What's the status of KVM part?


Signed-off-by: Pawan Gupta 
---
  target/i386/cpu.c | 4 ++--
  1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/target/i386/cpu.c b/target/i386/cpu.c
index 9a210d8d9290..693a5e0fb2ce 100644
--- a/target/i386/cpu.c
+++ b/target/i386/cpu.c
@@ -1158,8 +1158,8 @@ FeatureWordInfo feature_word_info[FEATURE_WORDS] = {
  NULL, "sbdr-ssdp-no", "fbsdp-no", "psdp-no",
  NULL, "fb-clear", NULL, NULL,
  NULL, NULL, NULL, NULL,
-"pbrsb-no", NULL, "gds-no", NULL,
-NULL, NULL, NULL, NULL,
+"pbrsb-no", NULL, "gds-no", "rfds-no",
+"rfds-clear", NULL, NULL, NULL,
  },
  .msr = {
  .index = MSR_IA32_ARCH_CAPABILITIES,

base-commit: a1932d7cd6507d4d9db2044a54731fff3e749bac





RE: [PATCH v1 01/11] Introduce a common abstract struct HostIOMMUDevice

2024-03-18 Thread Duan, Zhenzhong


>-Original Message-
>From: Eric Auger 
>Subject: Re: [PATCH v1 01/11] Introduce a common abstract struct
>HostIOMMUDevice
>
>Hi Zhenzhong,
>On 2/28/24 04:58, Zhenzhong Duan wrote:
>> HostIOMMUDevice will be inherited by two sub classes,
>> legacy and iommufd currently.
>As this patch introduces the object, you describe what the object is
>meant for and used for. Maybe reuse text from the cover letter

Sure, will do.

Thanks
Zhenzhong

>
>Thanks
>
>Eric
>>
>> Introduce a helper function host_iommu_base_device_init to initialize it.
>>
>> Suggested-by: Eric Auger 
>> Signed-off-by: Zhenzhong Duan 
>> ---
>>  include/sysemu/host_iommu_device.h | 22 ++
>>  1 file changed, 22 insertions(+)
>>  create mode 100644 include/sysemu/host_iommu_device.h
>>
>> diff --git a/include/sysemu/host_iommu_device.h
>b/include/sysemu/host_iommu_device.h
>> new file mode 100644
>> index 00..fe80ab25fb
>> --- /dev/null
>> +++ b/include/sysemu/host_iommu_device.h
>> @@ -0,0 +1,22 @@
>> +#ifndef HOST_IOMMU_DEVICE_H
>> +#define HOST_IOMMU_DEVICE_H
>> +
>> +typedef enum HostIOMMUDevice_Type {
>> +HID_LEGACY,
>> +HID_IOMMUFD,
>> +HID_MAX,
>> +} HostIOMMUDevice_Type;
>> +
>> +typedef struct HostIOMMUDevice {
>> +HostIOMMUDevice_Type type;
>> +size_t size;
>> +} HostIOMMUDevice;
>> +
>> +static inline void host_iommu_base_device_init(HostIOMMUDevice *dev,
>> +   HostIOMMUDevice_Type type,
>> +   size_t size)
>> +{
>> +dev->type = type;
>> +dev->size = size;
>> +}
>> +#endif



RE: [PATCH v1 08/11] vfio/pci: Allocate and initialize HostIOMMUDevice after attachment

2024-03-18 Thread Duan, Zhenzhong


>-Original Message-
>From: Eric Auger 
>Subject: Re: [PATCH v1 08/11] vfio/pci: Allocate and initialize
>HostIOMMUDevice after attachment
>
>
>
>On 2/28/24 04:58, Zhenzhong Duan wrote:
>> Signed-off-by: Zhenzhong Duan 
>> ---
>>  hw/vfio/pci.c | 4 
>>  1 file changed, 4 insertions(+)
>>
>> diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
>> index 4fa387f043..6cc7de5d10 100644
>> --- a/hw/vfio/pci.c
>> +++ b/hw/vfio/pci.c
>> @@ -3006,6 +3006,9 @@ static void vfio_realize(PCIDevice *pdev, Error
>**errp)
>>  goto error;
>>  }
>>
>> +/* Allocate and initialize HostIOMMUDevice after attachment succeed
>*/
>after successful attachment?
>> +host_iommu_device_create(vbasedev);
>> +
>you shall free on error: as well

I free it in vfio_instance_finalize().
Vfio-pci's design is special, it didn't free all allocated resources in 
realize's error path,
They are freed in _finalize(). e.g., vdev->emulated_config_bits, vdev->rom, 
devices and group resources(vfio_detach_device).
I'm following the same way. I'm fine to free it as you suggested something like 
below:

--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -3246,6 +3246,7 @@ out_teardown:
 vfio_teardown_msi(vdev);
 vfio_bars_exit(vdev);
 error:
+g_free(vdev->vbasedev.base_hdev);
 error_prepend(errp, VFIO_MSG_PREFIX, vbasedev->name);
 }

@@ -3288,6 +3289,7 @@ static void vfio_exitfn(PCIDevice *pdev)
 vfio_bars_exit(vdev);
 vfio_migration_exit(vbasedev);
 pci_device_unset_iommu_device(pdev);
+g_free(vdev->vbasedev.base_hdev);
 }

>
>Eric
>>  vfio_populate_device(vdev, );
>>  if (err) {
>>  error_propagate(errp, err);
>> @@ -3244,6 +3247,7 @@ static void vfio_instance_finalize(Object *obj)
>>
>>  vfio_display_finalize(vdev);
>>  vfio_bars_finalize(vdev);
>> +g_free(vdev->vbasedev.base_hdev);

I free it here.

Thanks
Zhenzhong

>>  g_free(vdev->emulated_config_bits);
>>  g_free(vdev->rom);
>>  /*



RE: [PATCH v1 05/11] vfio: Introduce host_iommu_device_create callback

2024-03-18 Thread Duan, Zhenzhong


>-Original Message-
>From: Eric Auger 
>Subject: Re: [PATCH v1 05/11] vfio: Introduce host_iommu_device_create
>callback
>
>
>
>On 3/18/24 14:52, Eric Auger wrote:
>> Hi ZHenzhong,
>>
>> On 2/28/24 04:58, Zhenzhong Duan wrote:
>>> Introduce host_iommu_device_create callback and a wrapper for it.
>>>
>>> This callback is used to allocate a host iommu device instance and
>>> initialize it based on type.
>>>
>>> Signed-off-by: Zhenzhong Duan 
>>> ---
>>>  include/hw/vfio/vfio-common.h | 1 +
>>>  include/hw/vfio/vfio-container-base.h | 1 +
>>>  hw/vfio/common.c  | 8 
>>>  3 files changed, 10 insertions(+)
>>>
>>> diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-
>common.h
>>> index b6676c9f79..9fefea4b89 100644
>>> --- a/include/hw/vfio/vfio-common.h
>>> +++ b/include/hw/vfio/vfio-common.h
>>> @@ -208,6 +208,7 @@ struct vfio_device_info *vfio_get_device_info(int
>fd);
>>>  int vfio_attach_device(char *name, VFIODevice *vbasedev,
>>> AddressSpace *as, Error **errp);
>>>  void vfio_detach_device(VFIODevice *vbasedev);
>>> +void host_iommu_device_create(VFIODevice *vbasedev);
>>>
>>>  int vfio_kvm_device_add_fd(int fd, Error **errp);
>>>  int vfio_kvm_device_del_fd(int fd, Error **errp);
>>> diff --git a/include/hw/vfio/vfio-container-base.h b/include/hw/vfio/vfio-
>container-base.h
>>> index b2813b0c11..dc003f6eb2 100644
>>> --- a/include/hw/vfio/vfio-container-base.h
>>> +++ b/include/hw/vfio/vfio-container-base.h
>>> @@ -120,6 +120,7 @@ struct VFIOIOMMUClass {
>>>  int (*attach_device)(const char *name, VFIODevice *vbasedev,
>>>   AddressSpace *as, Error **errp);
>>>  void (*detach_device)(VFIODevice *vbasedev);
>>> +void (*host_iommu_device_create)(VFIODevice *vbasedev);
>>>  /* migration feature */
>>>  int (*set_dirty_page_tracking)(const VFIOContainerBase *bcontainer,
>>> bool start);
>>> diff --git a/hw/vfio/common.c b/hw/vfio/common.c
>>> index 059bfdc07a..41e9031c59 100644
>>> --- a/hw/vfio/common.c
>>> +++ b/hw/vfio/common.c
>>> @@ -1521,3 +1521,11 @@ void vfio_detach_device(VFIODevice
>*vbasedev)
>>>  }
>>>  vbasedev->bcontainer->ops->detach_device(vbasedev);
>>>  }
>>> +
>>> +void host_iommu_device_create(VFIODevice *vbasedev)
>>> +{
>>> +const VFIOIOMMUClass *ops = vbasedev->bcontainer->ops;
>>> +
>>> +assert(ops->host_iommu_device_create);
>> at this stage ops actual implementation do not exist yet so this will
>> break the bisection
>
>Sorry it is OK at the function only is called in
>[PATCH v1 08/11] vfio/pci: Allocate and initialize HostIOMMUDevice after
>attachment
>
>Sorry for the noise

Ah, send too quickly. No problem.

Thanks
Zhenzhong


RE: [PATCH v1 05/11] vfio: Introduce host_iommu_device_create callback

2024-03-18 Thread Duan, Zhenzhong
Hi Eric,

>-Original Message-
>From: Eric Auger 
>Subject: Re: [PATCH v1 05/11] vfio: Introduce host_iommu_device_create
>callback
>
>Hi ZHenzhong,
>
>On 2/28/24 04:58, Zhenzhong Duan wrote:
>> Introduce host_iommu_device_create callback and a wrapper for it.
>>
>> This callback is used to allocate a host iommu device instance and
>> initialize it based on type.
>>
>> Signed-off-by: Zhenzhong Duan 
>> ---
>>  include/hw/vfio/vfio-common.h | 1 +
>>  include/hw/vfio/vfio-container-base.h | 1 +
>>  hw/vfio/common.c  | 8 
>>  3 files changed, 10 insertions(+)
>>
>> diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-
>common.h
>> index b6676c9f79..9fefea4b89 100644
>> --- a/include/hw/vfio/vfio-common.h
>> +++ b/include/hw/vfio/vfio-common.h
>> @@ -208,6 +208,7 @@ struct vfio_device_info *vfio_get_device_info(int
>fd);
>>  int vfio_attach_device(char *name, VFIODevice *vbasedev,
>> AddressSpace *as, Error **errp);
>>  void vfio_detach_device(VFIODevice *vbasedev);
>> +void host_iommu_device_create(VFIODevice *vbasedev);
>>
>>  int vfio_kvm_device_add_fd(int fd, Error **errp);
>>  int vfio_kvm_device_del_fd(int fd, Error **errp);
>> diff --git a/include/hw/vfio/vfio-container-base.h b/include/hw/vfio/vfio-
>container-base.h
>> index b2813b0c11..dc003f6eb2 100644
>> --- a/include/hw/vfio/vfio-container-base.h
>> +++ b/include/hw/vfio/vfio-container-base.h
>> @@ -120,6 +120,7 @@ struct VFIOIOMMUClass {
>>  int (*attach_device)(const char *name, VFIODevice *vbasedev,
>>   AddressSpace *as, Error **errp);
>>  void (*detach_device)(VFIODevice *vbasedev);
>> +void (*host_iommu_device_create)(VFIODevice *vbasedev);
>>  /* migration feature */
>>  int (*set_dirty_page_tracking)(const VFIOContainerBase *bcontainer,
>> bool start);
>> diff --git a/hw/vfio/common.c b/hw/vfio/common.c
>> index 059bfdc07a..41e9031c59 100644
>> --- a/hw/vfio/common.c
>> +++ b/hw/vfio/common.c
>> @@ -1521,3 +1521,11 @@ void vfio_detach_device(VFIODevice
>*vbasedev)
>>  }
>>  vbasedev->bcontainer->ops->detach_device(vbasedev);
>>  }
>> +
>> +void host_iommu_device_create(VFIODevice *vbasedev)
>> +{
>> +const VFIOIOMMUClass *ops = vbasedev->bcontainer->ops;
>> +
>> +assert(ops->host_iommu_device_create);
>at this stage ops actual implementation do not exist yet so this will
>break the bisection

This patch only introcudes host_iommu_device_create but no one call
into it. Patch6-7 implement callback for different backend,
patch8 call host_iommu_device_create(), so I think the order is ok.
Let me know if I missed your points.

Thanks
Zhenzhong

>
>Eric
>> +ops->host_iommu_device_create(vbasedev);
>> +}



[PATCH v2 2/4] ui/vnc: Do not use console_select()

2024-03-18 Thread Akihiko Odaki
console_select() is shared by other displays and a console_select() call
from one of them triggers console switching also in ui/curses,
circumventing key state reinitialization that needs to be performed in
preparation and resulting in stuck keys.

Use its internal state to track the current active console to prevent
such a surprise console switch.

Signed-off-by: Akihiko Odaki 
---
 include/ui/console.h   |  1 +
 include/ui/kbd-state.h | 11 +++
 ui/console.c   | 12 
 ui/kbd-state.c |  6 ++
 ui/vnc.c   | 14 +-
 5 files changed, 39 insertions(+), 5 deletions(-)

diff --git a/include/ui/console.h b/include/ui/console.h
index a4a49ffc640c..3729d2db29c7 100644
--- a/include/ui/console.h
+++ b/include/ui/console.h
@@ -413,6 +413,7 @@ void qemu_console_early_init(void);
 
 void qemu_console_set_display_gl_ctx(QemuConsole *con, DisplayGLCtx *ctx);
 
+QemuConsole *qemu_console_lookup_default(void);
 QemuConsole *qemu_console_lookup_by_index(unsigned int index);
 QemuConsole *qemu_console_lookup_by_device(DeviceState *dev, uint32_t head);
 QemuConsole *qemu_console_lookup_by_device_name(const char *device_id,
diff --git a/include/ui/kbd-state.h b/include/ui/kbd-state.h
index fb79776128cf..1f37b932eb62 100644
--- a/include/ui/kbd-state.h
+++ b/include/ui/kbd-state.h
@@ -99,4 +99,15 @@ bool qkbd_state_modifier_get(QKbdState *kbd, QKbdModifier 
mod);
  */
 void qkbd_state_lift_all_keys(QKbdState *kbd);
 
+/**
+ * qkbd_state_switch_console: Switch console.
+ *
+ * This sends key up events to the previous console for all keys which are in
+ * down state to prevent keys being stuck, and remembers the new console.
+ *
+ * @kbd: state tracker state.
+ * @con: new QemuConsole for this state tracker.
+ */
+void qkbd_state_switch_console(QKbdState *kbd, QemuConsole *con);
+
 #endif /* QEMU_UI_KBD_STATE_H */
diff --git a/ui/console.c b/ui/console.c
index 832055675c50..fbc1b9b8b57c 100644
--- a/ui/console.c
+++ b/ui/console.c
@@ -1325,6 +1325,18 @@ void graphic_console_close(QemuConsole *con)
 dpy_gfx_replace_surface(con, surface);
 }
 
+QemuConsole *qemu_console_lookup_default(void)
+{
+QemuConsole *con;
+
+QTAILQ_FOREACH(con, , next) {
+if (QEMU_IS_GRAPHIC_CONSOLE(con)) {
+return con;
+}
+}
+return QTAILQ_FIRST();
+}
+
 QemuConsole *qemu_console_lookup_by_index(unsigned int index)
 {
 QemuConsole *con;
diff --git a/ui/kbd-state.c b/ui/kbd-state.c
index 62d42a7a22e1..52ed28b8a89b 100644
--- a/ui/kbd-state.c
+++ b/ui/kbd-state.c
@@ -117,6 +117,12 @@ void qkbd_state_lift_all_keys(QKbdState *kbd)
 }
 }
 
+void qkbd_state_switch_console(QKbdState *kbd, QemuConsole *con)
+{
+qkbd_state_lift_all_keys(kbd);
+kbd->con = con;
+}
+
 void qkbd_state_set_delay(QKbdState *kbd, int delay_ms)
 {
 kbd->key_delay_ms = delay_ms;
diff --git a/ui/vnc.c b/ui/vnc.c
index fc12b343e254..b3fd78022b19 100644
--- a/ui/vnc.c
+++ b/ui/vnc.c
@@ -1872,12 +1872,16 @@ static void do_key_event(VncState *vs, int down, int 
keycode, int sym)
 /* QEMU console switch */
 switch (qcode) {
 case Q_KEY_CODE_1 ... Q_KEY_CODE_9: /* '1' to '9' keys */
-if (vs->vd->dcl.con == NULL && down &&
+if (down &&
 qkbd_state_modifier_get(vs->vd->kbd, QKBD_MOD_CTRL) &&
 qkbd_state_modifier_get(vs->vd->kbd, QKBD_MOD_ALT)) {
-/* Reset the modifiers sent to the current console */
-qkbd_state_lift_all_keys(vs->vd->kbd);
-console_select(qcode - Q_KEY_CODE_1);
+QemuConsole *con = qemu_console_lookup_by_index(qcode - 
Q_KEY_CODE_1);
+if (con) {
+unregister_displaychangelistener(>vd->dcl);
+qkbd_state_switch_console(vs->vd->kbd, con);
+vs->vd->dcl.con = con;
+register_displaychangelistener(>vd->dcl);
+}
 return;
 }
 default:
@@ -4206,7 +4210,7 @@ void vnc_display_open(const char *id, Error **errp)
 goto fail;
 }
 } else {
-con = NULL;
+con = qemu_console_lookup_default();
 }
 
 if (con != vd->dcl.con) {

-- 
2.44.0




[PATCH v2 4/4] ui/curses: Do not use console_select()

2024-03-18 Thread Akihiko Odaki
ui/curses is the only user of console_select(). Move the implementation
to ui/curses.

Signed-off-by: Akihiko Odaki 
---
 include/ui/console.h  |   1 -
 ui/console-priv.h |   2 +-
 ui/console-vc-stubs.c |   2 +-
 ui/console-vc.c   |   3 +-
 ui/console.c  | 121 +-
 ui/curses.c   |  48 +++-
 6 files changed, 51 insertions(+), 126 deletions(-)

diff --git a/include/ui/console.h b/include/ui/console.h
index 3729d2db29c7..0bc7a00ac0bb 100644
--- a/include/ui/console.h
+++ b/include/ui/console.h
@@ -433,7 +433,6 @@ int qemu_console_get_window_id(QemuConsole *con);
 /* Set the low-level window id for the console */
 void qemu_console_set_window_id(QemuConsole *con, int window_id);
 
-void console_select(unsigned int index);
 void qemu_console_resize(QemuConsole *con, int width, int height);
 DisplaySurface *qemu_console_surface(QemuConsole *con);
 void coroutine_fn qemu_console_co_wait_update(QemuConsole *con);
diff --git a/ui/console-priv.h b/ui/console-priv.h
index 88569ed2cc41..43ceb8122f13 100644
--- a/ui/console-priv.h
+++ b/ui/console-priv.h
@@ -35,7 +35,7 @@ struct QemuConsole {
 QTAILQ_ENTRY(QemuConsole) next;
 };
 
-void qemu_text_console_select(QemuTextConsole *c);
+void qemu_text_console_update_size(QemuTextConsole *c);
 const char * qemu_text_console_get_label(QemuTextConsole *c);
 void qemu_text_console_update_cursor(void);
 void qemu_text_console_handle_keysym(QemuTextConsole *s, int keysym);
diff --git a/ui/console-vc-stubs.c b/ui/console-vc-stubs.c
index 2afc52329f0c..b63e2fb2345f 100644
--- a/ui/console-vc-stubs.c
+++ b/ui/console-vc-stubs.c
@@ -10,7 +10,7 @@
 #include "chardev/char.h"
 #include "ui/console-priv.h"
 
-void qemu_text_console_select(QemuTextConsole *c)
+void qemu_text_console_update_size(QemuTextConsole *c)
 {
 }
 
diff --git a/ui/console-vc.c b/ui/console-vc.c
index f22c8e23c2ed..899fa11c9485 100644
--- a/ui/console-vc.c
+++ b/ui/console-vc.c
@@ -958,10 +958,9 @@ static void vc_chr_set_echo(Chardev *chr, bool echo)
 drv->console->echo = echo;
 }
 
-void qemu_text_console_select(QemuTextConsole *c)
+void qemu_text_console_update_size(QemuTextConsole *c)
 {
 dpy_text_resize(QEMU_CONSOLE(c), c->width, c->height);
-qemu_text_console_update_cursor();
 }
 
 static void vc_chr_open(Chardev *chr,
diff --git a/ui/console.c b/ui/console.c
index fbc1b9b8b57c..43226c5c1454 100644
--- a/ui/console.c
+++ b/ui/console.c
@@ -66,7 +66,6 @@ struct DisplayState {
 };
 
 static DisplayState *display_state;
-static QemuConsole *active_console;
 static QTAILQ_HEAD(, QemuConsole) consoles =
 QTAILQ_HEAD_INITIALIZER(consoles);
 
@@ -135,7 +134,6 @@ void graphic_hw_update_done(QemuConsole *con)
 void graphic_hw_update(QemuConsole *con)
 {
 bool async = false;
-con = con ? con : active_console;
 if (!con) {
 return;
 }
@@ -209,9 +207,6 @@ void qemu_console_set_window_id(QemuConsole *con, int 
window_id)
 
 void graphic_hw_invalidate(QemuConsole *con)
 {
-if (!con) {
-con = active_console;
-}
 if (con && con->hw_ops->invalidate) {
 con->hw_ops->invalidate(con->hw);
 }
@@ -219,9 +214,6 @@ void graphic_hw_invalidate(QemuConsole *con)
 
 void graphic_hw_text_update(QemuConsole *con, console_ch_t *chardata)
 {
-if (!con) {
-con = active_console;
-}
 if (con && con->hw_ops->text_update) {
 con->hw_ops->text_update(con->hw, chardata);
 }
@@ -265,12 +257,12 @@ static void dpy_gfx_update_texture(QemuConsole *con, 
DisplaySurface *surface,
 }
 
 static void displaychangelistener_display_console(DisplayChangeListener *dcl,
-  QemuConsole *con,
   Error **errp)
 {
 static const char nodev[] =
 "This VM has no graphic display device.";
 static DisplaySurface *dummy;
+QemuConsole *con = dcl->con;
 
 if (!con || !console_compatible_with(con, dcl, errp)) {
 if (!dummy) {
@@ -305,39 +297,8 @@ static void 
displaychangelistener_display_console(DisplayChangeListener *dcl,
 }
 }
 
-void console_select(unsigned int index)
-{
-DisplayChangeListener *dcl;
-QemuConsole *s;
-
-trace_console_select(index);
-s = qemu_console_lookup_by_index(index);
-if (s) {
-DisplayState *ds = s->ds;
-
-active_console = s;
-QLIST_FOREACH (dcl, >listeners, next) {
-if (dcl->con != NULL) {
-continue;
-}
-displaychangelistener_display_console(dcl, s, NULL);
-}
-
-if (QEMU_IS_TEXT_CONSOLE(s)) {
-qemu_text_console_select(QEMU_TEXT_CONSOLE(s));
-}
-}
-}
-
 void qemu_text_console_put_keysym(QemuTextConsole *s, int keysym)
 {
-if (!s) {
-if (!QEMU_IS_TEXT_CONSOLE(active_console)) {
-return;
-}
-s = QEMU_TEXT_CONSOLE(active_console);
-}
-
 

[PATCH v2 3/4] ui/cocoa: Do not use console_select()

2024-03-18 Thread Akihiko Odaki
ui/cocoa needs to update the UI info and reset the keyboard state
tracker when switching the console, or the new console will see the
stale UI info or keyboard state. Previously, updating the UI info was
done with cocoa_switch(), but it is meant to be called when the surface
is being replaced, and may be called even when not switching the
console. ui/cocoa never reset the keyboard state, which resulted in
stuck keys.

Add ui/cocoa's own implementation of console_select(), which updates the
UI info and resets the keyboard state tracker.

Signed-off-by: Akihiko Odaki 
---
 ui/cocoa.m | 37 ++---
 1 file changed, 26 insertions(+), 11 deletions(-)

diff --git a/ui/cocoa.m b/ui/cocoa.m
index fa879d7dcd4b..810751cf2644 100644
--- a/ui/cocoa.m
+++ b/ui/cocoa.m
@@ -102,6 +102,7 @@ static void cocoa_switch(DisplayChangeListener *dcl,
 static DisplayChangeListener dcl = {
 .ops = _ops,
 };
+static QKbdState *kbd;
 static int cursor_hide = 1;
 static int left_command_key_enabled = 1;
 static bool swap_opt_cmd;
@@ -309,7 +310,6 @@ @interface QemuCocoaView : NSView
 NSTrackingArea *trackingArea;
 QEMUScreen screen;
 pixman_image_t *pixman_image;
-QKbdState *kbd;
 BOOL isMouseGrabbed;
 BOOL isAbsoluteEnabled;
 CFMachPortRef eventsTap;
@@ -361,7 +361,6 @@ - (id)initWithFrame:(NSRect)frameRect
 
 screen.width = frameRect.size.width;
 screen.height = frameRect.size.height;
-kbd = qkbd_state_init(dcl.con);
 #if MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_VERSION_14_0
 [self setClipsToBounds:YES];
 #endif
@@ -378,8 +377,6 @@ - (void) dealloc
 pixman_image_unref(pixman_image);
 }
 
-qkbd_state_free(kbd);
-
 if (eventsTap) {
 CFRelease(eventsTap);
 }
@@ -429,6 +426,20 @@ - (void) viewWillMoveToWindow:(NSWindow *)newWindow
 [self removeTrackingRect];
 }
 
+- (void) selectConsoleLocked:(unsigned int)index
+{
+QemuConsole *con = qemu_console_lookup_by_index(index);
+if (!con) {
+return;
+}
+
+unregister_displaychangelistener();
+qkbd_state_switch_console(kbd, con);
+dcl.con = con;
+register_displaychangelistener();
+[self updateUIInfo];
+}
+
 - (void) hideCursor
 {
 if (!cursor_hide) {
@@ -718,7 +729,8 @@ - (void) handleMonitorInput:(NSEvent *)event
 }
 
 if (keysym) {
-qemu_text_console_put_keysym(NULL, keysym);
+QemuTextConsole *con = QEMU_TEXT_CONSOLE(dcl.con);
+qemu_text_console_put_keysym(con, keysym);
 }
 }
 
@@ -898,7 +910,7 @@ - (bool) handleEventLocked:(NSEvent *)event
 
 // enable graphic console
 case '1' ... '9':
-console_select(key - '0' - 1); /* ascii math */
+[self selectConsoleLocked:key - '0' - 1]; /* ascii 
math */
 return true;
 
 // release the mouse grab
@@ -909,7 +921,7 @@ - (bool) handleEventLocked:(NSEvent *)event
 }
 }
 
-if (qemu_console_is_graphic(NULL)) {
+if (qemu_console_is_graphic(dcl.con)) {
 qkbd_state_key_event(kbd, keycode, true);
 } else {
 [self handleMonitorInput: event];
@@ -924,7 +936,7 @@ - (bool) handleEventLocked:(NSEvent *)event
 return true;
 }
 
-if (qemu_console_is_graphic(NULL)) {
+if (qemu_console_is_graphic(dcl.con)) {
 qkbd_state_key_event(kbd, keycode, false);
 }
 return true;
@@ -1374,7 +1386,7 @@ - (void)toggleZoomInterpolation:(id) sender
 - (void)displayConsole:(id)sender
 {
 with_bql(^{
-console_select([sender tag]);
+[cocoaView selectConsoleLocked:[sender tag]];
 });
 }
 
@@ -1945,7 +1957,6 @@ static void cocoa_switch(DisplayChangeListener *dcl,
 pixman_image_ref(image);
 
 dispatch_async(dispatch_get_main_queue(), ^{
-[cocoaView updateUIInfo];
 [cocoaView switchSurface:image];
 });
 }
@@ -1955,7 +1966,7 @@ static void cocoa_refresh(DisplayChangeListener *dcl)
 NSAutoreleasePool * pool = [[NSAutoreleasePool alloc] init];
 
 COCOA_DEBUG("qemu_cocoa: cocoa_refresh\n");
-graphic_hw_update(NULL);
+graphic_hw_update(dcl->con);
 
 if (qemu_input_is_absolute(dcl->con)) {
 dispatch_async(dispatch_get_main_queue(), ^{
@@ -2039,8 +2050,12 @@ static void cocoa_display_init(DisplayState *ds, 
DisplayOptions *opts)
 add_console_menu_entries();
 addRemovableDevicesMenuItems();
 
+dcl.con = qemu_console_lookup_default();
+kbd = qkbd_state_init(dcl.con);
+
 // register vga output callbacks
 register_displaychangelistener();
+[cocoaView updateUIInfo];
 
 qemu_event_init(, false);
 cbowner = [[QemuCocoaPasteboardTypeOwner alloc] init];

-- 
2.44.0




[PATCH v2 0/4] ui/console: Remove console_select()

2024-03-18 Thread Akihiko Odaki
ui/console has a concept of "active" console; the active console is used
when NULL is set for DisplayListener::con, and console_select() updates
the active console state. However, the global nature of the state cause
odd behaviors, and replacing NULL with the active console also resulted
in extra code. Remove it to solve these problems.

The active console state is shared, so if there are two displays
referring to the active console, switching the console for one will also
affect the other. All displays that use the active console state,
namely cocoa, curses, and vnc, need to reset some of its state before
switching the console, and such a reset operation cannot be performed if
the console is switched by another display. This can result in stuck
keys, for example.

While the active console state is shared, displays other than cocoa,
curses, and vnc don't update the state. A chardev-vc inherits the
size of the active console, but it does not make sense for such a
display.

This series removes the shared "active" console state from ui/console.
curses, cocoa, and vnc will hold the reference to the console currently
shown with DisplayListener::con. This also eliminates the need to
replace NULL with the active console and save code.

Signed-off-by: Akihiko Odaki 
---
Changes in v2:
- Changed to fall back to a text console if there is no graphical
  console as previously done.
- Link to v1: 
https://lore.kernel.org/r/20240318-console-v1-0-f4efbfa71...@daynix.com

---
Akihiko Odaki (4):
  ui/vc: Do not inherit the size of active console
  ui/vnc: Do not use console_select()
  ui/cocoa: Do not use console_select()
  ui/curses: Do not use console_select()

 include/ui/console.h   |   2 +-
 include/ui/kbd-state.h |  11 
 ui/console-priv.h  |   2 +-
 ui/console-vc-stubs.c  |   2 +-
 ui/console-vc.c|   7 ++-
 ui/console.c   | 133 -
 ui/curses.c|  48 ++
 ui/kbd-state.c |   6 +++
 ui/vnc.c   |  14 --
 ui/cocoa.m |  37 ++
 10 files changed, 118 insertions(+), 144 deletions(-)
---
base-commit: ba49d760eb04630e7b15f423ebecf6c871b8f77b
change-id: 20240317-console-6744d4ab8ba6

Best regards,
-- 
Akihiko Odaki 




[PATCH v2 1/4] ui/vc: Do not inherit the size of active console

2024-03-18 Thread Akihiko Odaki
A chardev-vc used to inherit the size of a graphic console when its
size not explicitly specified, but it often did not make sense. If a
chardev-vc is instantiated during the startup, the active graphic
console has no content at the time, so it will have the size of graphic
console placeholder, which contains no useful information. It's better
to have the standard size of text console instead.

Signed-off-by: Akihiko Odaki 
Reviewed-by: Marc-André Lureau 
---
 ui/console-vc.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/ui/console-vc.c b/ui/console-vc.c
index 9c13cc2981b0..f22c8e23c2ed 100644
--- a/ui/console-vc.c
+++ b/ui/console-vc.c
@@ -990,8 +990,8 @@ static void vc_chr_open(Chardev *chr,
 trace_console_txt_new(width, height);
 if (width == 0 || height == 0) {
 s = QEMU_TEXT_CONSOLE(object_new(TYPE_QEMU_TEXT_CONSOLE));
-width = qemu_console_get_width(NULL, 80 * FONT_WIDTH);
-height = qemu_console_get_height(NULL, 24 * FONT_HEIGHT);
+width = 80 * FONT_WIDTH;
+height = 24 * FONT_HEIGHT;
 } else {
 s = QEMU_TEXT_CONSOLE(object_new(TYPE_QEMU_FIXED_TEXT_CONSOLE));
 }

-- 
2.44.0




RE: [PATCH v1 04/11] vfio: Add HostIOMMUDevice handle into VFIODevice

2024-03-18 Thread Duan, Zhenzhong


>-Original Message-
>From: Eric Auger 
>Subject: Re: [PATCH v1 04/11] vfio: Add HostIOMMUDevice handle into
>VFIODevice
>
>
>
>On 2/28/24 04:58, Zhenzhong Duan wrote:
>> This handle points to either IOMMULegacyDevice or IOMMUFDDevice
>variant,
>> neither both.
>I would reword into:
>store an handle to the HostIOMMUDevice the VFIODevice is associated with
>. Its actual nature depends on the backend in use (VFIO or IOMMUFD).

More clear, thanks Eric, will use it.

Zhenzhong

>
>Thanks
>
>Eric
>>
>> Signed-off-by: Zhenzhong Duan 
>> ---
>>  include/hw/vfio/vfio-common.h | 1 +
>>  1 file changed, 1 insertion(+)
>>
>> diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-
>common.h
>> index 8bfb9cbe94..b6676c9f79 100644
>> --- a/include/hw/vfio/vfio-common.h
>> +++ b/include/hw/vfio/vfio-common.h
>> @@ -130,6 +130,7 @@ typedef struct VFIODevice {
>>  OnOffAuto pre_copy_dirty_page_tracking;
>>  bool dirty_pages_supported;
>>  bool dirty_tracking;
>> +HostIOMMUDevice *base_hdev;
>>  int devid;
>>  IOMMUFDBackend *iommufd;
>>  } VFIODevice;



Re: [PATCH-for-9.1 v2 2/3] hw/display/pxa2xx_lcd: Set rotation angle using qemu_console_set_rotate

2024-03-18 Thread Akihiko Odaki

On 2024/03/18 20:31, Philippe Mathieu-Daudé wrote:

Signed-off-by: Philippe Mathieu-Daudé 


Reviewed-by: Akihiko Odaki 


---
  hw/display/pxa2xx_lcd.c | 1 +
  1 file changed, 1 insertion(+)

diff --git a/hw/display/pxa2xx_lcd.c b/hw/display/pxa2xx_lcd.c
index a9d0d981a0..7d03fa57d0 100644
--- a/hw/display/pxa2xx_lcd.c
+++ b/hw/display/pxa2xx_lcd.c
@@ -1439,6 +1439,7 @@ PXA2xxLCDState *pxa2xx_lcdc_init(MemoryRegion *sysmem,
  memory_region_add_subregion(sysmem, base, >iomem);
  
  s->con = graphic_console_init(NULL, 0, _ops, s);

+qemu_console_set_rotate(s->con, graphic_rotate);
  
  vmstate_register(NULL, 0, _pxa2xx_lcdc, s);
  




Re: [PATCH-for-9.1 v2 3/3] ui/console: Add 'rotate_arcdegree' field to allow per-console rotation

2024-03-18 Thread Akihiko Odaki

On 2024/03/18 20:31, Philippe Mathieu-Daudé wrote:

Add the 'rotate_arcdegree' field to QemuGraphicConsole and
remove the use of the 'graphic_rotate' global.

Signed-off-by: Philippe Mathieu-Daudé 


Reviewed-by: Akihiko Odaki 


---
  ui/console.c | 15 +++
  1 file changed, 11 insertions(+), 4 deletions(-)

diff --git a/ui/console.c b/ui/console.c
index 84aee76846..94624c37ae 100644
--- a/ui/console.c
+++ b/ui/console.c
@@ -37,7 +37,6 @@
  #include "trace.h"
  #include "exec/memory.h"
  #include "qom/object.h"
-#include "sysemu/sysemu.h"
  
  #include "console-priv.h"
  
@@ -51,6 +50,8 @@ typedef struct QemuGraphicConsole {
  
  QEMUCursor *cursor;

  int cursor_x, cursor_y, cursor_on;
+
+unsigned rotate_arcdegree;
  } QemuGraphicConsole;
  
  typedef QemuConsoleClass QemuGraphicConsoleClass;

@@ -210,17 +211,23 @@ void qemu_console_set_window_id(QemuConsole *con, int 
window_id)
  
  void qemu_console_set_rotate(QemuConsole *con, unsigned arcdegree)

  {
-graphic_rotate = arcdegree;
+QemuGraphicConsole *gc = QEMU_GRAPHIC_CONSOLE(con);
+
+gc->rotate_arcdegree = arcdegree;
  }
  
  bool qemu_console_is_rotated(QemuConsole *con)

  {
-return graphic_rotate != 0;
+QemuGraphicConsole *gc = QEMU_GRAPHIC_CONSOLE(con);
+
+return gc->rotate_arcdegree != 0;
  }
  
  unsigned qemu_console_get_rotate_arcdegree(QemuConsole *con)

  {
-return graphic_rotate;
+QemuGraphicConsole *gc = QEMU_GRAPHIC_CONSOLE(con);
+
+return gc->rotate_arcdegree;
  }
  
  void graphic_hw_invalidate(QemuConsole *con)




Re: [PATCH-for-9.1 v2 1/3] ui/console: Introduce API to change console orientation

2024-03-18 Thread Akihiko Odaki

On 2024/03/18 20:31, Philippe Mathieu-Daudé wrote:

Extract the following methods:

   - qemu_console_set_rotate()
   - qemu_console_is_rotated()
   - qemu_console_get_rotate_arcdegree()

Signed-off-by: Philippe Mathieu-Daudé 


Reviewed-by: Akihiko Odaki 


---
  include/ui/console.h |  3 +++
  ui/console.c | 16 
  ui/input.c   |  9 -
  3 files changed, 23 insertions(+), 5 deletions(-)

diff --git a/include/ui/console.h b/include/ui/console.h
index a4a49ffc64..86ba36e391 100644
--- a/include/ui/console.h
+++ b/include/ui/console.h
@@ -422,15 +422,18 @@ bool qemu_console_is_visible(QemuConsole *con);
  bool qemu_console_is_graphic(QemuConsole *con);
  bool qemu_console_is_fixedsize(QemuConsole *con);
  bool qemu_console_is_gl_blocked(QemuConsole *con);
+bool qemu_console_is_rotated(QemuConsole *con);
  char *qemu_console_get_label(QemuConsole *con);
  int qemu_console_get_index(QemuConsole *con);
  uint32_t qemu_console_get_head(QemuConsole *con);
  int qemu_console_get_width(QemuConsole *con, int fallback);
  int qemu_console_get_height(QemuConsole *con, int fallback);
+unsigned qemu_console_get_rotate_arcdegree(QemuConsole *con);
  /* Return the low-level window id for the console */
  int qemu_console_get_window_id(QemuConsole *con);
  /* Set the low-level window id for the console */
  void qemu_console_set_window_id(QemuConsole *con, int window_id);
+void qemu_console_set_rotate(QemuConsole *con, unsigned arcdegree);
  
  void console_select(unsigned int index);

  void qemu_console_resize(QemuConsole *con, int width, int height);
diff --git a/ui/console.c b/ui/console.c
index 832055675c..84aee76846 100644
--- a/ui/console.c
+++ b/ui/console.c
@@ -37,6 +37,7 @@
  #include "trace.h"
  #include "exec/memory.h"
  #include "qom/object.h"
+#include "sysemu/sysemu.h"
  
  #include "console-priv.h"
  
@@ -207,6 +208,21 @@ void qemu_console_set_window_id(QemuConsole *con, int window_id)

  con->window_id = window_id;
  }
  
+void qemu_console_set_rotate(QemuConsole *con, unsigned arcdegree)

+{
+graphic_rotate = arcdegree;
+}
+
+bool qemu_console_is_rotated(QemuConsole *con)
+{
+return graphic_rotate != 0;
+}
+
+unsigned qemu_console_get_rotate_arcdegree(QemuConsole *con)
+{
+return graphic_rotate;
+}
+
  void graphic_hw_invalidate(QemuConsole *con)
  {
  if (!con) {
diff --git a/ui/input.c b/ui/input.c
index dc745860f4..951806bf05 100644
--- a/ui/input.c
+++ b/ui/input.c
@@ -1,5 +1,4 @@
  #include "qemu/osdep.h"
-#include "sysemu/sysemu.h"
  #include "qapi/error.h"
  #include "qapi/qapi-commands-ui.h"
  #include "trace.h"
@@ -179,10 +178,10 @@ static int qemu_input_transform_invert_abs_value(int 
value)
return (int64_t)INPUT_EVENT_ABS_MAX - value + INPUT_EVENT_ABS_MIN;
  }
  
-static void qemu_input_transform_abs_rotate(InputEvent *evt)

+static void qemu_input_transform_abs_rotate(QemuConsole *src, InputEvent *evt)
  {
  InputMoveEvent *move = evt->u.abs.data;
-switch (graphic_rotate) {
+switch (qemu_console_get_rotate_arcdegree(src)) {
  case 90:
  if (move->axis == INPUT_AXIS_X) {
  move->axis = INPUT_AXIS_Y;
@@ -341,8 +340,8 @@ void qemu_input_event_send_impl(QemuConsole *src, 
InputEvent *evt)
  qemu_input_event_trace(src, evt);
  
  /* pre processing */

-if (graphic_rotate && (evt->type == INPUT_EVENT_KIND_ABS)) {
-qemu_input_transform_abs_rotate(evt);
+if (qemu_console_is_rotated(src) && (evt->type == INPUT_EVENT_KIND_ABS)) {
+qemu_input_transform_abs_rotate(src, evt);
  }
  
  /* send event */




Re: [PATCH v5 15/65] i386/tdx: Get tdx_capabilities via KVM_TDX_CAPABILITIES

2024-03-18 Thread Wang, Lei
On 2/29/2024 14:36, Xiaoyao Li wrote:> KVM provides TDX capabilities via sub
command KVM_TDX_CAPABILITIES of
> IOCTL(KVM_MEMORY_ENCRYPT_OP). Get the capabilities when initializing
> TDX context. It will be used to validate user's setting later.
> 
> Since there is no interface reporting how many cpuid configs contains in
> KVM_TDX_CAPABILITIES, QEMU chooses to try starting with a known number
> and abort when it exceeds KVM_MAX_CPUID_ENTRIES.
> 
> Besides, introduce the interfaces to invoke TDX "ioctls" at different
> scope (KVM, VM and VCPU) in preparation.

tdx_platform_ioctl() is dropped because no user so suggest rephrasing this
statement because no KVM scope ioctl interface is introduced in this patch.

> 
> Signed-off-by: Xiaoyao Li 
> ---
> Changes in v4:
> - use {} to initialize struct kvm_tdx_cmd, to avoid memset();
> - remove tdx_platform_ioctl() because no user;
> 
> Changes in v3:
> - rename __tdx_ioctl() to tdx_ioctl_internal()
> - Pass errp in get_tdx_capabilities();
> 
> changes in v2:
>   - Make the error message more clear;
> 
> changes in v1:
>   - start from nr_cpuid_configs = 6 for the loop;
>   - stop the loop when nr_cpuid_configs exceeds KVM_MAX_CPUID_ENTRIES;
> ---
>  target/i386/kvm/kvm.c  |  2 -
>  target/i386/kvm/kvm_i386.h |  2 +
>  target/i386/kvm/tdx.c  | 91 +-
>  3 files changed, 92 insertions(+), 3 deletions(-)
> 
> diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c
> index 52d99d30bdc8..0e68e80f4291 100644
> --- a/target/i386/kvm/kvm.c
> +++ b/target/i386/kvm/kvm.c
> @@ -1685,8 +1685,6 @@ static int hyperv_init_vcpu(X86CPU *cpu)
>  
>  static Error *invtsc_mig_blocker;
>  
> -#define KVM_MAX_CPUID_ENTRIES  100
> -
>  static void kvm_init_xsave(CPUX86State *env)
>  {
>  if (has_xsave2) {
> diff --git a/target/i386/kvm/kvm_i386.h b/target/i386/kvm/kvm_i386.h
> index 55fb25fa8e2e..c3ef46a97a7b 100644
> --- a/target/i386/kvm/kvm_i386.h
> +++ b/target/i386/kvm/kvm_i386.h
> @@ -13,6 +13,8 @@
>  
>  #include "sysemu/kvm.h"
>  
> +#define KVM_MAX_CPUID_ENTRIES  100
> +
>  #ifdef CONFIG_KVM
>  
>  #define kvm_pit_in_kernel() \
> diff --git a/target/i386/kvm/tdx.c b/target/i386/kvm/tdx.c
> index d9a1dd46dc69..2b956450a083 100644
> --- a/target/i386/kvm/tdx.c
> +++ b/target/i386/kvm/tdx.c
> @@ -12,18 +12,107 @@
>   */
>  
>  #include "qemu/osdep.h"
> +#include "qemu/error-report.h"
> +#include "qapi/error.h"
>  #include "qom/object_interfaces.h"
> +#include "sysemu/kvm.h"
>  
>  #include "hw/i386/x86.h"
> +#include "kvm_i386.h"
>  #include "tdx.h"
>  
> +static struct kvm_tdx_capabilities *tdx_caps;
> +
> +enum tdx_ioctl_level{
> +TDX_VM_IOCTL,
> +TDX_VCPU_IOCTL,
> +};
> +
> +static int tdx_ioctl_internal(void *state, enum tdx_ioctl_level level, int 
> cmd_id,
> +__u32 flags, void *data)
> +{
> +struct kvm_tdx_cmd tdx_cmd = {};
> +int r;
> +
> +tdx_cmd.id = cmd_id;
> +tdx_cmd.flags = flags;
> +tdx_cmd.data = (__u64)(unsigned long)data;
> +
> +switch (level) {
> +case TDX_VM_IOCTL:
> +r = kvm_vm_ioctl(kvm_state, KVM_MEMORY_ENCRYPT_OP, _cmd);
> +break;
> +case TDX_VCPU_IOCTL:
> +r = kvm_vcpu_ioctl(state, KVM_MEMORY_ENCRYPT_OP, _cmd);
> +break;
> +default:
> +error_report("Invalid tdx_ioctl_level %d", level);
> +exit(1);
> +}
> +
> +return r;
> +}
> +
> +static inline int tdx_vm_ioctl(int cmd_id, __u32 flags, void *data)
> +{
> +return tdx_ioctl_internal(NULL, TDX_VM_IOCTL, cmd_id, flags, data);
> +}
> +
> +static inline int tdx_vcpu_ioctl(void *vcpu_fd, int cmd_id, __u32 flags,
> + void *data)
> +{
> +return  tdx_ioctl_internal(vcpu_fd, TDX_VCPU_IOCTL, cmd_id, flags, data);
> +}
> +
> +static int get_tdx_capabilities(Error **errp)
> +{
> +struct kvm_tdx_capabilities *caps;
> +/* 1st generation of TDX reports 6 cpuid configs */
> +int nr_cpuid_configs = 6;
> +size_t size;
> +int r;
> +
> +do {
> +size = sizeof(struct kvm_tdx_capabilities) +
> +   nr_cpuid_configs * sizeof(struct kvm_tdx_cpuid_config);
> +caps = g_malloc0(size);
> +caps->nr_cpuid_configs = nr_cpuid_configs;
> +
> +r = tdx_vm_ioctl(KVM_TDX_CAPABILITIES, 0, caps);
> +if (r == -E2BIG) {
> +g_free(caps);
> +nr_cpuid_configs *= 2;
> +if (nr_cpuid_configs > KVM_MAX_CPUID_ENTRIES) {
> +error_setg(errp, "%s: KVM TDX seems broken that number of 
> CPUID "
> +   "entries in kvm_tdx_capabilities exceeds limit 
> %d",
> +   __func__, KVM_MAX_CPUID_ENTRIES);
> +return r;
> +}
> +} else if (r < 0) {
> +g_free(caps);
> +error_setg_errno(errp, -r, "%s: KVM_TDX_CAPABILITIES failed", 
> __func__);
> +return r;
> +}
> +}
> +while (r == -E2BIG);
> +
> +tdx_caps 

[PATCH] hw/loongarch: Refine default numa id calculation

2024-03-18 Thread Bibo Mao
With numa_test test case, there is subcase named test_def_cpu_split(),
there are 8 sockets and 2 numa nodes. Here is command line:
"-machine smp.cpus=8,smp.sockets=8 -numa node,memdev=ram -numa node"

The required result is:
  node 0 cpus: 0 2 4 6
  node 1 cpus: 1 3 5 7
Test case numa_test fails on LoongArch, since the actual result is:
  node 0 cpus: 0 1 2 3
  node 1 cpus: 4 5 6 7

It will be better if all the cpus in one socket share the same numa
node. Here socket id is used to calculate numa id in function
virt_get_default_cpu_node_id().

Signed-off-by: Bibo Mao 
---
 hw/loongarch/virt.c | 11 +--
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/hw/loongarch/virt.c b/hw/loongarch/virt.c
index deb3750d81..29885f6777 100644
--- a/hw/loongarch/virt.c
+++ b/hw/loongarch/virt.c
@@ -1219,15 +1219,14 @@ virt_cpu_index_to_props(MachineState *ms, unsigned 
cpu_index)
 
 static int64_t virt_get_default_cpu_node_id(const MachineState *ms, int idx)
 {
-int64_t nidx = 0;
+int64_t socket_id;
 
 if (ms->numa_state->num_nodes) {
-nidx = idx / (ms->smp.cpus / ms->numa_state->num_nodes);
-if (ms->numa_state->num_nodes <= nidx) {
-nidx = ms->numa_state->num_nodes - 1;
-}
+socket_id = ms->possible_cpus->cpus[idx].props.socket_id;
+return socket_id % ms->numa_state->num_nodes;
+} else {
+return 0;
 }
-return nidx;
 }
 
 static void loongarch_class_init(ObjectClass *oc, void *data)
-- 
2.39.3




[PATCH] monitor/hmp-cmds-target.c: append a space in error message in gpa2hva()

2024-03-18 Thread Shiyang Ruan via
From: Yao Xingtao 

In qemu monitor mode, when we use gpa2hva command to print the host
virtual address corresponding to a guest physical address, if the gpa is
not in RAM, the error message is below:

(qemu) gpa2hva 0x75000
Memory at address 0x75000is not RAM

a space is missed between '0x75000' and 'is'.

Signed-off-by: Yao Xingtao 
---
 monitor/hmp-cmds-target.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/monitor/hmp-cmds-target.c b/monitor/hmp-cmds-target.c
index 9338ae8440..ff01cf9d8d 100644
--- a/monitor/hmp-cmds-target.c
+++ b/monitor/hmp-cmds-target.c
@@ -261,7 +261,7 @@ void *gpa2hva(MemoryRegion **p_mr, hwaddr addr, uint64_t 
size, Error **errp)
 }
 
 if (!memory_region_is_ram(mrs.mr) && !memory_region_is_romd(mrs.mr)) {
-error_setg(errp, "Memory at address 0x%" HWADDR_PRIx "is not RAM", 
addr);
+error_setg(errp, "Memory at address 0x%" HWADDR_PRIx " is not RAM", 
addr);
 memory_region_unref(mrs.mr);
 return NULL;
 }
-- 
2.27.0




Re: [PATCH v5 08/65] kvm: handle KVM_EXIT_MEMORY_FAULT

2024-03-18 Thread Wang, Lei
On 2/29/2024 14:36, Xiaoyao Li wrote:
> From: Chao Peng 
> 
> When geeting KVM_EXIT_MEMORY_FAULT exit, it indicates userspace needs to
> do the memory conversion on the RAMBlock to turn the memory into desired
> attribute, i.e., private/shared.
> 
> Currently only KVM_MEMORY_EXIT_FLAG_PRIVATE in flags is valid when
> KVM_EXIT_MEMORY_FAULT happens.
> 
> Note, KVM_EXIT_MEMORY_FAULT makes sense only when the RAMBlock has
> guest_memfd memory backend.
> 
> Note, KVM_EXIT_MEMORY_FAULT returns with -EFAULT, so special handling is
> added.
> 
> When page is converted from shared to private, the original shared
> memory can be discarded via ram_block_discard_range(). Note, shared
> memory can be discarded only when it's not back'ed by hugetlb because
> hugetlb is supposed to be pre-allocated and no need for discarding.
> 
> Signed-off-by: Chao Peng 
> Co-developed-by: Xiaoyao Li 
> Signed-off-by: Xiaoyao Li 
> 
> ---
> Changes in v4:
> - open-coded ram_block_discard logic;
> - change warn_report() to error_report(); (Daniel)
> ---
>  accel/kvm/kvm-all.c | 94 -
>  1 file changed, 84 insertions(+), 10 deletions(-)
> 
> diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c
> index 70d482a2c936..87e4275932a7 100644
> --- a/accel/kvm/kvm-all.c
> +++ b/accel/kvm/kvm-all.c
> @@ -2903,6 +2903,68 @@ static void kvm_eat_signals(CPUState *cpu)
>  } while (sigismember(, SIG_IPI));
>  }
>  
> +static int kvm_convert_memory(hwaddr start, hwaddr size, bool to_private)
> +{
> +MemoryRegionSection section;
> +ram_addr_t offset;
> +MemoryRegion *mr;
> +RAMBlock *rb;
> +void *addr;
> +int ret = -1;
> +
> +if (!QEMU_PTR_IS_ALIGNED(start, qemu_host_page_size) ||
> +!QEMU_PTR_IS_ALIGNED(size, qemu_host_page_size)) {
> +return -1;
> +}
> +
> +if (!size) {
> +return -1;
> +}
> +
> +section = memory_region_find(get_system_memory(), start, size);
> +mr = section.mr;
> +if (!mr) {
> +return -1;
> +}
> +
> +if (memory_region_has_guest_memfd(mr)) {
> +if (to_private) {
> +ret = kvm_set_memory_attributes_private(start, size);
> +} else {
> +ret = kvm_set_memory_attributes_shared(start, size);
> +}
> +
> +if (ret) {
> +memory_region_unref(section.mr);
> +return ret;
> +}
> +
> +addr = memory_region_get_ram_ptr(mr) + section.offset_within_region;
> +rb = qemu_ram_block_from_host(addr, false, );
> +
> +if (to_private) {
> +if (rb->page_size != qemu_host_page_size) {
> +/*
> +* shared memory is back'ed by  hugetlb, which is supposed to 
> be
> +* pre-allocated and doesn't need to be discarded
> +*/

Nit: comment indentation is broken here.

> +return 0;
> +} else {
> +ret = ram_block_discard_range(rb, offset, size);
> +}
> +} else {
> +ret = ram_block_discard_guest_memfd_range(rb, offset, size);
> +}
> +} else {
> +error_report("Convert non guest_memfd backed memory region "
> +"(0x%"HWADDR_PRIx" ,+ 0x%"HWADDR_PRIx") to %s",

Same as above.

> +start, size, to_private ? "private" : "shared");
> +}
> +
> +memory_region_unref(section.mr);
> +return ret;
> +}
> +
>  int kvm_cpu_exec(CPUState *cpu)
>  {
>  struct kvm_run *run = cpu->kvm_run;
> @@ -2970,18 +3032,20 @@ int kvm_cpu_exec(CPUState *cpu)
>  ret = EXCP_INTERRUPT;
>  break;
>  }
> -fprintf(stderr, "error: kvm run failed %s\n",
> -strerror(-run_ret));
> +if (!(run_ret == -EFAULT && run->exit_reason == 
> KVM_EXIT_MEMORY_FAULT)) {
> +fprintf(stderr, "error: kvm run failed %s\n",
> +strerror(-run_ret));
>  #ifdef TARGET_PPC
> -if (run_ret == -EBUSY) {
> -fprintf(stderr,
> -"This is probably because your SMT is enabled.\n"
> -"VCPU can only run on primary threads with all "
> -"secondary threads offline.\n");
> -}
> +if (run_ret == -EBUSY) {
> +fprintf(stderr,
> +"This is probably because your SMT is enabled.\n"
> +"VCPU can only run on primary threads with all "
> +"secondary threads offline.\n");
> +}
>  #endif
> -ret = -1;
> -break;
> +ret = -1;
> +break;
> +}
>  }
>  
>  trace_kvm_run_exit(cpu->cpu_index, run->exit_reason);
> @@ -3064,6 +3128,16 @@ int kvm_cpu_exec(CPUState *cpu)
>  break;
>  }
>  break;
> +case 

Re: [PATCH 2/4] i386/sev: Switch to use confidential_guest_kvm_init()

2024-03-18 Thread Xiaoyao Li

On 3/19/2024 5:51 AM, Paolo Bonzini wrote:

On Thu, Feb 29, 2024 at 7:01 AM Xiaoyao Li  wrote:


Use confidential_guest_kvm_init() instead of calling SEV specific
sev_kvm_init(). As a bouns, it fits to future TDX when TDX implements
its own confidential_guest_support and .kvm_init().

Move the "TypeInfo sev_guest_info" definition and related functions to
the end of the file, to avoid declaring the sev_kvm_init() ahead.

Delete the sve-stub.c since it's not needed anymore.

Signed-off-by: Xiaoyao Li 
---
Changes from rfc v1:
- check ms->cgs not NULL before calling confidential_guest_kvm_init();
- delete the sev-stub.c;


Queued, with just one small simplification that can be done on top:


thank you, Paolo!


diff --git a/target/i386/sev.c b/target/i386/sev.c
index e89d64fa52..b8f79d34d1 100644
--- a/target/i386/sev.c
+++ b/target/i386/sev.c
@@ -851,18 +851,13 @@ sev_vm_state_change(void *opaque, bool running,
RunState state)

  static int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp)
  {
-SevGuestState *sev
-= (SevGuestState *)object_dynamic_cast(OBJECT(cgs), TYPE_SEV_GUEST);
+SevGuestState *sev = SEV_GUEST(cgs);
  char *devname;
  int ret, fw_error, cmd;
  uint32_t ebx;
  uint32_t host_cbitpos;
  struct sev_user_data_status status = {};

-if (!sev) {
-return 0;
-}
-
  ret = ram_block_discard_disable(true);
  if (ret) {
  error_report("%s: cannot disable RAM discard", __func__);


It looks good.


Thanks!

Paolo





Re: [PATCH v5 06/65] kvm: Introduce support for memory_attributes

2024-03-18 Thread Wang, Lei
On 2/29/2024 14:36, Xiaoyao Li wrote:> Introduce the helper functions to set 
the attributes of a range of
> memory to private or shared.
> 
> This is necessary to notify KVM the private/shared attribute of each gpa
> range. KVM needs the information to decide the GPA needs to be mapped at
> hva-based shared memory or guest_memfd based private memory.
> 
> Signed-off-by: Xiaoyao Li 
> ---
> Changes in v4:
> - move the check of kvm_supported_memory_attributes to the common
>   kvm_set_memory_attributes(); (Wang Wei)
> - change warn_report() to error_report() in kvm_set_memory_attributes()
>   and drop the __func__; (Daniel)
> ---
>  accel/kvm/kvm-all.c  | 44 
>  include/sysemu/kvm.h |  3 +++
>  2 files changed, 47 insertions(+)
> 
> diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c
> index cd0aa7545a1f..70d482a2c936 100644
> --- a/accel/kvm/kvm-all.c
> +++ b/accel/kvm/kvm-all.c
> @@ -92,6 +92,7 @@ static bool kvm_has_guest_debug;
>  static int kvm_sstep_flags;
>  static bool kvm_immediate_exit;
>  static bool kvm_guest_memfd_supported;
> +static uint64_t kvm_supported_memory_attributes;
>  static hwaddr kvm_max_slot_size = ~0;
>  
>  static const KVMCapabilityInfo kvm_required_capabilites[] = {
> @@ -1304,6 +1305,46 @@ void kvm_set_max_memslot_size(hwaddr max_slot_size)
>  kvm_max_slot_size = max_slot_size;
>  }
>  
> +static int kvm_set_memory_attributes(hwaddr start, hwaddr size, uint64_t 
> attr)
> +{
> +struct kvm_memory_attributes attrs;
> +int r;
> +
> +if (kvm_supported_memory_attributes == 0) {
> +error_report("No memory attribute supported by KVM\n");
> +return -EINVAL;
> +}
> +
> +if ((attr & kvm_supported_memory_attributes) != attr) {
> +error_report("memory attribute 0x%lx not supported by KVM,"
> + " supported bits are 0x%lx\n",
> + attr, kvm_supported_memory_attributes);
> +return -EINVAL;
> +}
> +
> +attrs.attributes = attr;
> +attrs.address = start;
> +attrs.size = size;
> +attrs.flags = 0;
> +
> +r = kvm_vm_ioctl(kvm_state, KVM_SET_MEMORY_ATTRIBUTES, );
> +if (r) {
> +error_report("failed to set memory (0x%lx+%#zx) with attr 0x%lx 
> error '%s'",
> + start, size, attr, strerror(errno));
> +}
> +return r;
> +}
> +
> +int kvm_set_memory_attributes_private(hwaddr start, hwaddr size)
> +{
> +return kvm_set_memory_attributes(start, size, 
> KVM_MEMORY_ATTRIBUTE_PRIVATE);
> +}
> +
> +int kvm_set_memory_attributes_shared(hwaddr start, hwaddr size)
> +{
> +return kvm_set_memory_attributes(start, size, 0);
> +}
> +
>  /* Called with KVMMemoryListener.slots_lock held */
>  static void kvm_set_phys_mem(KVMMemoryListener *kml,
>   MemoryRegionSection *section, bool add)
> @@ -2439,6 +2480,9 @@ static int kvm_init(MachineState *ms)
>  
>  kvm_guest_memfd_supported = kvm_check_extension(s, KVM_CAP_GUEST_MEMFD);
>  
> +ret = kvm_check_extension(s, KVM_CAP_MEMORY_ATTRIBUTES);
> +kvm_supported_memory_attributes = ret > 0 ? ret : 0;

kvm_check_extension() only returns non-negative value, so we can just

kvm_supported_memory_attributes = kvm_check_extension(s, 
KVM_CAP_MEMORY_ATTRIBUTES);

> +
>  if (object_property_find(OBJECT(current_machine), "kvm-type")) {
>  g_autofree char *kvm_type = 
> object_property_get_str(OBJECT(current_machine),
>  "kvm-type",
> diff --git a/include/sysemu/kvm.h b/include/sysemu/kvm.h
> index 6cdf82de8372..8e83adfbbd19 100644
> --- a/include/sysemu/kvm.h
> +++ b/include/sysemu/kvm.h
> @@ -546,4 +546,7 @@ uint32_t kvm_dirty_ring_size(void);
>  bool kvm_hwpoisoned_mem(void);
>  
>  int kvm_create_guest_memfd(uint64_t size, uint64_t flags, Error **errp);
> +
> +int kvm_set_memory_attributes_private(hwaddr start, hwaddr size);
> +int kvm_set_memory_attributes_shared(hwaddr start, hwaddr size);
>  #endif



Re: [PATCH for 9.0 v15 02/10] trans_rvv.c.inc: set vstart = 0 in int scalar move insns

2024-03-18 Thread LIU Zhiwei



On 2024/3/15 1:56, Daniel Henrique Barboza wrote:

trans_vmv_x_s, trans_vmv_s_x, trans_vfmv_f_s and trans_vfmv_s_f aren't
setting vstart = 0 after execution. This is usually done by a helper in
vector_helper.c but these functions don't use helpers.

We'll set vstart after any potential 'over' brconds, and that will also
mandate a mark_vs_dirty() too.

Fixes: dedc53cbc9 ("target/riscv: rvv-1.0: integer scalar move instructions")
Signed-off-by: Daniel Henrique Barboza 
Reviewed-by: Richard Henderson 


Reviewd-by: LIU Zhiwei 

Zhiwei


---
  target/riscv/insn_trans/trans_rvv.c.inc | 10 --
  1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/target/riscv/insn_trans/trans_rvv.c.inc 
b/target/riscv/insn_trans/trans_rvv.c.inc
index e42728990e..8c16a9f5b3 100644
--- a/target/riscv/insn_trans/trans_rvv.c.inc
+++ b/target/riscv/insn_trans/trans_rvv.c.inc
@@ -3373,6 +3373,8 @@ static bool trans_vmv_x_s(DisasContext *s, arg_vmv_x_s *a)
  vec_element_loadi(s, t1, a->rs2, 0, true);
  tcg_gen_trunc_i64_tl(dest, t1);
  gen_set_gpr(s, a->rd, dest);
+tcg_gen_movi_tl(cpu_vstart, 0);
+mark_vs_dirty(s);
  return true;
  }
  return false;
@@ -3399,8 +3401,9 @@ static bool trans_vmv_s_x(DisasContext *s, arg_vmv_s_x *a)
  s1 = get_gpr(s, a->rs1, EXT_NONE);
  tcg_gen_ext_tl_i64(t1, s1);
  vec_element_storei(s, a->rd, 0, t1);
-mark_vs_dirty(s);
  gen_set_label(over);
+tcg_gen_movi_tl(cpu_vstart, 0);
+mark_vs_dirty(s);
  return true;
  }
  return false;
@@ -3427,6 +3430,8 @@ static bool trans_vfmv_f_s(DisasContext *s, arg_vfmv_f_s 
*a)
  }
  
  mark_fs_dirty(s);

+tcg_gen_movi_tl(cpu_vstart, 0);
+mark_vs_dirty(s);
  return true;
  }
  return false;
@@ -3452,8 +3457,9 @@ static bool trans_vfmv_s_f(DisasContext *s, arg_vfmv_s_f 
*a)
  do_nanbox(s, t1, cpu_fpr[a->rs1]);
  
  vec_element_storei(s, a->rd, 0, t1);

-mark_vs_dirty(s);
  gen_set_label(over);
+tcg_gen_movi_tl(cpu_vstart, 0);
+mark_vs_dirty(s);
  return true;
  }
  return false;




Re: [PATCH for 9.0 v15 01/10] target/riscv/vector_helper.c: set vstart = 0 in GEN_VEXT_VSLIDEUP_VX()

2024-03-18 Thread LIU Zhiwei



On 2024/3/15 1:56, Daniel Henrique Barboza wrote:

The helper isn't setting env->vstart = 0 after its execution, as it is
expected from every vector instruction that completes successfully.

Signed-off-by: Daniel Henrique Barboza 
Reviewed-by: Richard Henderson 
Reviewed-by: Alistair Francis 


Reviewed-by: LIU Zhiwei 

Zhiwei


---
  target/riscv/vector_helper.c | 1 +
  1 file changed, 1 insertion(+)

diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c
index fe56c007d5..ca79571ae2 100644
--- a/target/riscv/vector_helper.c
+++ b/target/riscv/vector_helper.c
@@ -4781,6 +4781,7 @@ void HELPER(NAME)(void *vd, void *v0, target_ulong s1, 
void *vs2, \
  } \
  *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i - offset));  \
  } \
+env->vstart = 0;  \
  /* set tail elements to 1s */ \
  vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);  \
  }




Re: [PATCH v2 1/1] cxl/mem: Fix for the index of Clear Event Record Handle

2024-03-18 Thread Dan Williams
Jonathan Cameron wrote:
> On Mon, 18 Mar 2024 10:29:28 +0800
> Yuquan Wang  wrote:
> 
> > The dev_dbg info for Clear Event Records mailbox command would report
> > the handle of the next record to clear not the current one.
> > 
> > This was because the index 'i' had incremented before printing the
> > current handle value.
> > 
> > Signed-off-by: Yuquan Wang 
> > ---
> >  drivers/cxl/core/mbox.c | 2 +-
> >  1 file changed, 1 insertion(+), 1 deletion(-)
> > 
> > diff --git a/drivers/cxl/core/mbox.c b/drivers/cxl/core/mbox.c
> > index 9adda4795eb7..b810a6aa3010 100644
> > --- a/drivers/cxl/core/mbox.c
> > +++ b/drivers/cxl/core/mbox.c
> > @@ -915,7 +915,7 @@ static int cxl_clear_event_record(struct 
> > cxl_memdev_state *mds,
> >  
> > payload->handles[i++] = gen->hdr.handle;
> > dev_dbg(mds->cxlds.dev, "Event log '%d': Clearing %u\n", log,
> > -   le16_to_cpu(payload->handles[i]));
> > +   le16_to_cpu(payload->handles[i-1]));
> Trivial but needs spaces around the -. e.g.  [i - 1] 
> 
> Maybe Dan can fix up whilst applying.
> 
> Otherwise
> 
> Reviewed-by: Jonathan Cameron 

I have enlisted Dave to start wrangling CXL kernel patches upstream, and
I will fall back to just reviewing.

Dave, you can add my:

Reviewed-by: Dan Williams 

...with the same caveat as above.



Re: [PULL 0/4] machine development tool

2024-03-18 Thread Peter Xu
On Mon, Mar 18, 2024 at 08:08:29PM +0300, Vladimir Sementsov-Ogievskiy wrote:
> On 08.03.24 06:47, Peter Xu wrote:
> > On Thu, Mar 07, 2024 at 12:06:59PM +0300, Maksim Davydov wrote:
> > > 
> > > On 3/6/24 04:57, Peter Xu wrote:
> > > > On Tue, Mar 05, 2024 at 03:43:41PM +0100, Markus Armbruster wrote:
> > > > > Peter Maydell  writes:
> > > > > 
> > > > > > On Mon, 4 Mar 2024 at 13:52, Maksim 
> > > > > > Davydov  wrote:
> > > > > > > The following changes since commit 
> > > > > > > e1007b6bab5cf97705bf4f2aaec1f607787355b8:
> > > > > > > 
> > > > > > > Merge tag 'pull-request-2024-03-01' 
> > > > > > > ofhttps://gitlab.com/thuth/qemu  into staging (2024-03-01 
> > > > > > > 10:14:32 +)
> > > > > > > 
> > > > > > > are available in the Git repository at:
> > > > > > > 
> > > > > > > https://gitlab.com/davydov-max/qemu.git  
> > > > > > > tags/pull-compare-mt-2024-03-04
> > > > > > > 
> > > > > > > for you to fetch changes up to 
> > > > > > > 7693a2e8518811a907d73a85807ee71dac8fabcb:
> > > > > > > 
> > > > > > > scripts: add script to compare compatibility properties 
> > > > > > > (2024-03-04 14:10:53 +0300)
> > > > > > > 
> > > > > > > 
> > > > > > > Please note. This is the first pull request from me.
> > > > > > > My public GPG key is available here
> > > > > > > https://keys.openpgp.org/vks/v1/by-fingerprint/CDB5BEEF8837142579F5CDFE8E927E10F72F78D4
> > > > > > > 
> > > > > > > 
> > > > > > > scripts: add a new script for machine development
> > > > > > > 
> > > > > > > 
> > > > > > Hi; I would prefer this to go through some existing submaintainer
> > > > > > tree if possible, please.
> > > > > Migration?  QOM?  Not sure.  Cc'ing the maintainers anyway.
> > > > Yeah this seems like migration relevant.. however now I'm slightly 
> > > > confused
> > > > on when this script should be useful.
> > > > 
> > > > According to:
> > > > 
> > > > https://lore.kernel.org/qemu-devel/20240222153912.646053-5-davydov-...@yandex-team.ru/
> > > > 
> > > >   This script runs QEMU to obtain compat_props of machines and
> > > >   default values of different types of drivers to produce 
> > > > comparison
> > > >   table. This table can be used to compare machine types to 
> > > > choose
> > > >   the most suitable machine or compare binaries to be sure that
> > > >   migration to the newer version will save all device
> > > >   properties. Also the json or csv format of this table can be 
> > > > used
> > > >   to check does a new machine affect the previous ones by 
> > > > comparing
> > > >   tables with and without the new machine.
> > > > 
> > > > In regards to "choose the most suitable machine": why do you need to 
> > > > choose
> > > > a machine?
> > > > 
> > > > If it's pretty standalone setup, shouldn't we always try to use the 
> > > > latest
> > > > machine type if possible (as normally compat props are only used to keep
> > > > compatible with old machine types, and the default should always be
> > > > preferred). If it's a cluster setup, IMHO it should depend on the oldest
> > > > QEMU version that plans to be supported.  I don't see how such 
> > > > comparison
> > > > helps yet in either of the cases..
> > > > 
> > > > In regards to "compare binaries to be sure that migration to the newer
> > > > version will save all device properties": do we even support migrating
> > > > _between_ machine types??
> > > > 
> > > > Sololy relying on compat properties to detect device compatibility is 
> > > > also
> > > > not reliable.  For example, see VMStateField.field_exists() or 
> > > > similarly,
> > > > VMStateDescription.needed(), which are hooks that device can provide to
> > > > dynamically decide what device state to be saved/loaded.  Such things 
> > > > are
> > > > not reflected in compat properties, so even if compat properties of all
> > > > devices are the same between two machine types, it may not mean that the
> > > > migration stream will always be compatible.
> > > > 
> > > > Thanks,
> > > 
> > > In fact, the last commit describes the meaning of this series best. 
> > > Perhaps
> > > it should have been moved to the cover letter:
> > > Often, many teams have their own "machines" inherited from "upstream" to
> > > manage default values of devices. This is done because of the limitations
> > > imposed by the compatibility requirements or the desire to help their
> > > customers better configure their devices. And since machine type has
> > > a hard-to-read structure, it is very easy to make a mistake when
> > > transferring
> > > default values from one machine to another. For example, when some 
> > > property
> > > is set for the entire abstract class x86_64-cpu (which will be applied to
> > > all
> > > models), and then rolled back 

Re: [PATCH v4 2/2] vhost: Perform memory section dirty scans once per iteration

2024-03-18 Thread Si-Wei Liu




On 3/17/2024 8:22 PM, Jason Wang wrote:

On Sat, Mar 16, 2024 at 2:45 AM Si-Wei Liu  wrote:



On 3/14/2024 9:03 PM, Jason Wang wrote:

On Fri, Mar 15, 2024 at 5:39 AM Si-Wei Liu  wrote:

On setups with one or more virtio-net devices with vhost on,
dirty tracking iteration increases cost the bigger the number
amount of queues are set up e.g. on idle guests migration the
following is observed with virtio-net with vhost=on:

48 queues -> 78.11%  [.] vhost_dev_sync_region.isra.13
8 queues -> 40.50%   [.] vhost_dev_sync_region.isra.13
1 queue -> 6.89% [.] vhost_dev_sync_region.isra.13
2 devices, 1 queue -> 18.60%  [.] vhost_dev_sync_region.isra.14

With high memory rates the symptom is lack of convergence as soon
as it has a vhost device with a sufficiently high number of queues,
the sufficient number of vhost devices.

On every migration iteration (every 100msecs) it will redundantly
query the *shared log* the number of queues configured with vhost
that exist in the guest. For the virtqueue data, this is necessary,
but not for the memory sections which are the same. So essentially
we end up scanning the dirty log too often.

To fix that, select a vhost device responsible for scanning the
log with regards to memory sections dirty tracking. It is selected
when we enable the logger (during migration) and cleared when we
disable the logger. If the vhost logger device goes away for some
reason, the logger will be re-selected from the rest of vhost
devices.

After making mem-section logger a singleton instance, constant cost
of 7%-9% (like the 1 queue report) will be seen, no matter how many
queues or how many vhost devices are configured:

48 queues -> 8.71%[.] vhost_dev_sync_region.isra.13
2 devices, 8 queues -> 7.97%   [.] vhost_dev_sync_region.isra.14

Co-developed-by: Joao Martins 
Signed-off-by: Joao Martins 
Signed-off-by: Si-Wei Liu 

---
v3 -> v4:
- add comment to clarify effect on cache locality and
  performance

v2 -> v3:
- add after-fix benchmark to commit log
- rename vhost_log_dev_enabled to vhost_dev_should_log
- remove unneeded comparisons for backend_type
- use QLIST array instead of single flat list to store vhost
  logger devices
- simplify logger election logic
---
   hw/virtio/vhost.c | 67 
++-
   include/hw/virtio/vhost.h |  1 +
   2 files changed, 62 insertions(+), 6 deletions(-)

diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c
index 612f4db..58522f1 100644
--- a/hw/virtio/vhost.c
+++ b/hw/virtio/vhost.c
@@ -45,6 +45,7 @@

   static struct vhost_log *vhost_log[VHOST_BACKEND_TYPE_MAX];
   static struct vhost_log *vhost_log_shm[VHOST_BACKEND_TYPE_MAX];
+static QLIST_HEAD(, vhost_dev) vhost_log_devs[VHOST_BACKEND_TYPE_MAX];

   /* Memslots used by backends that support private memslots (without an fd). 
*/
   static unsigned int used_memslots;
@@ -149,6 +150,47 @@ bool vhost_dev_has_iommu(struct vhost_dev *dev)
   }
   }

+static inline bool vhost_dev_should_log(struct vhost_dev *dev)
+{
+assert(dev->vhost_ops);
+assert(dev->vhost_ops->backend_type > VHOST_BACKEND_TYPE_NONE);
+assert(dev->vhost_ops->backend_type < VHOST_BACKEND_TYPE_MAX);
+
+return dev == QLIST_FIRST(_log_devs[dev->vhost_ops->backend_type]);

A dumb question, why not simple check

dev->log == vhost_log_shm[dev->vhost_ops->backend_type]

Because we are not sure if the logger comes from vhost_log_shm[] or
vhost_log[]. Don't want to complicate the check here by calling into
vhost_dev_log_is_shared() everytime when the .log_sync() is called.

It has very low overhead, isn't it?
Whether this has low overhead will have to depend on the specific 
backend's implementation for .vhost_requires_shm_log(), which the common 
vhost layer should not assume upon or rely on the current implementation.




static bool vhost_dev_log_is_shared(struct vhost_dev *dev)
{
 return dev->vhost_ops->vhost_requires_shm_log &&
dev->vhost_ops->vhost_requires_shm_log(dev);
}

And it helps to simplify the logic.
Generally yes, but when it comes to hot path operations the performance 
consideration could override this principle. I think there's no harm to 
check against logger device cached in vhost layer itself, and the 
current patch does not create a lot of complexity or performance side 
effect (actually I think the conditional should be very straightforward 
to turn into just a couple of assembly compare and branch instructions 
rather than indirection through another jmp call).


-Siwei



Thanks


-Siwei

?

Thanks






Re: [PATCH v4 1/2] vhost: dirty log should be per backend type

2024-03-18 Thread Si-Wei Liu




On 3/17/2024 8:20 PM, Jason Wang wrote:

On Sat, Mar 16, 2024 at 2:33 AM Si-Wei Liu  wrote:



On 3/14/2024 8:50 PM, Jason Wang wrote:

On Fri, Mar 15, 2024 at 5:39 AM Si-Wei Liu  wrote:

There could be a mix of both vhost-user and vhost-kernel clients
in the same QEMU process, where separate vhost loggers for the
specific vhost type have to be used. Make the vhost logger per
backend type, and have them properly reference counted.

It's better to describe what's the advantage of doing this.

Yes, I can add that to the log. Although it's a niche use case, it was
actually a long standing limitation / bug that vhost-user and
vhost-kernel loggers can't co-exist per QEMU process, but today it's
just silent failure that may be ended up with. This bug fix removes that
implicit limitation in the code.

Ok.


Suggested-by: Michael S. Tsirkin 
Signed-off-by: Si-Wei Liu 

---
v3->v4:
- remove checking NULL return value from vhost_log_get

v2->v3:
- remove non-effective assertion that never be reached
- do not return NULL from vhost_log_get()
- add neccessary assertions to vhost_log_get()
---
   hw/virtio/vhost.c | 45 +
   1 file changed, 33 insertions(+), 12 deletions(-)

diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c
index 2c9ac79..612f4db 100644
--- a/hw/virtio/vhost.c
+++ b/hw/virtio/vhost.c
@@ -43,8 +43,8 @@
   do { } while (0)
   #endif

-static struct vhost_log *vhost_log;
-static struct vhost_log *vhost_log_shm;
+static struct vhost_log *vhost_log[VHOST_BACKEND_TYPE_MAX];
+static struct vhost_log *vhost_log_shm[VHOST_BACKEND_TYPE_MAX];

   /* Memslots used by backends that support private memslots (without an fd). 
*/
   static unsigned int used_memslots;
@@ -287,6 +287,10 @@ static int vhost_set_backend_type(struct vhost_dev *dev,
   r = -1;
   }

+if (r == 0) {
+assert(dev->vhost_ops->backend_type == backend_type);
+}
+

Under which condition could we hit this?

Just in case some other function inadvertently corrupted this earlier,
we have to capture discrepancy in the first place... On the other hand,
it will be helpful for other vhost backend writers to diagnose day-one
bug in the code. I feel just code comment here will not be
sufficient/helpful.

See below.


   It seems not good to assert a local logic.

It seems to me quite a few local asserts are in the same file already,
vhost_save_backend_state,

For example it has assert for

assert(!dev->started);

which is not the logic of the function itself but require
vhost_dev_start() not to be called before.

But it looks like this patch you assert the code just a few lines
above the assert itself?
Yes, that was the intent - for e.g. xxx_ops may contain corrupted 
xxx_ops.backend_type already before coming to this 
vhost_set_backend_type() function. And we may capture this corrupted 
state by asserting the expected xxx_ops.backend_type (to be consistent 
with the backend_type passed in), which needs be done in the first place 
when this discrepancy is detected. In practice I think there should be 
no harm to add this assert, but this will add warranted guarantee to the 
current code.


Regards,
-Siwei



dev->vhost_ops = _ops;

...

assert(dev->vhost_ops->backend_type == backend_type)

?

Thanks


vhost_load_backend_state,
vhost_virtqueue_mask, vhost_config_mask, just to name a few. Why local
assert a problem?

Thanks,
-Siwei


Thanks






Re: [PATCH v2 1/2] target/s390x: Use mutable temporary value for op_ts

2024-03-18 Thread David Hildenbrand

On 18.03.24 21:27, Ilya Leoshkevich wrote:

From: Ido Plat 

Otherwise TCG would assume the register that holds t1 would be constant
and reuse whenever it needs the value within it.

Cc: qemu-sta...@nongnu.org
Fixes: f1ea739bd598 ("target/s390x: Use tcg_constant_* in local contexts")
Reviewed-by: Ilya Leoshkevich 
Reviewed-by: Richard Henderson 
[iii: Adjust a newline and capitalization, add tags]
Signed-off-by: Ido Plat 
Signed-off-by: Ilya Leoshkevich 
---


Reviewed-by: David Hildenbrand 

--
Cheers,

David / dhildenb




[PATCH 2/2] vl: do not assert if sev-guest is used together with TCG

2024-03-18 Thread Paolo Bonzini
cgs->ready can be false if the accelerator does not look at
current_machine->cgs altogether.

Assume that the lack of initialization is due to this, and
report a nicer error instead of an assertion failure:

$ qemu-system-x86_64 -object 
sev-guest,id=sev0,policy=0x5,id=sev0,cbitpos=51,reduced-phys-bits=1 -M 
confidential-guest-support=sev0
qemu-system-x86_64: ../softmmu/vl.c:2619: qemu_machine_creation_done: 
Assertion `machine->cgs->ready' failed.

Signed-off-by: Paolo Bonzini 
---
 system/vl.c | 9 -
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/system/vl.c b/system/vl.c
index 0c970cf0203..c6442229824 100644
--- a/system/vl.c
+++ b/system/vl.c
@@ -2676,11 +2676,10 @@ static bool qemu_machine_creation_done(Error **errp)
 
 qdev_machine_creation_done();
 
-if (machine->cgs) {
-/*
- * Verify that Confidential Guest Support has actually been initialized
- */
-assert(machine->cgs->ready);
+if (machine->cgs && !machine->cgs->ready) {
+error_setg(errp, "accelerator does not support confidential guest %s",
+   object_get_typename(OBJECT(machine->cgs)));
+exit(1);
 }
 
 if (foreach_device_config(DEV_GDB, gdbserver_start) < 0) {
-- 
2.44.0




[PATCH 0/2] avoid assertion failure when trying confidential guests without KVM

2024-03-18 Thread Paolo Bonzini
When using confidential guests and forgetting the accelerator, the result
is not very nice:

$ qemu-system-x86_64 -object 
sev-guest,id=sev0,policy=0x5,id=sev0,cbitpos=51,reduced-phys-bits=1 -M 
confidential-guest-support=sev0
qemu-system-x86_64: ../softmmu/vl.c:2619: qemu_machine_creation_done: 
Assertion `machine->cgs->ready' failed.

Assume that the lack of initialization is due to missing code in the
accelerator to look at current_machine->cgs, and report a nicer
error error.

Paolo Bonzini (2):
  vl: convert qemu_machine_creation_done() to Error **
  vl: do not assert if sev-guest is used together with TCG

 system/vl.c | 19 +++
 1 file changed, 11 insertions(+), 8 deletions(-)

-- 
2.44.0




[PATCH 1/2] vl: convert qemu_machine_creation_done() to Error **

2024-03-18 Thread Paolo Bonzini
Allow using Error ** to pass an error string up to qmp_x_exit_preconfig()
and possibly main().

Signed-off-by: Paolo Bonzini 
---
 system/vl.c | 10 +++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/system/vl.c b/system/vl.c
index 70f4cece7f9..0c970cf0203 100644
--- a/system/vl.c
+++ b/system/vl.c
@@ -2653,7 +2653,7 @@ static void qemu_create_cli_devices(void)
 rom_reset_order_override();
 }
 
-static void qemu_machine_creation_done(void)
+static bool qemu_machine_creation_done(Error **errp)
 {
 MachineState *machine = MACHINE(qdev_get_machine());
 
@@ -2684,7 +2684,8 @@ static void qemu_machine_creation_done(void)
 }
 
 if (foreach_device_config(DEV_GDB, gdbserver_start) < 0) {
-exit(1);
+error_setg(errp, "could not start gdbserver");
+return false;
 }
 if (!vga_interface_created && !default_vga &&
 vga_interface_type != VGA_NONE) {
@@ -2692,6 +2693,7 @@ static void qemu_machine_creation_done(void)
 "type does not use that option; "
 "No VGA device has been created");
 }
+return true;
 }
 
 void qmp_x_exit_preconfig(Error **errp)
@@ -2703,7 +2705,9 @@ void qmp_x_exit_preconfig(Error **errp)
 
 qemu_init_board();
 qemu_create_cli_devices();
-qemu_machine_creation_done();
+if (!qemu_machine_creation_done(errp)) {
+return;
+}
 
 if (loadvm) {
 RunState state = autostart ? RUN_STATE_RUNNING : runstate_get();
-- 
2.44.0




Re: [PATCH v2 2/2] tests/tcg/s390x: Test TEST AND SET

2024-03-18 Thread Richard Henderson

On 3/18/24 10:27, Ilya Leoshkevich wrote:

Add a small test to prevent regressions.

Signed-off-by: Ilya Leoshkevich
---
  tests/tcg/s390x/Makefile.target |  1 +
  tests/tcg/s390x/ts.c| 35 +
  2 files changed, 36 insertions(+)
  create mode 100644 tests/tcg/s390x/ts.c


Reviewed-by: Richard Henderson 

r~



Re: [PATCH 2/4] i386/sev: Switch to use confidential_guest_kvm_init()

2024-03-18 Thread Paolo Bonzini
On Thu, Feb 29, 2024 at 7:01 AM Xiaoyao Li  wrote:
>
> Use confidential_guest_kvm_init() instead of calling SEV specific
> sev_kvm_init(). As a bouns, it fits to future TDX when TDX implements
> its own confidential_guest_support and .kvm_init().
>
> Move the "TypeInfo sev_guest_info" definition and related functions to
> the end of the file, to avoid declaring the sev_kvm_init() ahead.
>
> Delete the sve-stub.c since it's not needed anymore.
>
> Signed-off-by: Xiaoyao Li 
> ---
> Changes from rfc v1:
> - check ms->cgs not NULL before calling confidential_guest_kvm_init();
> - delete the sev-stub.c;

Queued, with just one small simplification that can be done on top:

diff --git a/target/i386/sev.c b/target/i386/sev.c
index e89d64fa52..b8f79d34d1 100644
--- a/target/i386/sev.c
+++ b/target/i386/sev.c
@@ -851,18 +851,13 @@ sev_vm_state_change(void *opaque, bool running,
RunState state)

 static int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp)
 {
-SevGuestState *sev
-= (SevGuestState *)object_dynamic_cast(OBJECT(cgs), TYPE_SEV_GUEST);
+SevGuestState *sev = SEV_GUEST(cgs);
 char *devname;
 int ret, fw_error, cmd;
 uint32_t ebx;
 uint32_t host_cbitpos;
 struct sev_user_data_status status = {};

-if (!sev) {
-return 0;
-}
-
 ret = ram_block_discard_disable(true);
 if (ret) {
 error_report("%s: cannot disable RAM discard", __func__);

Thanks!

Paolo

> ---
>  target/i386/kvm/kvm.c   |  10 +--
>  target/i386/kvm/meson.build |   2 -
>  target/i386/kvm/sev-stub.c  |  21 ---
>  target/i386/sev.c   | 120 +++-
>  target/i386/sev.h   |   2 -
>  5 files changed, 68 insertions(+), 87 deletions(-)
>  delete mode 100644 target/i386/kvm/sev-stub.c
>
> diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c
> index 42970ab046fa..ca4e1fb72dd9 100644
> --- a/target/i386/kvm/kvm.c
> +++ b/target/i386/kvm/kvm.c
> @@ -2531,10 +2531,12 @@ int kvm_arch_init(MachineState *ms, KVMState *s)
>   * mechanisms are supported in future (e.g. TDX), they'll need
>   * their own initialization either here or elsewhere.
>   */
> -ret = sev_kvm_init(ms->cgs, _err);
> -if (ret < 0) {
> -error_report_err(local_err);
> -return ret;
> +if (ms->cgs) {
> +ret = confidential_guest_kvm_init(ms->cgs, _err);
> +if (ret < 0) {
> +error_report_err(local_err);
> +return ret;
> +}
>  }
>
>  has_xcrs = kvm_check_extension(s, KVM_CAP_XCRS);
> diff --git a/target/i386/kvm/meson.build b/target/i386/kvm/meson.build
> index 84d9143e6029..e7850981e62d 100644
> --- a/target/i386/kvm/meson.build
> +++ b/target/i386/kvm/meson.build
> @@ -7,8 +7,6 @@ i386_kvm_ss.add(files(
>
>  i386_kvm_ss.add(when: 'CONFIG_XEN_EMU', if_true: files('xen-emu.c'))
>
> -i386_kvm_ss.add(when: 'CONFIG_SEV', if_false: files('sev-stub.c'))
> -
>  i386_system_ss.add(when: 'CONFIG_HYPERV', if_true: files('hyperv.c'), 
> if_false: files('hyperv-stub.c'))
>
>  i386_system_ss.add_all(when: 'CONFIG_KVM', if_true: i386_kvm_ss)
> diff --git a/target/i386/kvm/sev-stub.c b/target/i386/kvm/sev-stub.c
> deleted file mode 100644
> index 1be5341e8a6a..
> --- a/target/i386/kvm/sev-stub.c
> +++ /dev/null
> @@ -1,21 +0,0 @@
> -/*
> - * QEMU SEV stub
> - *
> - * Copyright Advanced Micro Devices 2018
> - *
> - * Authors:
> - *  Brijesh Singh 
> - *
> - * This work is licensed under the terms of the GNU GPL, version 2 or later.
> - * See the COPYING file in the top-level directory.
> - *
> - */
> -
> -#include "qemu/osdep.h"
> -#include "sev.h"
> -
> -int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp)
> -{
> -/* If we get here, cgs must be some non-SEV thing */
> -return 0;
> -}
> diff --git a/target/i386/sev.c b/target/i386/sev.c
> index 173de91afe7d..19e79d3631d0 100644
> --- a/target/i386/sev.c
> +++ b/target/i386/sev.c
> @@ -353,63 +353,6 @@ static void sev_guest_set_kernel_hashes(Object *obj, 
> bool value, Error **errp)
>  sev->kernel_hashes = value;
>  }
>
> -static void
> -sev_guest_class_init(ObjectClass *oc, void *data)
> -{
> -object_class_property_add_str(oc, "sev-device",
> -  sev_guest_get_sev_device,
> -  sev_guest_set_sev_device);
> -object_class_property_set_description(oc, "sev-device",
> -"SEV device to use");
> -object_class_property_add_str(oc, "dh-cert-file",
> -  sev_guest_get_dh_cert_file,
> -  sev_guest_set_dh_cert_file);
> -object_class_property_set_description(oc, "dh-cert-file",
> -"guest owners DH certificate (encoded with base64)");
> -object_class_property_add_str(oc, "session-file",
> -  sev_guest_get_session_file,
> -  sev_guest_set_session_file);
> -

Re: [PATCH 7/7] target/hppa: fix do_stdby_e()

2024-03-18 Thread Richard Henderson

On 3/17/24 12:14, Sven Schnelle wrote:

stdby,e,m was writing data from the wrong half of the register
into memory for cases 0-3.

Signed-off-by: Sven Schnelle
---
  target/hppa/op_helper.c | 10 +-
  1 file changed, 5 insertions(+), 5 deletions(-)


Fixes: 25460fc5a71 ("target/hppa: Implement STDBY")
Reviewed-by: Richard Henderson 


r~



[PULL v2 0/4] machine development tool

2024-03-18 Thread Maksim Davydov
The following changes since commit ba49d760eb04630e7b15f423ebecf6c871b8f77b:

  Merge tag 'pull-maintainer-final-130324-1' of https://gitlab.com/stsquad/qemu 
into staging (2024-03-13 15:12:14 +)

are available in the Git repository at:

  https://gitlab.com/davydov-max/qemu.git tags/pull-compare-mt-2024-03-19

for you to fetch changes up to e12b89f71ba5b93877b6a3adc379a6369d0c9bab:

  scripts: add script to compare compatibility properties (2024-03-19 00:13:54 
+0300)


Please note. This is the first pull request from me.
My public GPG key is available here
https://keys.openpgp.org/vks/v1/by-fingerprint/CDB5BEEF8837142579F5CDFE8E927E10F72F78D4


scripts: add a new script for machine development



Maksim Davydov (4):
  qom: add default value
  qmp: add dump machine type compatibility properties
  python/qemu/machine: add method to retrieve QEMUMachine::binary field
  scripts: add script to compare compatibility properties

 MAINTAINERS  |   5 +
 hw/core/machine-qmp-cmds.c   |  23 +-
 python/qemu/machine/machine.py   |   5 +
 qapi/machine.json|  67 -
 qom/qom-qmp-cmds.c   |   1 +
 scripts/compare-machine-types.py | 486 +++
 tests/qtest/fuzz/qos_fuzz.c  |   2 +-
 7 files changed, 585 insertions(+), 4 deletions(-)
 create mode 100755 scripts/compare-machine-types.py

-- 
2.34.1




[PULL v2 3/4] python/qemu/machine: add method to retrieve QEMUMachine::binary field

2024-03-18 Thread Maksim Davydov
Add a supportive property to access the path to the QEMU binary

Signed-off-by: Maksim Davydov 
Reviewed-by: John Snow 
Reviewed-by: Philippe Mathieu-Daudé 
---
 python/qemu/machine/machine.py | 5 +
 1 file changed, 5 insertions(+)

diff --git a/python/qemu/machine/machine.py b/python/qemu/machine/machine.py
index 31cb9d617d..f648f6af45 100644
--- a/python/qemu/machine/machine.py
+++ b/python/qemu/machine/machine.py
@@ -328,6 +328,11 @@ def args(self) -> List[str]:
 """Returns the list of arguments given to the QEMU binary."""
 return self._args
 
+@property
+def binary(self) -> str:
+"""Returns path to the QEMU binary"""
+return self._binary
+
 def _pre_launch(self) -> None:
 if self._qmp_set:
 if self._monitor_address is None:
-- 
2.34.1




[PULL v2 4/4] scripts: add script to compare compatibility properties

2024-03-18 Thread Maksim Davydov
This script runs QEMU to obtain compat_props of machines and default
values of different types of drivers to produce comparison table. This
table can be used to compare machine types to choose the most suitable
machine or compare binaries to be sure that migration to the newer version
will save all device properties. Also the json or csv format of this
table can be used to check does a new machine affect the previous ones by
comparing tables with and without the new machine.

Default values (that will be used without machine compat_props) of
properties are needed to fill "holes" in the table (one machine has
the property but another machine not. For instance, 2.12 machine has
`{ "EPYC-" TYPE_X86_CPU, "xlevel", "0x800a" }`, but compat_pros of
3.1 machine doesn't have it. Thus, to compare these machines we need to
get unknown value of "EPYC-x86_64-cpu-xlevel" for 3.1 machine. These
unknown values in the table are called "holes". To get values for these
"holes" the script uses list of appropriate methods.)

Notes:
* Some init values from the devices can't be available like properties
  from virtio-9p when configure has --disable-virtfs. This situations will
  be seen in the table as "unavailable driver".
* Default values can be obtained in an unobvious way, like x86 features.
  If the script doesn't know how to get property default value to compare
  one machine with another it fills "holes" with "unavailable method". This
  is done because script uses whitelist model to get default values of
  different types. It means that the method that can't be applied to a new
  type that can crash this script. It is better to get an "unavailable
  driver" when creating a new machine with new compatible properties than
  to break this script. So it turns out a more stable and generic script.
* If the default value can't be obtained because this property doesn't
  exist or because this property can't have default value, appropriate
  "hole" will be filled by "unknown property" or "no default value"
* If the property is applied to the abstract class, the script collects
  default values from all child classes and prints all these classes
* Raw table (--raw flag) should be used with json/csv parameters for
  scripts and etc. Human-readable (default) format contains transformed
  and simplified values and it doesn't contain lines with the same values
  in columns

Example:
./scripts/compare-machine-types.py --mt pc-q35-6.2 pc-q35-7.1
╒══╤══╤╤╕
│  Driver  │ Property │  build/qemu-system-x86_64  │  
build/qemu-system-x86_64  │
│  │  │ pc-q35-6.2 │
 pc-q35-7.1 │
╞══╪══╪╪╡
│ PIIX4_PM │ x-not-migrate-acpi-index │True│
   False│
├──┼──┼┼┤
│ arm-gicv3-common │ force-8-bit-prio │True│
 unavailable driver │
├──┼──┼┼┤
│ nvme-ns  │  eui64-default   │True│
   False│
├──┼──┼┼┤
│virtio-mem│  unplugged-inaccessible  │   False│
auto│
╘══╧══╧╧╛

Signed-off-by: Maksim Davydov 
Reviewed-by: Vladimir Sementsov-Ogievskiy 
---
 MAINTAINERS  |   5 +
 scripts/compare-machine-types.py | 486 +++
 2 files changed, 491 insertions(+)
 create mode 100755 scripts/compare-machine-types.py

diff --git a/MAINTAINERS b/MAINTAINERS
index 409d7db4d4..1f5e77b518 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -4255,3 +4255,8 @@ Code Coverage Tools
 M: Alex Bennée 
 S: Odd Fixes
 F: scripts/coverage/
+
+Machine development tool
+M: Maksim Davydov 
+S: Supported
+F: scripts/compare-machine-types.py
diff --git a/scripts/compare-machine-types.py b/scripts/compare-machine-types.py
new file mode 100755
index 00..2af3995eb8
--- /dev/null
+++ b/scripts/compare-machine-types.py
@@ -0,0 +1,486 @@
+#!/usr/bin/env python3
+#
+# Script to compare machine type compatible properties (include/hw/boards.h).
+# compat_props are applied to the driver during initialization to change
+# default values, for instance, to maintain compatibility.
+# This script constructs table with machines and values of their compat_props
+# to compare and to find places for improvements or places with bugs. If
+# during the comparison, some machine type doesn't have a property (it 

[PULL v2 2/4] qmp: add dump machine type compatibility properties

2024-03-18 Thread Maksim Davydov
To control that creating new machine type doesn't affect the previous
types (their compat_props) and to check complex compat_props inheritance
we need qmp command to print machine type compatibility properties.
This patch adds the ability to get list of all the compat_props of the
corresponding supported machines for their comparison via new optional
argument of "query-machines" command. Since information on compatibility
properties can increase the command output by a factor of 40, add an
argument to enable it, default off.

Signed-off-by: Maksim Davydov 
Reviewed-by: Vladimir Sementsov-Ogievskiy 
Acked-by: Markus Armbruster 
---
 hw/core/machine-qmp-cmds.c  | 23 -
 qapi/machine.json   | 67 +++--
 tests/qtest/fuzz/qos_fuzz.c |  2 +-
 3 files changed, 88 insertions(+), 4 deletions(-)

diff --git a/hw/core/machine-qmp-cmds.c b/hw/core/machine-qmp-cmds.c
index 4b72009cd3..2fac989623 100644
--- a/hw/core/machine-qmp-cmds.c
+++ b/hw/core/machine-qmp-cmds.c
@@ -65,7 +65,8 @@ CpuInfoFastList *qmp_query_cpus_fast(Error **errp)
 return head;
 }
 
-MachineInfoList *qmp_query_machines(Error **errp)
+MachineInfoList *qmp_query_machines(bool has_compat_props, bool compat_props,
+Error **errp)
 {
 GSList *el, *machines = object_class_get_list(TYPE_MACHINE, false);
 MachineInfoList *mach_list = NULL;
@@ -97,6 +98,26 @@ MachineInfoList *qmp_query_machines(Error **errp)
 info->default_ram_id = g_strdup(mc->default_ram_id);
 }
 
+if (compat_props && mc->compat_props) {
+int i;
+info->compat_props = NULL;
+CompatPropertyList **tail = &(info->compat_props);
+info->has_compat_props = true;
+
+for (i = 0; i < mc->compat_props->len; i++) {
+GlobalProperty *mt_prop = g_ptr_array_index(mc->compat_props,
+i);
+CompatProperty *prop;
+
+prop = g_malloc0(sizeof(*prop));
+prop->qom_type = g_strdup(mt_prop->driver);
+prop->property = g_strdup(mt_prop->property);
+prop->value = g_strdup(mt_prop->value);
+
+QAPI_LIST_APPEND(tail, prop);
+}
+}
+
 QAPI_LIST_PREPEND(mach_list, info);
 }
 
diff --git a/qapi/machine.json b/qapi/machine.json
index bb5a178909..2de4d16a39 100644
--- a/qapi/machine.json
+++ b/qapi/machine.json
@@ -135,6 +135,26 @@
 ##
 { 'command': 'query-cpus-fast', 'returns': [ 'CpuInfoFast' ] }
 
+##
+# @CompatProperty:
+#
+# Property default values specific to a machine type, for use by
+# scripts/compare-machine-types.
+#
+# @qom-type: name of the QOM type to which the default applies
+#
+# @property: name of its property to which the default applies
+#
+# @value: the default value (machine-specific default can overwrite
+# the "default" default, to avoid this use -machine none)
+#
+# Since: 9.1
+##
+{ 'struct': 'CompatProperty',
+  'data': { 'qom-type': 'str',
+'property': 'str',
+'value': 'str' } }
+
 ##
 # @MachineInfo:
 #
@@ -166,6 +186,14 @@
 #
 # @acpi: machine type supports ACPI (since 8.0)
 #
+# @compat-props: The machine type's compatibility properties.  Only
+# present when query-machines argument @compat-props is true.
+# (since 9.1)
+#
+# Features:
+#
+# @unstable: Member @compat-props is experimental.
+#
 # Since: 1.2
 ##
 { 'struct': 'MachineInfo',
@@ -173,18 +201,53 @@
 '*is-default': 'bool', 'cpu-max': 'int',
 'hotpluggable-cpus': 'bool',  'numa-mem-supported': 'bool',
 'deprecated': 'bool', '*default-cpu-type': 'str',
-'*default-ram-id': 'str', 'acpi': 'bool' } }
+'*default-ram-id': 'str', 'acpi': 'bool',
+'*compat-props': { 'type': ['CompatProperty'],
+   'features': ['unstable'] } } }
 
 ##
 # @query-machines:
 #
 # Return a list of supported machines
 #
+# @compat-props: if true, also return compatibility properties.
+# (default: false) (since 9.1)
+#
+# Features:
+#
+# @unstable: Argument @compat-props is experimental.
+#
 # Returns: a list of MachineInfo
 #
 # Since: 1.2
+#
+# Example:
+#
+# -> { "execute": "query-machines", "arguments": { "compat-props": true } }
+# <- { "return": [
+#   {
+#  "hotpluggable-cpus": true,
+#  "name": "pc-q35-6.2",
+#  "compat-props": [
+#   {
+#  "qom-type": "virtio-mem",
+#  "property": "unplugged-inaccessible",
+#  "value": "off"
+#   }
+#   ],
+#   "numa-mem-supported": false,
+#   "default-cpu-type": "qemu64-x86_64-cpu",
+#   "cpu-max": 288,
+#   "deprecated": false,
+#   

[PULL v2 1/4] qom: add default value

2024-03-18 Thread Maksim Davydov
qmp_qom_list_properties can print default values if they are available
as qmp_device_list_properties does, because both of them use the
ObjectPropertyInfo structure with default_value field. This can be useful
when working with "not device" types (e.g. memory-backend).

Signed-off-by: Maksim Davydov 
Reviewed-by: Vladimir Sementsov-Ogievskiy 
Reviewed-by: Philippe Mathieu-Daudé 
Reviewed-by: Markus Armbruster 
---
 qom/qom-qmp-cmds.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/qom/qom-qmp-cmds.c b/qom/qom-qmp-cmds.c
index 7c087299de..e91a235347 100644
--- a/qom/qom-qmp-cmds.c
+++ b/qom/qom-qmp-cmds.c
@@ -212,6 +212,7 @@ ObjectPropertyInfoList *qmp_qom_list_properties(const char 
*typename,
 info->name = g_strdup(prop->name);
 info->type = g_strdup(prop->type);
 info->description = g_strdup(prop->description);
+info->default_value = qobject_ref(prop->defval);
 
 QAPI_LIST_PREPEND(prop_list, info);
 }
-- 
2.34.1




Re: [PATCH 6/7] target/hppa: mask privilege bits in mfia

2024-03-18 Thread Richard Henderson

On 3/17/24 12:14, Sven Schnelle wrote:

mfia should return only the iaoq bits without privilege
bits.

Signed-off-by: Sven Schnelle 
---
  target/hppa/translate.c | 2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)


Fixes: 98a9cb792c8 ("target-hppa: Implement system and memory-management insns")
Reviewed-by: Richard Henderson 


r~



Re: [PATCH 5/7] target/hppa: copy new_spc to iasq_f on be,n instruction

2024-03-18 Thread Richard Henderson

On 3/17/24 12:14, Sven Schnelle wrote:

Otherwise the first instruction at the new location gets executed from
the old space.

Signed-off-by: Sven Schnelle 
---
  target/hppa/translate.c | 3 +++
  1 file changed, 3 insertions(+)

diff --git a/target/hppa/translate.c b/target/hppa/translate.c
index 58d7ec1ade..a09112e4ae 100644
--- a/target/hppa/translate.c
+++ b/target/hppa/translate.c
@@ -3777,6 +3777,9 @@ static bool trans_be(DisasContext *ctx, arg_be *a)
  }
  copy_iaoq_entry(ctx, cpu_iaoq_b, -1, tmp);
  tcg_gen_mov_i64(cpu_iasq_b, new_spc);
+if (a->n) {
+tcg_gen_mov_i64(cpu_iasq_f, new_spc);
+}
  nullify_set(ctx, a->n);
  }
  tcg_gen_lookup_and_goto_ptr();



Without use_nullify_skip(), we're going to execute the next instruction even if we know it 
is nullified (a->n).  This is usually because there's a page crossing or breakpoint, and 
we need to take the exception that might be raised there.


So, we advance the queue:

copy_iaoq_entry(ctx, cpu_iaoq_f, ctx->iaoq_b, cpu_iaoq_b);
if (ctx->iaoq_b == -1) {
tcg_gen_mov_i64(cpu_iasq_f, cpu_iasq_b);
}

then put the branch destination at the back of the queue:

copy_iaoq_entry(ctx, cpu_iaoq_b, -1, tmp);
tcg_gen_mov_i64(cpu_iasq_b, new_spc);

Note that iaoq_b is always -1 on a space change.

So your change does not look correct.
What is the issue that you saw?


r~



Re: [PATCH 4/7] target/hppa: exit tb on flush cache instructions

2024-03-18 Thread Richard Henderson

On 3/17/24 12:14, Sven Schnelle wrote:

When the guest modifies the tb it is currently executing from,
it executes a fic instruction. Exit the tb on such instruction,
otherwise we might execute stale code.

Signed-off-by: Sven Schnelle 
---
  target/hppa/translate.c | 2 ++
  1 file changed, 2 insertions(+)

diff --git a/target/hppa/translate.c b/target/hppa/translate.c
index 8ba31567e8..58d7ec1ade 100644
--- a/target/hppa/translate.c
+++ b/target/hppa/translate.c
@@ -1940,6 +1940,7 @@ static void do_page_zero(DisasContext *ctx)
  static bool trans_nop(DisasContext *ctx, arg_nop *a)
  {
  cond_free(>null_cond);
+ctx->base.is_jmp = DISAS_IAQ_N_STALE;
  return true;
  }
  
@@ -2290,6 +2291,7 @@ static bool trans_nop_addrx(DisasContext *ctx, arg_ldst *a)

  save_gpr(ctx, a->b, dest);
  }
  cond_free(>null_cond);
+ctx->base.is_jmp = DISAS_IAQ_N_STALE;
  return true;
  }
  


You should create new functions for fic,

static bool trans_fic(DisasContext *ctx, arg_nop *a)
{
ctx->base.is_jmp = DISAS_IAQ_N_STALE;
return trans_nop(ctx, a);
}

because fid and pdc also use trans_nop/trans_nop_addrx.


r~



Re: [PATCH 3/7] target/hppa: fix access_id check

2024-03-18 Thread Richard Henderson

On 3/17/24 12:14, Sven Schnelle wrote:

+static bool match_prot_id(CPUHPPAState *env, uint32_t access_id, uint32_t 
*_pid)
+{
+for (int i = 0; i < 8; i++) {
+uint32_t pid = get_pid(env, i);


There are only 4 pid's for pa1.x.


+static uint32_t get_pid(CPUHPPAState *env, int num)
+{
+const struct pid_map {
+int reg;
+bool shift;
+} *pid;
+
+const struct pid_map pids64[] = {
+{ .reg = 8,  .shift = true  },
+{ .reg = 8,  .shift = false },
+{ .reg = 9,  .shift = true  },
+{ .reg = 9,  .shift = false },
+{ .reg = 12, .shift = true  },
+{ .reg = 12, .shift = false },
+{ .reg = 13, .shift = true  },
+{ .reg = 13, .shift = false }
+};
+
+const struct pid_map pids32[] = {
+{ .reg = 8,  .shift = false  },
+{ .reg = 9,  .shift = false  },
+{ .reg = 12, .shift = false  },
+{ .reg = 13, .shift = false  },
+};
+
+if (hppa_is_pa20(env)) {


This predicate is fairly expensive -- you don't want to put it deep inside a 
loop.
The table is very predictable.  Moreover, you don't need to test these in any particular 
order.



 /* If bits [31:1] match, and bit 0 is set, suppress write.  */
-int match = ent->access_id * 2 + 1;
-
-if (match == env->cr[CR_PID1] || match == env->cr[CR_PID2] ||
-match == env->cr[CR_PID3] || match == env->cr[CR_PID4]) {
-prot &= PAGE_READ | PAGE_EXEC;
-if (type == PAGE_WRITE) {
-ret = EXCP_DMPI;
-goto egress;
+uint32_t pid;
+if (match_prot_id(env, ent->access_id, )) {
+if ((pid & 1) && (prot & PROT_WRITE)) {
+prot &= ~PROT_WRITE;
 }
+} else {
+prot = 0;
 }


You're losing the data memory protection id trap.

Therefore I suggest

/* Return the set of protections allowed by a PID match. */
static int match_prot_id_1(uint32_t access_id, uint32_t prot_id)
{
if (((access_id ^ (prot_id >> 1) & ACCESS_ID_MASK) == 0) {
return (prot_id & 1
? PROT_EXEC | PROT_READ
: PROT_EXEC | PROT_READ | PROT_WRITE);
}
return 0;
}

static int match_prot_id32(CPUHPPAState *env, uint32_t access_id)
{
int r, i;
for (i = CR_PID1; i <= CR_PID4; ++i) {
r = match_prot_id_1(access_id, env->cr[i]);
if (r) {
return r;
}
}
return 0;
}

static int match_prot_id64(CPUHPPAState *env, uint32_t access_id)
{
int r, i;
for (i = CR_PID1; i <= CR_PID4; ++i) {
r = match_prot_id_1(access_id, env->cr[i]);
if (r) {
return r;
}
r = match_prot_id_1(access_id, env->cr[i] >> 32);
if (r) {
return r;
}
}
return 0;
}

---

if (ent->access_id && MMU_IDX_TO_P(mmu_idx)) {
int access_prot = (hppa_is_pa20(env)
   ? match_prot_id64(env, ent->access_id)
   : match_prot_id32(env, ent->access_id));
if (prot & ~access_prot) {
ret = EXCP_DMPI;
goto egress;
}
}

At this point there are now a couple of hppa_is_pa20() calls within 
hppa_get_physical_address, which could be unified to a single local bool.



r~



Re: [PULL 0/4] machine development tool

2024-03-18 Thread Maksim Davydov



On 3/8/24 06:47, Peter Xu wrote:

On Thu, Mar 07, 2024 at 12:06:59PM +0300, Maksim Davydov wrote:

On 3/6/24 04:57, Peter Xu wrote:

On Tue, Mar 05, 2024 at 03:43:41PM +0100, Markus Armbruster wrote:

Peter Maydell  writes:


On Mon, 4 Mar 2024 at 13:52, Maksim Davydov  wrote:

The following changes since commit e1007b6bab5cf97705bf4f2aaec1f607787355b8:

Merge tag 'pull-request-2024-03-01' ofhttps://gitlab.com/thuth/qemu  into 
staging (2024-03-01 10:14:32 +)

are available in the Git repository at:

https://gitlab.com/davydov-max/qemu.git  tags/pull-compare-mt-2024-03-04

for you to fetch changes up to 7693a2e8518811a907d73a85807ee71dac8fabcb:

scripts: add script to compare compatibility properties (2024-03-04 
14:10:53 +0300)


Please note. This is the first pull request from me.
My public GPG key is available here
https://keys.openpgp.org/vks/v1/by-fingerprint/CDB5BEEF8837142579F5CDFE8E927E10F72F78D4


scripts: add a new script for machine development



Hi; I would prefer this to go through some existing submaintainer
tree if possible, please.

Migration?  QOM?  Not sure.  Cc'ing the maintainers anyway.

Yeah this seems like migration relevant.. however now I'm slightly confused
on when this script should be useful.

According to:

https://lore.kernel.org/qemu-devel/20240222153912.646053-5-davydov-...@yandex-team.ru/

  This script runs QEMU to obtain compat_props of machines and
  default values of different types of drivers to produce comparison
  table. This table can be used to compare machine types to choose
  the most suitable machine or compare binaries to be sure that
  migration to the newer version will save all device
  properties. Also the json or csv format of this table can be used
  to check does a new machine affect the previous ones by comparing
  tables with and without the new machine.

In regards to "choose the most suitable machine": why do you need to choose
a machine?

If it's pretty standalone setup, shouldn't we always try to use the latest
machine type if possible (as normally compat props are only used to keep
compatible with old machine types, and the default should always be
preferred). If it's a cluster setup, IMHO it should depend on the oldest
QEMU version that plans to be supported.  I don't see how such comparison
helps yet in either of the cases..

In regards to "compare binaries to be sure that migration to the newer
version will save all device properties": do we even support migrating
_between_ machine types??

Sololy relying on compat properties to detect device compatibility is also
not reliable.  For example, see VMStateField.field_exists() or similarly,
VMStateDescription.needed(), which are hooks that device can provide to
dynamically decide what device state to be saved/loaded.  Such things are
not reflected in compat properties, so even if compat properties of all
devices are the same between two machine types, it may not mean that the
migration stream will always be compatible.

Thanks,

In fact, the last commit describes the meaning of this series best. Perhaps
it should have been moved to the cover letter:
Often, many teams have their own "machines" inherited from "upstream" to
manage default values of devices. This is done because of the limitations
imposed by the compatibility requirements or the desire to help their
customers better configure their devices. And since machine type has
a hard-to-read structure, it is very easy to make a mistake when
transferring
default values from one machine to another. For example, when some property
is set for the entire abstract class x86_64-cpu (which will be applied to
all
models), and then rolled back for a specific model. The situation is about
the same with changing the default values of devices: if the value changes
in the description of the device itself, then you need to make sure that
nothing changes when using the current machine.
Therefore, there was a desire to make a dev tool that will help quickly
expand
the definition of a machine or compare several machines from different
binary
files. It can be used when rebasing to a new version of qemu and/or for
automatic tests.

OK, thanks.

So is it a migration compatibility issue that you care (migrating VMs from
your old downstream binary to new downstream binary should always succeed),
or perhaps you care more on making sure the features you wanted, i.e., you
want to make sure some specific devices that you care will have the
properties that you expect?

I think compat properties are mostly used for migration purposes, but
indeed it can also be used to keep old behaviors of devices, even if the
migration could succed with/without such a compat property entry.

If it's about migration, 

Re: [PATCH 6/7] target/hppa: mask privilege bits in mfia

2024-03-18 Thread Helge Deller

On 3/17/24 23:14, Sven Schnelle wrote:

mfia should return only the iaoq bits without privilege
bits.

Signed-off-by: Sven Schnelle 


Reviewed-by: Helge Deller 

Helge



---
  target/hppa/translate.c | 2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/target/hppa/translate.c b/target/hppa/translate.c
index a09112e4ae..e47f8f9f47 100644
--- a/target/hppa/translate.c
+++ b/target/hppa/translate.c
@@ -1962,7 +1962,7 @@ static bool trans_mfia(DisasContext *ctx, arg_mfia *a)
  {
  unsigned rt = a->t;
  TCGv_i64 tmp = dest_gpr(ctx, rt);
-tcg_gen_movi_i64(tmp, ctx->iaoq_f);
+tcg_gen_movi_i64(tmp, ctx->iaoq_f & ~3ULL);
  save_gpr(ctx, rt, tmp);

  cond_free(>null_cond);





Re: [PATCH 4/7] target/hppa: exit tb on flush cache instructions

2024-03-18 Thread Helge Deller

On 3/17/24 23:14, Sven Schnelle wrote:

When the guest modifies the tb it is currently executing from,
it executes a fic instruction. Exit the tb on such instruction,
otherwise we might execute stale code.

Signed-off-by: Sven Schnelle 
---
  target/hppa/translate.c | 2 ++
  1 file changed, 2 insertions(+)

diff --git a/target/hppa/translate.c b/target/hppa/translate.c
index 8ba31567e8..58d7ec1ade 100644
--- a/target/hppa/translate.c
+++ b/target/hppa/translate.c
@@ -1940,6 +1940,7 @@ static void do_page_zero(DisasContext *ctx)
  static bool trans_nop(DisasContext *ctx, arg_nop *a)
  {
  cond_free(>null_cond);
+ctx->base.is_jmp = DISAS_IAQ_N_STALE;
  return true;
  }

@@ -2290,6 +2291,7 @@ static bool trans_nop_addrx(DisasContext *ctx, arg_ldst 
*a)
  save_gpr(ctx, a->b, dest);
  }
  cond_free(>null_cond);
+ctx->base.is_jmp = DISAS_IAQ_N_STALE;


I wonder if it makes sense to rename trans_nop() and trans_nop_addrx()
to something like trans_cache_flush() and trans_cache_flush_addrx() ?

Other than that:
Reviewed-by: Helge Deller 

Helge



Re: [PATCH 3/7] target/hppa: fix access_id check

2024-03-18 Thread Helge Deller

On 3/17/24 23:14, Sven Schnelle wrote:

PA2.0 provides 8 instead of 4 PID registers.

Signed-off-by: Sven Schnelle 


Reviewed-by: Helge Deller 
with a few comments below...

Helge


---
  roms/SLOF|  2 +-
  target/hppa/mem_helper.c | 67 +++-
  2 files changed, 60 insertions(+), 9 deletions(-)

diff --git a/roms/SLOF b/roms/SLOF
index 3a259df244..6b6c16b4b4 16
--- a/roms/SLOF
+++ b/roms/SLOF
@@ -1 +1 @@
-Subproject commit 3a259df2449fc4a4e43ab5f33f0b2c66484b4bc3
+Subproject commit 6b6c16b4b40763507cf1f518096f3c3883c5cf2d


this doesn't belong here.



diff --git a/target/hppa/mem_helper.c b/target/hppa/mem_helper.c
index 80f51e753f..e4e3f6cdbe 100644
--- a/target/hppa/mem_helper.c
+++ b/target/hppa/mem_helper.c
@@ -152,6 +152,59 @@ static HPPATLBEntry *hppa_alloc_tlb_ent(CPUHPPAState *env)
  return ent;
  }

+static uint32_t get_pid(CPUHPPAState *env, int num)
+{
+const struct pid_map {
+int reg;
+bool shift;


does it makes sense to condense it, e.g.:
 +unsigned char reg:7,
 +unsigned char shift:1;

Helge



+} *pid;
+
+const struct pid_map pids64[] = {
+{ .reg = 8,  .shift = true  },
+{ .reg = 8,  .shift = false },
+{ .reg = 9,  .shift = true  },
+{ .reg = 9,  .shift = false },
+{ .reg = 12, .shift = true  },
+{ .reg = 12, .shift = false },
+{ .reg = 13, .shift = true  },
+{ .reg = 13, .shift = false }
+};
+
+const struct pid_map pids32[] = {
+{ .reg = 8,  .shift = false  },
+{ .reg = 9,  .shift = false  },
+{ .reg = 12, .shift = false  },
+{ .reg = 13, .shift = false  },
+};
+
+if (hppa_is_pa20(env)) {
+pid = pids64 + num;
+} else {
+pid = pids32 + num;
+}
+uint64_t cr = env->cr[pid->reg];
+if (pid->shift) {
+cr >>= 32;
+} else {
+cr &= 0x;
+}
+return cr;
+}
+
+#define ACCESS_ID_MASK 0x
+
+static bool match_prot_id(CPUHPPAState *env, uint32_t access_id, uint32_t 
*_pid)
+{
+for (int i = 0; i < 8; i++) {
+uint32_t pid = get_pid(env, i);
+if ((access_id & ACCESS_ID_MASK) == ((pid >> 1) & ACCESS_ID_MASK)) {
+*_pid = pid;
+return true;
+}
+}
+return false;
+}
+
  int hppa_get_physical_address(CPUHPPAState *env, vaddr addr, int mmu_idx,
int type, hwaddr *pphys, int *pprot,
HPPATLBEntry **tlb_entry)
@@ -227,15 +280,13 @@ int hppa_get_physical_address(CPUHPPAState *env, vaddr 
addr, int mmu_idx,
  /* access_id == 0 means public page and no check is performed */
  if (ent->access_id && MMU_IDX_TO_P(mmu_idx)) {
  /* If bits [31:1] match, and bit 0 is set, suppress write.  */
-int match = ent->access_id * 2 + 1;
-
-if (match == env->cr[CR_PID1] || match == env->cr[CR_PID2] ||
-match == env->cr[CR_PID3] || match == env->cr[CR_PID4]) {
-prot &= PAGE_READ | PAGE_EXEC;
-if (type == PAGE_WRITE) {
-ret = EXCP_DMPI;
-goto egress;
+uint32_t pid;
+if (match_prot_id(env, ent->access_id, )) {
+if ((pid & 1) && (prot & PROT_WRITE)) {
+prot &= ~PROT_WRITE;
  }
+} else {
+prot = 0;
  }
  }






Re: [PATCH 2/7] target/hppa: fix shrp for wide mode

2024-03-18 Thread Richard Henderson

On 3/17/24 12:14, Sven Schnelle wrote:

Signed-off-by: Sven Schnelle
---
  target/hppa/translate.c | 4 ++--
  1 file changed, 2 insertions(+), 2 deletions(-)


Fixes: f7b775a9c075 ("target/hppa: Implement SHRPD")
Reviewed-by: Richard Henderson 


r~



Re: [PATCH 1/7] target/hppa: ldcw,s uses static shift of 3

2024-03-18 Thread Richard Henderson

On 3/17/24 12:14, Sven Schnelle wrote:

Signed-off-by: Sven Schnelle 
---
  target/hppa/translate.c | 2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/target/hppa/translate.c b/target/hppa/translate.c
index eb2046c5ad..6a513d7d5c 100644
--- a/target/hppa/translate.c
+++ b/target/hppa/translate.c
@@ -3085,7 +3085,7 @@ static bool trans_ldc(DisasContext *ctx, arg_ldst *a)
  dest = dest_gpr(ctx, a->t);
  }
  
-form_gva(ctx, , , a->b, a->x, a->scale ? a->size : 0,

+form_gva(ctx, , , a->b, a->x, a->scale ? 3 : 0,
   a->disp, a->sp, a->m, MMU_DISABLED(ctx));
  
  /*


Whoops, broken since day one.

Fixes: 96d6407f363 ("target-hppa: Implement loads and stores")
Reviewed-by: Richard Henderson 


r~



Re: [PATCH 2/7] target/hppa: fix shrp for wide mode

2024-03-18 Thread Helge Deller

On 3/17/24 23:14, Sven Schnelle wrote:

Signed-off-by: Sven Schnelle 


Reviewed-by: Helge Deller 

Helge


---
  target/hppa/translate.c | 4 ++--
  1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/target/hppa/translate.c b/target/hppa/translate.c
index 6a513d7d5c..8ba31567e8 100644
--- a/target/hppa/translate.c
+++ b/target/hppa/translate.c
@@ -3462,7 +3462,7 @@ static bool trans_shrp_sar(DisasContext *ctx, 
arg_shrp_sar *a)
  /* Install the new nullification.  */
  cond_free(>null_cond);
  if (a->c) {
-ctx->null_cond = do_sed_cond(ctx, a->c, false, dest);
+ctx->null_cond = do_sed_cond(ctx, a->c, a->d, dest);
  }
  return nullify_end(ctx);
  }
@@ -3505,7 +3505,7 @@ static bool trans_shrp_imm(DisasContext *ctx, 
arg_shrp_imm *a)
  /* Install the new nullification.  */
  cond_free(>null_cond);
  if (a->c) {
-ctx->null_cond = do_sed_cond(ctx, a->c, false, dest);
+ctx->null_cond = do_sed_cond(ctx, a->c, a->d, dest);
  }
  return nullify_end(ctx);
  }





[PATCH v2 1/2] target/s390x: Use mutable temporary value for op_ts

2024-03-18 Thread Ilya Leoshkevich
From: Ido Plat 

Otherwise TCG would assume the register that holds t1 would be constant
and reuse whenever it needs the value within it.

Cc: qemu-sta...@nongnu.org
Fixes: f1ea739bd598 ("target/s390x: Use tcg_constant_* in local contexts")
Reviewed-by: Ilya Leoshkevich 
Reviewed-by: Richard Henderson 
[iii: Adjust a newline and capitalization, add tags]
Signed-off-by: Ido Plat 
Signed-off-by: Ilya Leoshkevich 
---
 target/s390x/tcg/translate.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/target/s390x/tcg/translate.c b/target/s390x/tcg/translate.c
index 0d0c672c959..57b7db1ee96 100644
--- a/target/s390x/tcg/translate.c
+++ b/target/s390x/tcg/translate.c
@@ -4781,9 +4781,10 @@ static DisasJumpType op_trXX(DisasContext *s, DisasOps 
*o)
 
 static DisasJumpType op_ts(DisasContext *s, DisasOps *o)
 {
-TCGv_i32 t1 = tcg_constant_i32(0xff);
+TCGv_i32 ff = tcg_constant_i32(0xff);
+TCGv_i32 t1 = tcg_temp_new_i32();
 
-tcg_gen_atomic_xchg_i32(t1, o->in2, t1, get_mem_index(s), MO_UB);
+tcg_gen_atomic_xchg_i32(t1, o->in2, ff, get_mem_index(s), MO_UB);
 tcg_gen_extract_i32(cc_op, t1, 7, 1);
 set_cc_static(s);
 return DISAS_NEXT;
-- 
2.44.0




[PATCH v2 2/2] tests/tcg/s390x: Test TEST AND SET

2024-03-18 Thread Ilya Leoshkevich
Add a small test to prevent regressions.

Signed-off-by: Ilya Leoshkevich 
---
 tests/tcg/s390x/Makefile.target |  1 +
 tests/tcg/s390x/ts.c| 35 +
 2 files changed, 36 insertions(+)
 create mode 100644 tests/tcg/s390x/ts.c

diff --git a/tests/tcg/s390x/Makefile.target b/tests/tcg/s390x/Makefile.target
index e2aba2ec274..a8f86c94498 100644
--- a/tests/tcg/s390x/Makefile.target
+++ b/tests/tcg/s390x/Makefile.target
@@ -47,6 +47,7 @@ TESTS+=add-logical-with-carry
 TESTS+=lae
 TESTS+=cvd
 TESTS+=cvb
+TESTS+=ts
 
 cdsg: CFLAGS+=-pthread
 cdsg: LDFLAGS+=-pthread
diff --git a/tests/tcg/s390x/ts.c b/tests/tcg/s390x/ts.c
new file mode 100644
index 000..441faf30d98
--- /dev/null
+++ b/tests/tcg/s390x/ts.c
@@ -0,0 +1,35 @@
+/*
+ * Test the TEST AND SET instruction.
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+#include 
+#include 
+
+static int ts(char *p)
+{
+int cc;
+
+asm("ts %[p]\n"
+"ipm %[cc]"
+: [cc] "=r" (cc)
+, [p] "+Q" (*p)
+: : "cc");
+
+return (cc >> 28) & 3;
+}
+
+int main(void)
+{
+char c;
+
+c = 0x80;
+assert(ts() == 1);
+assert(c == 0xff);
+
+c = 0x7f;
+assert(ts() == 0);
+assert(c == 0xff);
+
+return EXIT_SUCCESS;
+}
-- 
2.44.0




Re: [PATCH 3/3 for 9.0] Revert "chardev: use a child source for qio input source"

2024-03-18 Thread Marc-André Lureau
Hi

On Mon, Mar 18, 2024 at 10:25 PM Daniel P. Berrangé  wrote:
>
> This reverts commit a7077b8e354d90fec26c2921aa2dea85b90dff90,
> and add comments to explain why child sources cannot be used.
>
> When a GSource is added as a child of another GSource, if its
> 'prepare' function indicates readiness, then the parent's
> 'prepare' function will never be run. The io_watch_poll_prepare
> absolutely *must* be run on every iteration of the main loop,
> to ensure that the chardev backend doesn't feed data to the
> frontend that it is unable to consume.
>
> At the time a7077b8e354d90fec26c2921aa2dea85b90dff90 was made,
> all the child GSource impls were relying on poll'ing an FD,
> so their 'prepare' functions would never indicate readiness
> ahead of poll() being invoked. So the buggy behaviour was
> not noticed and lay dormant.
>
> Relatively recently the QIOChannelTLS impl introduced a
> level 2 child GSource, which checks with GNUTLS whether it
> has cached any data that was decoded but not yet consumed:
>
>   commit ffda5db65aef42266a5053a4be34515106c4c7ee
>   Author: Antoine Damhet 
>   Date:   Tue Nov 15 15:23:29 2022 +0100
>
> io/channel-tls: fix handling of bigger read buffers
>
> Since the TLS backend can read more data from the underlying QIOChannel
> we introduce a minimal child GSource to notify if we still have more
> data available to be read.
>
> Signed-off-by: Antoine Damhet 
> Signed-off-by: Charles Frey 
> Signed-off-by: Daniel P. Berrangé 
>
> With this, it is now quite common for the 'prepare' function
> on a QIOChannelTLS GSource to indicate immediate readiness,
> bypassing the parent GSource 'prepare' function. IOW, the
> critical 'io_watch_poll_prepare' is being skipped on some
> iterations of the main loop. As a result chardev frontend
> asserts are now being triggered as they are fed data they
> are not ready to consume.
>
> A reproducer is as follows:
>
>  * In terminal 1 run a GNUTLS *echo* server
>
>$ gnutls-serv --echo \
>  --x509cafile ca-cert.pem \
>  --x509keyfile server-key.pem \
>  --x509certfile server-cert.pem \
>  -p 9000
>
>  * In terminal 2 run a QEMU guest
>
>$ qemu-system-s390x \
>-nodefaults \
>-display none \
>-object tls-creds-x509,id=tls0,dir=$PWD,endpoint=client \
>-chardev socket,id=con0,host=localhost,port=9000,tls-creds=tls0 \
>-device sclpconsole,chardev=con0 \
>-hda Fedora-Cloud-Base-39-1.5.s390x.qcow2
>
> After the previous patch revert, but before this patch revert,
> this scenario will crash:
>
>   qemu-system-s390x: ../hw/char/sclpconsole.c:73: chr_read: Assertion
>   `size <= SIZE_BUFFER_VT220 - scon->iov_data_len' failed.
>
> This assert indicates that 'tcp_chr_read' was called without
> 'tcp_chr_read_poll' having first been checked for ability to
> receive more data
>
> QEMU's use of a 'prepare' function to create/delete another
> GSource is rather a hack and not normally the kind of thing that
> is expected to be done by a GSource. There is no mechanism to
> force GLib to always run the 'prepare' function of a parent
> GSource. The best option is to simply not use the child source
> concept, and go back to the functional approach previously
> relied on.
>
> Signed-off-by: Daniel P. Berrangé 
> ---
>  chardev/char-io.c | 55 ++-
>  1 file changed, 50 insertions(+), 5 deletions(-)
>
> diff --git a/chardev/char-io.c b/chardev/char-io.c
> index 4451128cba..3c725f530b 100644
> --- a/chardev/char-io.c
> +++ b/chardev/char-io.c
> @@ -33,6 +33,7 @@ typedef struct IOWatchPoll {
>  IOCanReadHandler *fd_can_read;
>  GSourceFunc fd_read;
>  void *opaque;
> +GMainContext *context;
>  } IOWatchPoll;
>
>  static IOWatchPoll *io_watch_poll_from_source(GSource *source)
> @@ -50,28 +51,58 @@ static gboolean io_watch_poll_prepare(GSource *source,
>  return FALSE;
>  }
>
> +/*
> + * We do not register the QIOChannel watch as a child GSource.
> + * The 'prepare' function on the parent GSource will be
> + * skipped if a child GSource's 'prepare' function indicates
> + * readiness. We need this prepare function be guaranteed

argh, indeed

I suppose the child 'prepare' could be changed to always return FALSE,
but that would be hackish too

> + * to run on *every* iteration of the main loop, because
> + * it is critical to ensure we remove the QIOChannel watch
> + * if 'fd_can_read' indicates the frontend cannot receive
> + * more data.
> + */
>  if (now_active) {
>  iwp->src = qio_channel_create_watch(
>  iwp->ioc, G_IO_IN | G_IO_ERR | G_IO_HUP | G_IO_NVAL);
>  g_source_set_callback(iwp->src, iwp->fd_read, iwp->opaque, NULL);
> -g_source_add_child_source(source, iwp->src);
> -g_source_unref(iwp->src);
> +g_source_attach(iwp->src, iwp->context);
>  } else {
> -

Re: [External] Re: [PATCH v2 1/1] memory tier: acpi/hmat: create CPUless memory tiers after obtaining HMAT info

2024-03-18 Thread Ho-Ren (Jack) Chuang
I'm working on V3. Thanks for Ying's feedback.

cc: sthanne...@micron.com


On Thu, Mar 14, 2024 at 12:54 AM Huang, Ying  wrote:
>
> "Ho-Ren (Jack) Chuang"  writes:
>
> > On Tue, Mar 12, 2024 at 2:21 AM Huang, Ying  wrote:
> >>
> >> "Ho-Ren (Jack) Chuang"  writes:
> >>
> >> > The current implementation treats emulated memory devices, such as
> >> > CXL1.1 type3 memory, as normal DRAM when they are emulated as normal 
> >> > memory
> >> > (E820_TYPE_RAM). However, these emulated devices have different
> >> > characteristics than traditional DRAM, making it important to
> >> > distinguish them. Thus, we modify the tiered memory initialization 
> >> > process
> >> > to introduce a delay specifically for CPUless NUMA nodes. This delay
> >> > ensures that the memory tier initialization for these nodes is deferred
> >> > until HMAT information is obtained during the boot process. Finally,
> >> > demotion tables are recalculated at the end.
> >> >
> >> > * Abstract common functions into `find_alloc_memory_type()`
> >>
> >> We should move kmem_put_memory_types() (renamed to
> >> mt_put_memory_types()?) too.  This can be put in a separate patch.
> >>
> >
> > Will do! Thanks,
> >
> >
> >>
> >> > Since different memory devices require finding or allocating a memory 
> >> > type,
> >> > these common steps are abstracted into a single function,
> >> > `find_alloc_memory_type()`, enhancing code scalability and conciseness.
> >> >
> >> > * Handle cases where there is no HMAT when creating memory tiers
> >> > There is a scenario where a CPUless node does not provide HMAT 
> >> > information.
> >> > If no HMAT is specified, it falls back to using the default DRAM tier.
> >> >
> >> > * Change adist calculation code to use another new lock, mt_perf_lock.
> >> > In the current implementation, iterating through CPUlist nodes requires
> >> > holding the `memory_tier_lock`. However, `mt_calc_adistance()` will end 
> >> > up
> >> > trying to acquire the same lock, leading to a potential deadlock.
> >> > Therefore, we propose introducing a standalone `mt_perf_lock` to protect
> >> > `default_dram_perf`. This approach not only avoids deadlock but also
> >> > prevents holding a large lock simultaneously.
> >> >
> >> > Signed-off-by: Ho-Ren (Jack) Chuang 
> >> > Signed-off-by: Hao Xiang 
> >> > ---
> >> >  drivers/acpi/numa/hmat.c | 11 ++
> >> >  drivers/dax/kmem.c   | 13 +--
> >> >  include/linux/acpi.h |  6 
> >> >  include/linux/memory-tiers.h |  8 +
> >> >  mm/memory-tiers.c| 70 +---
> >> >  5 files changed, 92 insertions(+), 16 deletions(-)
> >> >
> >> > diff --git a/drivers/acpi/numa/hmat.c b/drivers/acpi/numa/hmat.c
> >> > index d6b85f0f6082..28812ec2c793 100644
> >> > --- a/drivers/acpi/numa/hmat.c
> >> > +++ b/drivers/acpi/numa/hmat.c
> >> > @@ -38,6 +38,8 @@ static LIST_HEAD(targets);
> >> >  static LIST_HEAD(initiators);
> >> >  static LIST_HEAD(localities);
> >> >
> >> > +static LIST_HEAD(hmat_memory_types);
> >> > +
> >>
> >> HMAT isn't a device driver for some memory devices.  So I don't think we
> >> should manage memory types in HMAT.
> >
> > I can put it back in memory-tier.c. How about the list? Do we still
> > need to keep a separate list for storing late_inited memory nodes?
> > And how about the list name if we need to remove the prefix "hmat_"?
>
> I don't think we need a separate list for memory-less nodes.  Just
> iterate all memory-less nodes.
>

Ok. There is no need to keep a list of memory-less nodes. I will
only keep a memory_type list to manage created memory types.


> >
> >> Instead, if the memory_type of a
> >> node isn't set by the driver, we should manage it in memory-tier.c as
> >> fallback.
> >>
> >
> > Do you mean some device drivers may init memory tiers between
> > memory_tier_init() and late_initcall(memory_tier_late_init);?
> > And this is the reason why you mention to exclude
> > "node_memory_types[nid].memtype != NULL" in memory_tier_late_init().
> > Is my understanding correct?
>
> Yes.
>

Thanks.

> >> >  static DEFINE_MUTEX(target_lock);
> >> >
> >> >  /*
> >> > @@ -149,6 +151,12 @@ int acpi_get_genport_coordinates(u32 uid,
> >> >  }
> >> >  EXPORT_SYMBOL_NS_GPL(acpi_get_genport_coordinates, CXL);
> >> >
> >> > +struct memory_dev_type *hmat_find_alloc_memory_type(int adist)
> >> > +{
> >> > + return find_alloc_memory_type(adist, _memory_types);
> >> > +}
> >> > +EXPORT_SYMBOL_GPL(hmat_find_alloc_memory_type);
> >> > +
> >> >  static __init void alloc_memory_initiator(unsigned int cpu_pxm)
> >> >  {
> >> >   struct memory_initiator *initiator;
> >> > @@ -1038,6 +1046,9 @@ static __init int hmat_init(void)
> >> >   if (!hmat_set_default_dram_perf())
> >> >   register_mt_adistance_algorithm(_adist_nb);
> >> >
> >> > + /* Post-create CPUless memory tiers after getting HMAT info */
> >> > + memory_tier_late_init();
> >> > +
> >>
> >> This should be called in memory-tier.c via
> >>
> 

Re: Intention to work on GSoC project

2024-03-18 Thread Sahil
Hi,

I was reading the "Virtqueues and virtio ring: How the data travels"
article [1]. There are a few things that I have not understood in the
"avail rings" section.

Q1.
Step 2 in the "Process to make a buffer available" diagram depicts
how the virtio driver writes the descriptor index in the avail ring.
In the example, the descriptor index #0 is written in the first entry.
But in figure 2, the number 0 is in the 4th position in the avail ring.
Is the avail ring queue an array of "struct virtq_avail" which maintains
metadata such as the number of descriptor indexes in the header?

Also, in the second position, the number changes from 0 (figure 1) to
1 (figure 2). I haven't understood what idx, 0 (later 1) and ring[] represent
in the figures. Does this number represent the number of descriptors
that are currently in the avail ring?

Q2.

There's this paragraph in the article right below the above mentioned
diagram:

> The avail ring must be able to hold the same number of descriptors
> as the descriptor area, and the descriptor area must have a size power
> of two, so idx wraps naturally at some point. For example, if the ring
> size is 256 entries, idx 1 references the same descriptor as idx 257, 513...
> And it will wrap at a 16 bit boundary. This way, neither side needs to
> worry about processing an invalid idx: They are all valid.

I haven't really understood this. I have understood that idx is calculated
as idx mod queue_length. But I haven't understood the "16 bit boundary"
part.

I am also not very clear on how a queue length that is not a power of 2
might cause trouble. Could you please expand on this?

Q3.
I have started going through the source code in "drivers/virtio/virtio_ring.c".
I have understood that the virtio driver runs in the guest's kernel. Does that
mean the drivers in "drivers/virtio/*" are enabled when linux is being run in
a guest VM?

Thanks,
Sahil

[1] https://www.redhat.com/en/blog/virtqueues-and-virtio-ring-how-data-travels







Re: [PATCH for-9.0 v3] vdpa-dev: Fix initialisation order to restore VDUSE compatibility

2024-03-18 Thread Eugenio Perez Martin
On Mon, Mar 18, 2024 at 10:02 AM Michael S. Tsirkin  wrote:
>
> On Mon, Mar 18, 2024 at 12:31:26PM +0800, Jason Wang wrote:
> > On Fri, Mar 15, 2024 at 11:59 PM Kevin Wolf  wrote:
> > >
> > > VDUSE requires that virtqueues are first enabled before the DRIVER_OK
> > > status flag is set; with the current API of the kernel module, it is
> > > impossible to enable the opposite order in our block export code because
> > > userspace is not notified when a virtqueue is enabled.
> > >
> > > This requirement also mathces the normal initialisation order as done by
> > > the generic vhost code in QEMU. However, commit 6c482547 accidentally
> > > changed the order for vdpa-dev and broke access to VDUSE devices with
> > > this.
> > >
> > > This changes vdpa-dev to use the normal order again and use the standard
> > > vhost callback .vhost_set_vring_enable for this. VDUSE devices can be
> > > used with vdpa-dev again after this fix.
> > >
> > > vhost_net intentionally avoided enabling the vrings for vdpa and does
> > > this manually later while it does enable them for other vhost backends.
> > > Reflect this in the vhost_net code and return early for vdpa, so that
> > > the behaviour doesn't change for this device.
> > >
> > > Cc: qemu-sta...@nongnu.org
> > > Fixes: 6c4825476a4351530bcac17abab72295b75ffe98
> > > Signed-off-by: Kevin Wolf 
> > > ---
> > > v2:
> > > - Actually make use of the @enable parameter
> > > - Change vhost_net to preserve the current behaviour
> > >
> > > v3:
> > > - Updated trace point [Stefano]
> > > - Fixed typo in comment [Stefano]
> > >
> > >  hw/net/vhost_net.c | 10 ++
> > >  hw/virtio/vdpa-dev.c   |  5 +
> > >  hw/virtio/vhost-vdpa.c | 29 ++---
> > >  hw/virtio/vhost.c  |  8 +++-
> > >  hw/virtio/trace-events |  2 +-
> > >  5 files changed, 45 insertions(+), 9 deletions(-)
> > >
> > > diff --git a/hw/net/vhost_net.c b/hw/net/vhost_net.c
> > > index e8e1661646..fd1a93701a 100644
> > > --- a/hw/net/vhost_net.c
> > > +++ b/hw/net/vhost_net.c
> > > @@ -541,6 +541,16 @@ int vhost_set_vring_enable(NetClientState *nc, int 
> > > enable)
> > >  VHostNetState *net = get_vhost_net(nc);
> > >  const VhostOps *vhost_ops = net->dev.vhost_ops;
> > >
> > > +/*
> > > + * vhost-vdpa network devices need to enable dataplane virtqueues 
> > > after
> > > + * DRIVER_OK, so they can recover device state before starting 
> > > dataplane.
> > > + * Because of that, we don't enable virtqueues here and leave it to
> > > + * net/vhost-vdpa.c.
> > > + */
> > > +if (nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) {
> > > +return 0;
> > > +}
> >
> > I think we need some inputs from Eugenio, this is only needed for
> > shadow virtqueue during live migration but not other cases.
> >
> > Thanks
>
>
> Yes I think we had a backend flag for this, right? Eugenio can you
> comment please?
>

We have the VHOST_BACKEND_F_ENABLE_AFTER_DRIVER_OK backend flag,
right. If the backend does not offer it, it is better to enable all
the queues here and add a migration blocker in net/vhost-vdpa.c.

So the check should be:
nc->info->type == VHOST_VDPA && (backend_features &
VHOST_BACKEND_F_ENABLE_AFTER_DRIVER_OK).

I can manage to add the migration blocker on top of this patch.

Thanks!




Re: [PATCH 1/3 for 9.0] chardev: lower priority of the HUP GSource in socket chardev

2024-03-18 Thread Marc-André Lureau
On Mon, Mar 18, 2024 at 10:25 PM Daniel P. Berrangé  wrote:
>
> The socket chardev often has 2 GSource object registered against the
> same FD. One is registered all the time and is just intended to handle
> POLLHUP events, while the other gets registered & unregistered on the
> fly as the frontend is ready to receive more data or not.
>
> It is very common for poll() to signal a POLLHUP event at the same time
> as there is pending incoming data from the disconnected client. It is
> therefore essential to process incoming data prior to processing HUP.
> The problem with having 2 GSource on the same FD is that there is no
> guaranteed ordering of execution between them, so the chardev code may
> process HUP first and thus discard data.
>
> This failure scenario is non-deterministic but can be seen fairly
> reliably by reverting a7077b8e354d90fec26c2921aa2dea85b90dff90, and
> then running 'tests/unit/test-char', which will sometimes fail with
> missing data.
>
> Ideally QEMU would only have 1 GSource, but that's a complex code
> refactoring job. The next best solution is to try to ensure ordering
> between the 2 GSource objects. This can be achieved by lowering the
> priority of the HUP GSource, so that it is never dispatched if the
> main GSource is also ready to dispatch. Counter-intuitively, lowering
> the priority of a GSource is done by raising its priority number.
>
> Signed-off-by: Daniel P. Berrangé 

Reviewed-by: Marc-André Lureau 

> ---
>  chardev/char-socket.c | 16 
>  1 file changed, 16 insertions(+)
>
> diff --git a/chardev/char-socket.c b/chardev/char-socket.c
> index 8a0406cc1e..2c4dffc0e6 100644
> --- a/chardev/char-socket.c
> +++ b/chardev/char-socket.c
> @@ -601,6 +601,22 @@ static void update_ioc_handlers(SocketChardev *s)
>
>  remove_hup_source(s);
>  s->hup_source = qio_channel_create_watch(s->ioc, G_IO_HUP);
> +/*
> + * poll() is liable to return POLLHUP even when there is
> + * still incoming data available to read on the FD. If
> + * we have the hup_source at the same priority as the
> + * main io_add_watch_poll GSource, then we might end up
> + * processing the POLLHUP event first, closing the FD,
> + * and as a result silently discard data we should have
> + * read.
> + *
> + * By setting the hup_source to G_PRIORITY_DEFAULT + 1,
> + * we ensure that io_add_watch_poll GSource will always
> + * be dispatched first, thus guaranteeing we will be
> + * able to process all incoming data before closing the
> + * FD
> + */
> +g_source_set_priority(s->hup_source, G_PRIORITY_DEFAULT + 1);
>  g_source_set_callback(s->hup_source, (GSourceFunc)tcp_chr_hup,
>chr, NULL);
>  g_source_attach(s->hup_source, chr->gcontext);
> --
> 2.43.0
>
>


-- 
Marc-André Lureau



Re: [PATCH] vhost-vdpa: check vhost_vdpa_set_vring_ready() return value

2024-03-18 Thread Eugenio Perez Martin
On Mon, Mar 18, 2024 at 5:35 AM Jason Wang  wrote:
>
> On Fri, Mar 15, 2024 at 4:23 PM Stefano Garzarella  
> wrote:
> >
> > On Thu, Mar 14, 2024 at 11:17:01AM +0800, Jason Wang wrote:
> > >On Wed, Feb 7, 2024 at 5:27 PM Stefano Garzarella  
> > >wrote:
> > >>
> > >> vhost_vdpa_set_vring_ready() could already fail, but if Linux's
> > >> patch [1] will be merged, it may fail with more chance if
> > >> userspace does not activate virtqueues before DRIVER_OK when
> > >> VHOST_BACKEND_F_ENABLE_AFTER_DRIVER_OK is not negotiated.
> > >
> > >I wonder what happens if we just leave it as is.
> >
> > Are you referring to this patch or the kernel patch?
>
> This patch.
>
> >
> > Here I'm just checking the return value of vhost_vdpa_set_vring_ready().
> > It can return an error also without that kernel patch, so IMHO is
> > better to check the return value here in QEMU.
> >
> > What issue do you see with this patch applied?
>
> For the parent which can enable after driver_ok but not advertise it.
>
> (To say the truth, I'm not sure if we need to care about this)
>
> >
> > >
> > >VHOST_BACKEND_F_ENABLE_AFTER_DRIVER_OK: We do know enabling could be
> > >done after driver_ok.
> > >Without VHOST_BACKEND_F_ENABLE_AFTER_DRIVER_OK: We don't know whether
> > >enabling could be done after driver_ok or not.
> >
> > I see your point, indeed I didn't send a v2 of that patch.
> > Maybe we should document that, because it could be interpreted that if
> > VHOST_BACKEND_F_ENABLE_AFTER_DRIVER_OK is not negotiated the enabling
> > should always be done before driver_ok (which is true for example in
> > VDUSE).
>
> I see, so I think we probably need the fix.
>

A more complete fix is proper checking for
VHOST_BACKEND_F_ENABLE_AFTER_DRIVER_OK. But even with that, checking
for failure in the ioctl is also worth it, isn't it?

The only point I see to not checking is to closely mimic virtio
behavior, but vDPA can already fail here.

> >
> > BTW I think we should discuss it in the kernel patch.
> >
> > Thanks,
> > Stefano
> >
> > >
> > >Thanks
> > >
> > >>
> > >> So better check its return value anyway.
> > >>
> > >> [1] 
> > >> https://lore.kernel.org/virtualization/20240206145154.118044-1-sgarz...@redhat.com/T/#u
> > >>
> > >> Signed-off-by: Stefano Garzarella 
> > >> ---
> > >> Note: This patch conflicts with [2], but the resolution is simple,
> > >> so for now I sent a patch for the current master, but I'll rebase
> > >> this patch if we merge the other one first.
>
> Will go through [2].
>
> Thanks
>
>
> > >>
> > >> [2]
> > >> https://lore.kernel.org/qemu-devel/20240202132521.32714-1-kw...@redhat.com/
> > >> ---
> > >>  hw/virtio/vdpa-dev.c |  8 +++-
> > >>  net/vhost-vdpa.c | 15 ---
> > >>  2 files changed, 19 insertions(+), 4 deletions(-)
> > >>
> > >> diff --git a/hw/virtio/vdpa-dev.c b/hw/virtio/vdpa-dev.c
> > >> index eb9ecea83b..d57cd76c18 100644
> > >> --- a/hw/virtio/vdpa-dev.c
> > >> +++ b/hw/virtio/vdpa-dev.c
> > >> @@ -259,7 +259,11 @@ static int vhost_vdpa_device_start(VirtIODevice 
> > >> *vdev, Error **errp)
> > >>  goto err_guest_notifiers;
> > >>  }
> > >>  for (i = 0; i < s->dev.nvqs; ++i) {
> > >> -vhost_vdpa_set_vring_ready(>vdpa, i);
> > >> +ret = vhost_vdpa_set_vring_ready(>vdpa, i);
> > >> +if (ret < 0) {
> > >> +error_setg_errno(errp, -ret, "Error starting vring %d", i);
> > >> +goto err_dev_stop;
> > >> +}
> > >>  }
> > >>  s->started = true;
> > >>
> > >> @@ -274,6 +278,8 @@ static int vhost_vdpa_device_start(VirtIODevice 
> > >> *vdev, Error **errp)
> > >>
> > >>  return ret;
> > >>
> > >> +err_dev_stop:
> > >> +vhost_dev_stop(>dev, vdev, false);
> > >>  err_guest_notifiers:
> > >>  k->set_guest_notifiers(qbus->parent, s->dev.nvqs, false);
> > >>  err_host_notifiers:
> > >> diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c
> > >> index 3726ee5d67..e3d8036479 100644
> > >> --- a/net/vhost-vdpa.c
> > >> +++ b/net/vhost-vdpa.c
> > >> @@ -381,7 +381,10 @@ static int vhost_vdpa_net_data_load(NetClientState 
> > >> *nc)
> > >>  }
> > >>
> > >>  for (int i = 0; i < v->dev->nvqs; ++i) {
> > >> -vhost_vdpa_set_vring_ready(v, i + v->dev->vq_index);
> > >> +int ret = vhost_vdpa_set_vring_ready(v, i + v->dev->vq_index);
> > >> +if (ret < 0) {
> > >> +return ret;
> > >> +}
> > >>  }
> > >>  return 0;
> > >>  }
> > >> @@ -1213,7 +1216,10 @@ static int vhost_vdpa_net_cvq_load(NetClientState 
> > >> *nc)
> > >>
> > >>  assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA);
> > >>
> > >> -vhost_vdpa_set_vring_ready(v, v->dev->vq_index);
> > >> +r = vhost_vdpa_set_vring_ready(v, v->dev->vq_index);
> > >> +if (unlikely(r < 0)) {
> > >> +return r;
> > >> +}
> > >>
> > >>  if (v->shadow_vqs_enabled) {
> > >>  n = VIRTIO_NET(v->dev->vdev);
> > >> @@ -1252,7 +1258,10 @@ static int vhost_vdpa_net_cvq_load(NetClientState 
> > 

Re: [PATCH 21/22] plugins: Inline plugin_gen_empty_callback

2024-03-18 Thread Alex Bennée
Richard Henderson  writes:

> Each caller can use tcg_gen_plugin_cb directly.
>
> Signed-off-by: Richard Henderson 

Reviewed-by: Alex Bennée 

-- 
Alex Bennée
Virtualisation Tech Lead @ Linaro



Re: [PATCH 2/3 for 9.0] Revert "chardev/char-socket: Fix TLS io channels sending too much data to the backend"

2024-03-18 Thread Marc-André Lureau
Hi

On Mon, Mar 18, 2024 at 10:23 PM Daniel P. Berrangé  wrote:
>
> This commit results in unexpected termination of the TLS connection.
> When 'fd_can_read' returns 0, the code goes on to pass a zero length
> buffer to qio_channel_read. The TLS impl calls into gnutls_recv()
> with this zero length buffer, at which point GNUTLS returns an error
> GNUTLS_E_INVALID_REQUEST. This is treated as fatal by QEMU's TLS code
> resulting in the connection being torn down by the chardev.
>
> Simply skipping the qio_channel_read when the buffer length is zero
> is also not satisfactory, as it results in a high CPU burn busy loop
> massively slowing QEMU's functionality.
>
> The proper solution is to avoid tcp_chr_read being called at all
> unless the frontend is able to accept more data. This will be done
> in a followup commit.
>
> This reverts commit 1907f4d149c3589ade641423c6a33fd7598fa4d3.

Actually 462945cd22d2bcd233401ed3aa167d83a8e35b05 upstream.

>
> Signed-off-by: Daniel P. Berrangé 
> ---
>  chardev/char-socket.c | 6 +++---
>  1 file changed, 3 insertions(+), 3 deletions(-)
>
> diff --git a/chardev/char-socket.c b/chardev/char-socket.c
> index 2c4dffc0e6..812d7aa38a 100644
> --- a/chardev/char-socket.c
> +++ b/chardev/char-socket.c
> @@ -496,9 +496,9 @@ static gboolean tcp_chr_read(QIOChannel *chan, 
> GIOCondition cond, void *opaque)
>  s->max_size <= 0) {
>  return TRUE;
>  }
> -len = tcp_chr_read_poll(opaque);
> -if (len > sizeof(buf)) {
> -len = sizeof(buf);
> +len = sizeof(buf);
> +if (len > s->max_size) {
> +len = s->max_size;
>  }
>  size = tcp_chr_recv(chr, (void *)buf, len);
>  if (size == 0 || (size == -1 && errno != EAGAIN)) {
> --
> 2.43.0
>




Re: [PATCH] vhost-vdpa: check vhost_vdpa_set_vring_ready() return value

2024-03-18 Thread Eugenio Perez Martin
On Wed, Feb 7, 2024 at 10:27 AM Stefano Garzarella  wrote:
>
> vhost_vdpa_set_vring_ready() could already fail, but if Linux's
> patch [1] will be merged, it may fail with more chance if
> userspace does not activate virtqueues before DRIVER_OK when
> VHOST_BACKEND_F_ENABLE_AFTER_DRIVER_OK is not negotiated.
>
> So better check its return value anyway.
>
> [1] 
> https://lore.kernel.org/virtualization/20240206145154.118044-1-sgarz...@redhat.com/T/#u
>

If any, I'd split the patches in CVQ and generic vdpa device. But I
don't think it is a big deal so

Acked-by: Eugenio Pérez 

Sorry for the long delay!

> Signed-off-by: Stefano Garzarella 
> ---
> Note: This patch conflicts with [2], but the resolution is simple,
> so for now I sent a patch for the current master, but I'll rebase
> this patch if we merge the other one first.
>
> [2] 
> https://lore.kernel.org/qemu-devel/20240202132521.32714-1-kw...@redhat.com/
> ---
>  hw/virtio/vdpa-dev.c |  8 +++-
>  net/vhost-vdpa.c | 15 ---
>  2 files changed, 19 insertions(+), 4 deletions(-)
>
> diff --git a/hw/virtio/vdpa-dev.c b/hw/virtio/vdpa-dev.c
> index eb9ecea83b..d57cd76c18 100644
> --- a/hw/virtio/vdpa-dev.c
> +++ b/hw/virtio/vdpa-dev.c
> @@ -259,7 +259,11 @@ static int vhost_vdpa_device_start(VirtIODevice *vdev, 
> Error **errp)
>  goto err_guest_notifiers;
>  }
>  for (i = 0; i < s->dev.nvqs; ++i) {
> -vhost_vdpa_set_vring_ready(>vdpa, i);
> +ret = vhost_vdpa_set_vring_ready(>vdpa, i);
> +if (ret < 0) {
> +error_setg_errno(errp, -ret, "Error starting vring %d", i);
> +goto err_dev_stop;
> +}
>  }
>  s->started = true;
>
> @@ -274,6 +278,8 @@ static int vhost_vdpa_device_start(VirtIODevice *vdev, 
> Error **errp)
>
>  return ret;
>
> +err_dev_stop:
> +vhost_dev_stop(>dev, vdev, false);
>  err_guest_notifiers:
>  k->set_guest_notifiers(qbus->parent, s->dev.nvqs, false);
>  err_host_notifiers:
> diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c
> index 3726ee5d67..e3d8036479 100644
> --- a/net/vhost-vdpa.c
> +++ b/net/vhost-vdpa.c
> @@ -381,7 +381,10 @@ static int vhost_vdpa_net_data_load(NetClientState *nc)
>  }
>
>  for (int i = 0; i < v->dev->nvqs; ++i) {
> -vhost_vdpa_set_vring_ready(v, i + v->dev->vq_index);
> +int ret = vhost_vdpa_set_vring_ready(v, i + v->dev->vq_index);
> +if (ret < 0) {
> +return ret;
> +}
>  }
>  return 0;
>  }
> @@ -1213,7 +1216,10 @@ static int vhost_vdpa_net_cvq_load(NetClientState *nc)
>
>  assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA);
>
> -vhost_vdpa_set_vring_ready(v, v->dev->vq_index);
> +r = vhost_vdpa_set_vring_ready(v, v->dev->vq_index);
> +if (unlikely(r < 0)) {
> +return r;
> +}
>
>  if (v->shadow_vqs_enabled) {
>  n = VIRTIO_NET(v->dev->vdev);
> @@ -1252,7 +1258,10 @@ static int vhost_vdpa_net_cvq_load(NetClientState *nc)
>  }
>
>  for (int i = 0; i < v->dev->vq_index; ++i) {
> -vhost_vdpa_set_vring_ready(v, i);
> +r = vhost_vdpa_set_vring_ready(v, i);
> +if (unlikely(r < 0)) {
> +return r;
> +}
>  }
>
>  return 0;
> --
> 2.43.0
>




Re: [PATCH v2] virtio-blk: iothread-vq-mapping coroutine pool sizing

2024-03-18 Thread Stefan Hajnoczi
On Tue, Mar 12, 2024 at 11:12:04AM -0400, Stefan Hajnoczi wrote:
> It is possible to hit the sysctl vm.max_map_count limit when the
> coroutine pool size becomes large. Each coroutine requires two mappings
> (one for the stack and one for the guard page). QEMU can crash with
> "failed to set up stack guard page" or "failed to allocate memory for
> stack" when this happens.
> 
> Coroutine pool sizing is simple when there is only thread: sum up all
> I/O requests across all virtqueues.
> 
> When the iothread-vq-mapping option is used we should calculate tighter
> bounds because thread may serve a subset of the device's virtqueues:
> take the maximum number of the number of I/O requests across all
> virtqueues. A thread does not need coroutine pool space for I/O requests
> that are handled by other threads.
> 
> This is not a solution to hitting vm.max_map_count, but it helps. A
> guest with 64 vCPUs (hence 64 virtqueues) across 4 IOThreads with one
> iothread-vq-mapping virtio-blk device and a root disk without goes from
> pool_max_size 16,448 to 10,304.
> 
> Reported-by: Sanjay Rao 
> Reported-by: Boaz Ben Shabat 
> Reported-by: Joe Mario 
> Signed-off-by: Stefan Hajnoczi 
> ---
> v2:
> - State the the tighter bounds reflect the fact that threads may only
>   process a subset of the total I/O requests from a device [Kevin]
> - Add Reported-by: Joe Mario, he has been investigating this issue.

I have sent a new patch series that obsoletes this patch. Please do not
apply this patch.

The new series is here:
https://lore.kernel.org/qemu-devel/20240318183429.1039340-1-stefa...@redhat.com/T/#u

> 
>  include/hw/virtio/virtio-blk.h |  2 ++
>  hw/block/virtio-blk.c  | 34 --
>  2 files changed, 34 insertions(+), 2 deletions(-)
> 
> diff --git a/include/hw/virtio/virtio-blk.h b/include/hw/virtio/virtio-blk.h
> index 5c14110c4b..ac29700ad4 100644
> --- a/include/hw/virtio/virtio-blk.h
> +++ b/include/hw/virtio/virtio-blk.h
> @@ -74,6 +74,8 @@ struct VirtIOBlock {
>  uint64_t host_features;
>  size_t config_size;
>  BlockRAMRegistrar blk_ram_registrar;
> +
> +unsigned coroutine_pool_size;
>  };
>  
>  typedef struct VirtIOBlockReq {
> diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c
> index 738cb2ac36..0a14b2b175 100644
> --- a/hw/block/virtio-blk.c
> +++ b/hw/block/virtio-blk.c
> @@ -1957,6 +1957,35 @@ static void virtio_blk_stop_ioeventfd(VirtIODevice 
> *vdev)
>  s->ioeventfd_stopping = false;
>  }
>  
> +/* Increase the coroutine pool size to include our I/O requests */
> +static void virtio_blk_inc_coroutine_pool_size(VirtIOBlock *s)
> +{
> +VirtIOBlkConf *conf = >conf;
> +unsigned max_requests = 0;
> +
> +/* Tracks the total number of requests for AioContext */
> +g_autoptr(GHashTable) counters = g_hash_table_new(NULL, NULL);
> +
> +/* Call this function after setting up vq_aio_context[] */
> +assert(s->vq_aio_context);
> +
> +for (unsigned i = 0; i < conf->num_queues; i++) {
> +AioContext *ctx = s->vq_aio_context[i];
> +unsigned n = GPOINTER_TO_UINT(g_hash_table_lookup(counters, ctx));
> +
> +n += conf->queue_size / 2; /* this is a heuristic */
> +
> +g_hash_table_insert(counters, ctx, GUINT_TO_POINTER(n));
> +
> +if (n > max_requests) {
> +max_requests = n;
> +}
> +}
> +
> +qemu_coroutine_inc_pool_size(max_requests);
> +s->coroutine_pool_size = max_requests; /* stash it for ->unrealize() */
> +}
> +
>  static void virtio_blk_device_realize(DeviceState *dev, Error **errp)
>  {
>  VirtIODevice *vdev = VIRTIO_DEVICE(dev);
> @@ -2048,7 +2077,6 @@ static void virtio_blk_device_realize(DeviceState *dev, 
> Error **errp)
>  for (i = 0; i < conf->num_queues; i++) {
>  virtio_add_queue(vdev, conf->queue_size, virtio_blk_handle_output);
>  }
> -qemu_coroutine_inc_pool_size(conf->num_queues * conf->queue_size / 2);
>  
>  /* Don't start ioeventfd if transport does not support notifiers. */
>  if (!virtio_device_ioeventfd_enabled(vdev)) {
> @@ -2065,6 +2093,8 @@ static void virtio_blk_device_realize(DeviceState *dev, 
> Error **errp)
>  return;
>  }
>  
> +virtio_blk_inc_coroutine_pool_size(s);
> +
>  /*
>   * This must be after virtio_init() so virtio_blk_dma_restart_cb() gets
>   * called after ->start_ioeventfd() has already set blk's AioContext.
> @@ -2096,7 +2126,7 @@ static void virtio_blk_device_unrealize(DeviceState 
> *dev)
>  for (i = 0; i < conf->num_queues; i++) {
>  virtio_del_queue(vdev, i);
>  }
> -qemu_coroutine_dec_pool_size(conf->num_queues * conf->queue_size / 2);
> +qemu_coroutine_dec_pool_size(s->coroutine_pool_size);
>  qemu_mutex_destroy(>rq_lock);
>  blk_ram_registrar_destroy(>blk_ram_registrar);
>  qemu_del_vm_change_state_handler(s->change);
> -- 
> 2.44.0
> 


signature.asc
Description: PGP signature


[PATCH] coroutine: cap per-thread local pool size

2024-03-18 Thread Stefan Hajnoczi
The coroutine pool implementation can hit the Linux vm.max_map_count
limit, causing QEMU to abort with "failed to allocate memory for stack"
or "failed to set up stack guard page" during coroutine creation.

This happens because per-thread pools can grow to tens of thousands of
coroutines. Each coroutine causes 2 virtual memory areas to be created.
Eventually vm.max_map_count is reached and memory-related syscalls fail.
The per-thread pool sizes are non-uniform and depend on past coroutine
usage in each thread, so it's possible for one thread to have a large
pool while another thread's pool is empty.

Switch to a new coroutine pool implementation with a global pool that
grows to a maximum number of coroutines and per-thread local pools that
are capped at hardcoded small number of coroutines.

This approach does not leave large numbers of coroutines pooled in a
thread that may not use them again. In order to perform well it
amortizes the cost of global pool accesses by working in batches of
coroutines instead of individual coroutines.

The global pool is a list. Threads donate batches of coroutines to when
they have too many and take batches from when they have too few:

.---.
| Batch 1 | Batch 2 | Batch 3 | ... | global_pool
`---'

Each thread has up to 2 batches of coroutines:

.---.
| Batch 1 | Batch 2 | per-thread local_pool (maximum 2 batches)
`---'

The goal of this change is to reduce the excessive number of pooled
coroutines that cause QEMU to abort when vm.max_map_count is reached
without losing the performance of an adequately sized coroutine pool.

Here are virtio-blk disk I/O benchmark results:

  RW BLKSIZE IODEPTHOLDNEW CHANGE
randread  4k   1 113725 117451 +3.3%
randread  4k   8 192968 198510 +2.9%
randread  4k  16 207138 209429 +1.1%
randread  4k  32 212399 215145 +1.3%
randread  4k  64 218319 221277 +1.4%
randread128k   1  17587  17535 -0.3%
randread128k   8  17614  17616 +0.0%
randread128k  16  17608  17609 +0.0%
randread128k  32  17552  17553 +0.0%
randread128k  64  17484  17484 +0.0%

See files/{fio.sh,test.xml.j2} for the benchmark configuration:
https://gitlab.com/stefanha/virt-playbooks/-/tree/coroutine-pool-fix-sizing

Buglink: https://issues.redhat.com/browse/RHEL-28947
Reported-by: Sanjay Rao 
Reported-by: Boaz Ben Shabat 
Reported-by: Joe Mario 
Signed-off-by: Stefan Hajnoczi 
---
This patch obsoletes "[PATCH v2] virtio-blk: iothread-vq-mapping
coroutine pool sizing" because the pool size is now global instead of
per thread:
https://lore.kernel.org/qemu-devel/20240312151204.412624-1-stefa...@redhat.com/

Please don't apply "[PATCH v2] virtio-blk: iothread-vq-mapping coroutine
pool sizing".

 util/qemu-coroutine.c | 282 +-
 1 file changed, 223 insertions(+), 59 deletions(-)

diff --git a/util/qemu-coroutine.c b/util/qemu-coroutine.c
index 5fd2dbaf8b..2790959eaf 100644
--- a/util/qemu-coroutine.c
+++ b/util/qemu-coroutine.c
@@ -18,39 +18,200 @@
 #include "qemu/atomic.h"
 #include "qemu/coroutine_int.h"
 #include "qemu/coroutine-tls.h"
+#include "qemu/cutils.h"
 #include "block/aio.h"
 
-/**
- * The minimal batch size is always 64, coroutines from the release_pool are
- * reused as soon as there are 64 coroutines in it. The maximum pool size 
starts
- * with 64 and is increased on demand so that coroutines are not deleted even 
if
- * they are not immediately reused.
- */
 enum {
-POOL_MIN_BATCH_SIZE = 64,
-POOL_INITIAL_MAX_SIZE = 64,
+COROUTINE_POOL_BATCH_MAX_SIZE = 128,
 };
 
-/** Free list to speed up creation */
-static QSLIST_HEAD(, Coroutine) release_pool = QSLIST_HEAD_INITIALIZER(pool);
-static unsigned int pool_max_size = POOL_INITIAL_MAX_SIZE;
-static unsigned int release_pool_size;
+/*
+ * Coroutine creation and deletion is expensive so a pool of unused coroutines
+ * is kept as a cache. When the pool has coroutines available, they are
+ * recycled instead of creating new ones from scratch. Coroutines are added to
+ * the pool upon termination.
+ *
+ * The pool is global but each thread maintains a small local pool to avoid
+ * global pool contention. Threads fetch and return batches of coroutines from
+ * the global pool to maintain their local pool. The local pool holds up to two
+ * batches whereas the maximum size of the global pool is controlled by the
+ * qemu_coroutine_inc_pool_size() API.
+ *
+ * .---.
+ * | Batch 1 | Batch 2 | Batch 3 | ... | global_pool
+ * `---'
+ *
+ * .---.
+ * | Batch 1 | Batch 2 | per-thread local_pool (maximum 2 batches)
+ * `---'
+ */
+typedef struct CoroutinePoolBatch {
+/* Batches are kept in a list */
+QSLIST_ENTRY(CoroutinePoolBatch) next;
 
-typedef QSLIST_HEAD(, Coroutine) CoroutineQSList;

[PATCH 3/3 for 9.0] Revert "chardev: use a child source for qio input source"

2024-03-18 Thread Daniel P . Berrangé
This reverts commit a7077b8e354d90fec26c2921aa2dea85b90dff90,
and add comments to explain why child sources cannot be used.

When a GSource is added as a child of another GSource, if its
'prepare' function indicates readiness, then the parent's
'prepare' function will never be run. The io_watch_poll_prepare
absolutely *must* be run on every iteration of the main loop,
to ensure that the chardev backend doesn't feed data to the
frontend that it is unable to consume.

At the time a7077b8e354d90fec26c2921aa2dea85b90dff90 was made,
all the child GSource impls were relying on poll'ing an FD,
so their 'prepare' functions would never indicate readiness
ahead of poll() being invoked. So the buggy behaviour was
not noticed and lay dormant.

Relatively recently the QIOChannelTLS impl introduced a
level 2 child GSource, which checks with GNUTLS whether it
has cached any data that was decoded but not yet consumed:

  commit ffda5db65aef42266a5053a4be34515106c4c7ee
  Author: Antoine Damhet 
  Date:   Tue Nov 15 15:23:29 2022 +0100

io/channel-tls: fix handling of bigger read buffers

Since the TLS backend can read more data from the underlying QIOChannel
we introduce a minimal child GSource to notify if we still have more
data available to be read.

Signed-off-by: Antoine Damhet 
Signed-off-by: Charles Frey 
Signed-off-by: Daniel P. Berrangé 

With this, it is now quite common for the 'prepare' function
on a QIOChannelTLS GSource to indicate immediate readiness,
bypassing the parent GSource 'prepare' function. IOW, the
critical 'io_watch_poll_prepare' is being skipped on some
iterations of the main loop. As a result chardev frontend
asserts are now being triggered as they are fed data they
are not ready to consume.

A reproducer is as follows:

 * In terminal 1 run a GNUTLS *echo* server

   $ gnutls-serv --echo \
 --x509cafile ca-cert.pem \
 --x509keyfile server-key.pem \
 --x509certfile server-cert.pem \
 -p 9000

 * In terminal 2 run a QEMU guest

   $ qemu-system-s390x \
   -nodefaults \
   -display none \
   -object tls-creds-x509,id=tls0,dir=$PWD,endpoint=client \
   -chardev socket,id=con0,host=localhost,port=9000,tls-creds=tls0 \
   -device sclpconsole,chardev=con0 \
   -hda Fedora-Cloud-Base-39-1.5.s390x.qcow2

After the previous patch revert, but before this patch revert,
this scenario will crash:

  qemu-system-s390x: ../hw/char/sclpconsole.c:73: chr_read: Assertion
  `size <= SIZE_BUFFER_VT220 - scon->iov_data_len' failed.

This assert indicates that 'tcp_chr_read' was called without
'tcp_chr_read_poll' having first been checked for ability to
receive more data

QEMU's use of a 'prepare' function to create/delete another
GSource is rather a hack and not normally the kind of thing that
is expected to be done by a GSource. There is no mechanism to
force GLib to always run the 'prepare' function of a parent
GSource. The best option is to simply not use the child source
concept, and go back to the functional approach previously
relied on.

Signed-off-by: Daniel P. Berrangé 
---
 chardev/char-io.c | 55 ++-
 1 file changed, 50 insertions(+), 5 deletions(-)

diff --git a/chardev/char-io.c b/chardev/char-io.c
index 4451128cba..3c725f530b 100644
--- a/chardev/char-io.c
+++ b/chardev/char-io.c
@@ -33,6 +33,7 @@ typedef struct IOWatchPoll {
 IOCanReadHandler *fd_can_read;
 GSourceFunc fd_read;
 void *opaque;
+GMainContext *context;
 } IOWatchPoll;
 
 static IOWatchPoll *io_watch_poll_from_source(GSource *source)
@@ -50,28 +51,58 @@ static gboolean io_watch_poll_prepare(GSource *source,
 return FALSE;
 }
 
+/*
+ * We do not register the QIOChannel watch as a child GSource.
+ * The 'prepare' function on the parent GSource will be
+ * skipped if a child GSource's 'prepare' function indicates
+ * readiness. We need this prepare function be guaranteed
+ * to run on *every* iteration of the main loop, because
+ * it is critical to ensure we remove the QIOChannel watch
+ * if 'fd_can_read' indicates the frontend cannot receive
+ * more data.
+ */
 if (now_active) {
 iwp->src = qio_channel_create_watch(
 iwp->ioc, G_IO_IN | G_IO_ERR | G_IO_HUP | G_IO_NVAL);
 g_source_set_callback(iwp->src, iwp->fd_read, iwp->opaque, NULL);
-g_source_add_child_source(source, iwp->src);
-g_source_unref(iwp->src);
+g_source_attach(iwp->src, iwp->context);
 } else {
-g_source_remove_child_source(source, iwp->src);
+g_source_destroy(iwp->src);
+g_source_unref(iwp->src);
 iwp->src = NULL;
 }
 return FALSE;
 }
 
+static gboolean io_watch_poll_check(GSource *source)
+{
+return FALSE;
+}
+
 static gboolean io_watch_poll_dispatch(GSource *source, GSourceFunc callback,
gpointer 

[PATCH 1/3 for 9.0] chardev: lower priority of the HUP GSource in socket chardev

2024-03-18 Thread Daniel P . Berrangé
The socket chardev often has 2 GSource object registered against the
same FD. One is registered all the time and is just intended to handle
POLLHUP events, while the other gets registered & unregistered on the
fly as the frontend is ready to receive more data or not.

It is very common for poll() to signal a POLLHUP event at the same time
as there is pending incoming data from the disconnected client. It is
therefore essential to process incoming data prior to processing HUP.
The problem with having 2 GSource on the same FD is that there is no
guaranteed ordering of execution between them, so the chardev code may
process HUP first and thus discard data.

This failure scenario is non-deterministic but can be seen fairly
reliably by reverting a7077b8e354d90fec26c2921aa2dea85b90dff90, and
then running 'tests/unit/test-char', which will sometimes fail with
missing data.

Ideally QEMU would only have 1 GSource, but that's a complex code
refactoring job. The next best solution is to try to ensure ordering
between the 2 GSource objects. This can be achieved by lowering the
priority of the HUP GSource, so that it is never dispatched if the
main GSource is also ready to dispatch. Counter-intuitively, lowering
the priority of a GSource is done by raising its priority number.

Signed-off-by: Daniel P. Berrangé 
---
 chardev/char-socket.c | 16 
 1 file changed, 16 insertions(+)

diff --git a/chardev/char-socket.c b/chardev/char-socket.c
index 8a0406cc1e..2c4dffc0e6 100644
--- a/chardev/char-socket.c
+++ b/chardev/char-socket.c
@@ -601,6 +601,22 @@ static void update_ioc_handlers(SocketChardev *s)
 
 remove_hup_source(s);
 s->hup_source = qio_channel_create_watch(s->ioc, G_IO_HUP);
+/*
+ * poll() is liable to return POLLHUP even when there is
+ * still incoming data available to read on the FD. If
+ * we have the hup_source at the same priority as the
+ * main io_add_watch_poll GSource, then we might end up
+ * processing the POLLHUP event first, closing the FD,
+ * and as a result silently discard data we should have
+ * read.
+ *
+ * By setting the hup_source to G_PRIORITY_DEFAULT + 1,
+ * we ensure that io_add_watch_poll GSource will always
+ * be dispatched first, thus guaranteeing we will be
+ * able to process all incoming data before closing the
+ * FD
+ */
+g_source_set_priority(s->hup_source, G_PRIORITY_DEFAULT + 1);
 g_source_set_callback(s->hup_source, (GSourceFunc)tcp_chr_hup,
   chr, NULL);
 g_source_attach(s->hup_source, chr->gcontext);
-- 
2.43.0




[PATCH 0/3 for 9.0] Fix TLS support for chardevs and incoming data loss on EOF

2024-03-18 Thread Daniel P . Berrangé
This fixes a problem with TLS support on chardevs that Thomas has
previously attempted to deal with:

  https://lists.nongnu.org/archive/html/qemu-devel/2024-02/msg06915.html

Unfortunately that fix caused unexpected side effects that resulted
in premature termination of the TLS connection. See patch 2 for
details.

I've since identified the root cause of the problem that Thomas was
trying to fix - bad assumptions about GSource 'prepare' functions
always being run. See patch 3 for details.

Patch 3 re-exposed a bug we've know about for a while whereby incoming
data on chardevs is sometimes discarded when POLLHUP is reported at the
same time. This required patch 1 to be applied before doing the revert
in patch 3, otherwise test-char would now very frequently fail.

So we get 2 bug fixes for the price of one :-)

Daniel P. Berrangé (3):
  chardev: lower priority of the HUP GSource in socket chardev
  Revert "chardev/char-socket: Fix TLS io channels sending too much data
to the backend"
  Revert "chardev: use a child source for qio input source"

 chardev/char-io.c | 55 +++
 chardev/char-socket.c | 22 ++---
 2 files changed, 69 insertions(+), 8 deletions(-)

-- 
2.43.0




[PATCH 2/3 for 9.0] Revert "chardev/char-socket: Fix TLS io channels sending too much data to the backend"

2024-03-18 Thread Daniel P . Berrangé
This commit results in unexpected termination of the TLS connection.
When 'fd_can_read' returns 0, the code goes on to pass a zero length
buffer to qio_channel_read. The TLS impl calls into gnutls_recv()
with this zero length buffer, at which point GNUTLS returns an error
GNUTLS_E_INVALID_REQUEST. This is treated as fatal by QEMU's TLS code
resulting in the connection being torn down by the chardev.

Simply skipping the qio_channel_read when the buffer length is zero
is also not satisfactory, as it results in a high CPU burn busy loop
massively slowing QEMU's functionality.

The proper solution is to avoid tcp_chr_read being called at all
unless the frontend is able to accept more data. This will be done
in a followup commit.

This reverts commit 1907f4d149c3589ade641423c6a33fd7598fa4d3.

Signed-off-by: Daniel P. Berrangé 
---
 chardev/char-socket.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/chardev/char-socket.c b/chardev/char-socket.c
index 2c4dffc0e6..812d7aa38a 100644
--- a/chardev/char-socket.c
+++ b/chardev/char-socket.c
@@ -496,9 +496,9 @@ static gboolean tcp_chr_read(QIOChannel *chan, GIOCondition 
cond, void *opaque)
 s->max_size <= 0) {
 return TRUE;
 }
-len = tcp_chr_read_poll(opaque);
-if (len > sizeof(buf)) {
-len = sizeof(buf);
+len = sizeof(buf);
+if (len > s->max_size) {
+len = s->max_size;
 }
 size = tcp_chr_recv(chr, (void *)buf, len);
 if (size == 0 || (size == -1 && errno != EAGAIN)) {
-- 
2.43.0




Re: [PATCH v2] ppc/pnv: I2C controller is not user creatable

2024-03-18 Thread Miles Glenn
On Mon, 2024-03-18 at 16:58 +0100, Cédric Le Goater wrote:

Thanks for fixing that!

-Glenn

Reviewed-by: Glenn Miles 

> The I2C controller is a subunit of the processor. Make it so and
> avoid
> QEMU crashes.
> 
>   $ build/qemu-system-ppc64 -S -machine powernv9 -device pnv-i2c
>   qemu-system-ppc64: ../hw/ppc/pnv_i2c.c:521: pnv_i2c_realize:
> Assertion `i2c->chip' failed.
>   Aborted (core dumped)
> 
> Fixes: 263b81ee15af ("ppc/pnv: Add an I2C controller model")
> Cc: Glenn Miles 
> Reported-by: Thomas Huth 
> Reviewed-by: Thomas Huth 
> Signed-off-by: Cédric Le Goater 
> ---
>  hw/ppc/pnv_i2c.c | 3 +++
>  1 file changed, 3 insertions(+)
> 
> diff --git a/hw/ppc/pnv_i2c.c b/hw/ppc/pnv_i2c.c
> index
> 4581cc5e5d4645ab3e358d983a633e33a214c425..eec5047ce83f842108b53a6e2bd
> 9869a81f14ac1 100644
> --- a/hw/ppc/pnv_i2c.c
> +++ b/hw/ppc/pnv_i2c.c
> @@ -557,6 +557,9 @@ static void pnv_i2c_class_init(ObjectClass
> *klass, void *data)
> 
>  xscomc->dt_xscom = pnv_i2c_dt_xscom;
> 
> +/* Reason: This device is part of the CPU and cannot be used
> separately */
> +dc->user_creatable = false;
> +
>  dc->desc = "PowerNV I2C";
>  dc->realize = pnv_i2c_realize;
>  device_class_set_props(dc, pnv_i2c_properties);




[PULL 0/4] s390x and misc patches for 9.0-rc0

2024-03-18 Thread Thomas Huth
 Hi Peter!

The following changes since commit ba49d760eb04630e7b15f423ebecf6c871b8f77b:

  Merge tag 'pull-maintainer-final-130324-1' of https://gitlab.com/stsquad/qemu 
into staging (2024-03-13 15:12:14 +)

are available in the Git repository at:

  https://gitlab.com/thuth/qemu.git tags/pull-request-2024-03-18

for you to fetch changes up to aebe0a8552e8d419c8103e60e593f2778eab41c4:

  travis-ci: Rename SOFTMMU -> SYSTEM (2024-03-18 17:18:05 +0100)


* Clarify s390x CPU topology docs and CPU compatibility error messages
* Improve the Sparc CPU help text
* Rename SOFTMMU to SYSTEM in the travis.yml file


Claudio Fontana (2):
  docs/s390: clarify even more that cpu-topology is KVM-only
  target/s390x: improve cpu compatibility check error message

Philippe Mathieu-Daudé (1):
  travis-ci: Rename SOFTMMU -> SYSTEM

Thomas Huth (1):
  target/sparc/cpu: Improve the CPU help text

 docs/system/introduction.rst   |  2 ++
 docs/system/s390x/cpu-topology.rst | 14 --
 target/s390x/cpu_models.c  | 22 +++---
 target/sparc/cpu.c |  5 +++--
 .travis.yml|  8 
 5 files changed, 32 insertions(+), 19 deletions(-)




[PULL 1/4] docs/s390: clarify even more that cpu-topology is KVM-only

2024-03-18 Thread Thomas Huth
From: Claudio Fontana 

At least for now cpu-topology is implemented only for KVM.

We already say this, but this tries to be more explicit,
and also show it in the examples.

This adds a new reference in the introduction that we can point to,
whenever we need to reference accelerators and how to select them.

Signed-off-by: Claudio Fontana 
Message-ID: <20240314172218.16478-1-cfont...@suse.de>
Reviewed-by: Nina Schoetterl-Glausch 
Tested-by: Nina Schoetterl-Glausch 
Reviewed-by: Philippe Mathieu-Daudé 
Signed-off-by: Thomas Huth 
---
 docs/system/introduction.rst   |  2 ++
 docs/system/s390x/cpu-topology.rst | 14 --
 2 files changed, 10 insertions(+), 6 deletions(-)

diff --git a/docs/system/introduction.rst b/docs/system/introduction.rst
index 51ac132d6c..746707eb00 100644
--- a/docs/system/introduction.rst
+++ b/docs/system/introduction.rst
@@ -1,6 +1,8 @@
 Introduction
 
 
+.. _Accelerators:
+
 Virtualisation Accelerators
 ---
 
diff --git a/docs/system/s390x/cpu-topology.rst 
b/docs/system/s390x/cpu-topology.rst
index 5133fdc362..d5b506ee5c 100644
--- a/docs/system/s390x/cpu-topology.rst
+++ b/docs/system/s390x/cpu-topology.rst
@@ -25,17 +25,19 @@ monitor polarization changes, see 
``docs/devel/s390-cpu-topology.rst``.
 Prerequisites
 -
 
-To use the CPU topology, you need to run with KVM on a s390x host that
-uses the Linux kernel v6.0 or newer (which provide the so-called
+To use the CPU topology, you currently need to choose the KVM accelerator.
+See :ref:`Accelerators` for more details about accelerators and how to select 
them.
+
+The s390x host needs to use a Linux kernel v6.0 or newer (which provides the 
so-called
 ``KVM_CAP_S390_CPU_TOPOLOGY`` capability that allows QEMU to signal the
 CPU topology facility via the so-called STFLE bit 11 to the VM).
 
 Enabling CPU topology
 -
 
-Currently, CPU topology is only enabled in the host model by default.
+Currently, CPU topology is enabled by default only in the "host" CPU model.
 
-Enabling CPU topology in a CPU model is done by setting the CPU flag
+Enabling CPU topology in another CPU model is done by setting the CPU flag
 ``ctop`` to ``on`` as in:
 
 .. code-block:: bash
@@ -132,7 +134,7 @@ In the following machine we define 8 sockets with 4 cores 
each.
 
 .. code-block:: bash
 
-  $ qemu-system-s390x -m 2G \
+  $ qemu-system-s390x -accel kvm -m 2G \
 -cpu gen16b,ctop=on \
 -smp cpus=5,sockets=8,cores=4,maxcpus=32 \
 -device host-s390x-cpu,core-id=14 \
@@ -227,7 +229,7 @@ with vertical high entitlement.
 
 .. code-block:: bash
 
-  $ qemu-system-s390x -m 2G \
+  $ qemu-system-s390x -accel kvm -m 2G \
 -cpu gen16b,ctop=on \
 -smp cpus=1,sockets=8,cores=4,maxcpus=32 \
 \
-- 
2.44.0




[PULL 2/4] target/s390x: improve cpu compatibility check error message

2024-03-18 Thread Thomas Huth
From: Claudio Fontana 

some users were confused by this message showing under TCG:

 Selected CPU generation is too new. Maximum supported model
 in the configuration: 'xyz'

Clarify that the maximum can depend on the accel, and add a
hint to try a different one.

Also add a hint for features mismatch to suggest trying
different accel, QEMU and kernel versions.

Signed-off-by: Claudio Fontana 
Message-ID: <20240314213746.27163-1-cfont...@suse.de>
Reviewed-by: Philippe Mathieu-Daudé 
Reviewed-by: Nina Schoetterl-Glausch 
Signed-off-by: Thomas Huth 
---
 target/s390x/cpu_models.c | 22 +++---
 1 file changed, 15 insertions(+), 7 deletions(-)

diff --git a/target/s390x/cpu_models.c b/target/s390x/cpu_models.c
index 1a1c096122..8ed3bb6a27 100644
--- a/target/s390x/cpu_models.c
+++ b/target/s390x/cpu_models.c
@@ -500,6 +500,16 @@ static void error_prepend_missing_feat(const char *name, 
void *opaque)
 error_prepend((Error **) opaque, "%s ", name);
 }
 
+static void check_compat_model_failed(Error **errp,
+  const S390CPUModel *max_model,
+  const char *msg)
+{
+error_setg(errp, "%s. Maximum supported model in the current 
configuration: \'%s\'",
+   msg, max_model->def->name);
+error_append_hint(errp, "Consider a different accelerator, try \"-accel 
help\"\n");
+return;
+}
+
 static void check_compatibility(const S390CPUModel *max_model,
 const S390CPUModel *model, Error **errp)
 {
@@ -507,15 +517,11 @@ static void check_compatibility(const S390CPUModel 
*max_model,
 S390FeatBitmap missing;
 
 if (model->def->gen > max_model->def->gen) {
-error_setg(errp, "Selected CPU generation is too new. Maximum "
-   "supported model in the configuration: \'%s\'",
-   max_model->def->name);
+check_compat_model_failed(errp, max_model, "Selected CPU generation is 
too new");
 return;
 } else if (model->def->gen == max_model->def->gen &&
model->def->ec_ga > max_model->def->ec_ga) {
-error_setg(errp, "Selected CPU GA level is too new. Maximum "
-   "supported model in the configuration: \'%s\'",
-   max_model->def->name);
+check_compat_model_failed(errp, max_model, "Selected CPU GA level is 
too new");
 return;
 }
 
@@ -537,7 +543,9 @@ static void check_compatibility(const S390CPUModel 
*max_model,
 error_setg(errp, " ");
 s390_feat_bitmap_to_ascii(missing, errp, error_prepend_missing_feat);
 error_prepend(errp, "Some features requested in the CPU model are not "
-  "available in the configuration: ");
+  "available in the current configuration: ");
+error_append_hint(errp,
+  "Consider a different accelerator, QEMU, or kernel 
version\n");
 }
 
 S390CPUModel *get_max_cpu_model(Error **errp)
-- 
2.44.0




[PULL 3/4] target/sparc/cpu: Improve the CPU help text

2024-03-18 Thread Thomas Huth
Remove the unnecessary "Sparc" at the beginning of the line and
put the chip information into parentheses so that it is clearer
which part of the line have to be passed to "-cpu" to specify a
different CPU.

Message-ID: <20240307174334.130407-4-th...@redhat.com>
Reviewed-by: Richard Henderson 
Reviewed-by: Philippe Mathieu-Daudé 
Signed-off-by: Thomas Huth 
---
 target/sparc/cpu.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/target/sparc/cpu.c b/target/sparc/cpu.c
index dc9ead21fc..e820f50acf 100644
--- a/target/sparc/cpu.c
+++ b/target/sparc/cpu.c
@@ -574,9 +574,10 @@ void sparc_cpu_list(void)
 {
 unsigned int i;
 
+qemu_printf("Available CPU types:\n");
 for (i = 0; i < ARRAY_SIZE(sparc_defs); i++) {
-qemu_printf("Sparc %16s IU " TARGET_FMT_lx
-" FPU %08x MMU %08x NWINS %d ",
+qemu_printf(" %-20s (IU " TARGET_FMT_lx
+" FPU %08x MMU %08x NWINS %d) ",
 sparc_defs[i].name,
 sparc_defs[i].iu_version,
 sparc_defs[i].fpu_version,
-- 
2.44.0




[PULL 4/4] travis-ci: Rename SOFTMMU -> SYSTEM

2024-03-18 Thread Thomas Huth
From: Philippe Mathieu-Daudé 

Since we *might* have user emulation with softmmu,
rename MAIN_SOFTMMU_TARGETS as MAIN_SYSTEM_TARGETS
to express 'system emulation targets'.

Signed-off-by: Philippe Mathieu-Daudé 
Message-ID: <20240313213339.82071-3-phi...@linaro.org>
Reviewed-by: Thomas Huth 
Reviewed-by: Richard Henderson 
Signed-off-by: Thomas Huth 
---
 .travis.yml | 8 
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/.travis.yml b/.travis.yml
index 76859d48da..597d151b80 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -35,7 +35,7 @@ env:
 - TEST_BUILD_CMD=""
 - TEST_CMD="make check V=1"
 # This is broadly a list of "mainline" system targets which have support 
across the major distros
-- 
MAIN_SOFTMMU_TARGETS="aarch64-softmmu,mips64-softmmu,ppc64-softmmu,riscv64-softmmu,s390x-softmmu,x86_64-softmmu"
+- 
MAIN_SYSTEM_TARGETS="aarch64-softmmu,mips64-softmmu,ppc64-softmmu,riscv64-softmmu,s390x-softmmu,x86_64-softmmu"
 - CCACHE_SLOPPINESS="include_file_ctime,include_file_mtime"
 - CCACHE_MAXSIZE=1G
 - G_MESSAGES_DEBUG=error
@@ -114,7 +114,7 @@ jobs:
   env:
 - TEST_CMD="make check check-tcg V=1"
 - CONFIG="--disable-containers --enable-fdt=system
-  --target-list=${MAIN_SOFTMMU_TARGETS} --cxx=/bin/false"
+  --target-list=${MAIN_SYSTEM_TARGETS} --cxx=/bin/false"
 - UNRELIABLE=true
 
 - name: "[ppc64] GCC check-tcg"
@@ -185,7 +185,7 @@ jobs:
   env:
 - TEST_CMD="make check check-tcg V=1"
 - CONFIG="--disable-containers --enable-fdt=system
-  --target-list=${MAIN_SOFTMMU_TARGETS},s390x-linux-user"
+  --target-list=${MAIN_SYSTEM_TARGETS},s390x-linux-user"
 - UNRELIABLE=true
   script:
 - BUILD_RC=0 && make -j${JOBS} || BUILD_RC=$?
@@ -226,7 +226,7 @@ jobs:
   - genisoimage
   env:
 - CONFIG="--disable-containers --enable-fdt=system --audio-drv-list=sdl
-  --disable-user --target-list-exclude=${MAIN_SOFTMMU_TARGETS}"
+  --disable-user --target-list-exclude=${MAIN_SYSTEM_TARGETS}"
 
 - name: "[s390x] GCC (user)"
   arch: s390x
-- 
2.44.0




Re: [PATCH 20/22] plugins: Move qemu_plugin_insn_cleanup_fn to tcg.c

2024-03-18 Thread Alex Bennée
Richard Henderson  writes:

> This is only used in one place, and usage requires an
> out-of-line function.
>
> Signed-off-by: Richard Henderson 
> ---
>  include/qemu/plugin.h | 12 
>  tcg/tcg.c | 12 
>  2 files changed, 12 insertions(+), 12 deletions(-)
>
> diff --git a/include/qemu/plugin.h b/include/qemu/plugin.h
> index 07b1755990..201889cbee 100644
> --- a/include/qemu/plugin.h
> +++ b/include/qemu/plugin.h
> @@ -116,18 +116,6 @@ struct qemu_plugin_scoreboard {
>  QLIST_ENTRY(qemu_plugin_scoreboard) entry;
>  };
>  
> -/*
> - * qemu_plugin_insn allocate and cleanup functions. We don't expect to
> - * cleanup many of these structures. They are reused for each fresh
> - * translation.
> - */
> -
> -static inline void qemu_plugin_insn_cleanup_fn(gpointer data)
> -{
> -struct qemu_plugin_insn *insn = (struct qemu_plugin_insn *) data;
> -g_byte_array_free(insn->data, true);
> -}
> -
>  /* Internal context for this TranslationBlock */
>  struct qemu_plugin_tb {
>  GPtrArray *insns;
> diff --git a/tcg/tcg.c b/tcg/tcg.c
> index d248c52e96..d7abc514c4 100644
> --- a/tcg/tcg.c
> +++ b/tcg/tcg.c
> @@ -761,6 +761,18 @@ QEMU_BUILD_BUG_ON((int)(offsetof(CPUNegativeOffsetState, 
> tlb.f[0]) -
>< MIN_TLB_MASK_TABLE_OFS);
>  #endif
>  
> +#ifdef CONFIG_PLUGIN
> +/*
> + * We don't expect to cleanup many of these structures.
> + * They are reused for each fresh translation.
> + */
> +static void qemu_plugin_insn_cleanup_fn(gpointer data)
> +{
> +struct qemu_plugin_insn *insn = (struct qemu_plugin_insn *) data;
> +g_byte_array_free(insn->data, true);
> +}
> +#endif
> +

You could expand the ifdef to the next function and make an alternate
empty alloc_tcg_plugin_context. Alternatively maybe we should consider
dropping the CONFIG_PLUGIN gymnastics and make it a first class TCG
feature?

Is the null case still visible on the code generation benchmarks? Does
anyone using TCG actually --disable-plugins?

Anyway:

Reviewed-by: Alex Bennée 


>  static void alloc_tcg_plugin_context(TCGContext *s)
>  {
>  #ifdef CONFIG_PLUGIN

-- 
Alex Bennée
Virtualisation Tech Lead @ Linaro



RE: [EXTERNAL] [PATCH v3 for 9.1 5/6] vhost/vhost-user: Add VIRTIO_F_NOTIFICATION_DATA to vhost feature bits

2024-03-18 Thread Srujana Challa
> Subject: [EXTERNAL] [PATCH v3 for 9.1 5/6] vhost/vhost-user: Add
> VIRTIO_F_NOTIFICATION_DATA to vhost feature bits
> 
> Prioritize security for external emails: Confirm sender and content safety
> before clicking links or opening attachments
> 
> --
> Add support for the VIRTIO_F_NOTIFICATION_DATA feature across a variety of
> vhost devices.
> 
> The inclusion of VIRTIO_F_NOTIFICATION_DATA in the feature bits arrays for
> these devices ensures that the backend is capable of offering and providing
> support for this feature, and that it can be disabled if the backend does not
> support it.
> 
> Tested-by: Lei Yang 
> Reviewed-by: Eugenio Pérez 
> Signed-off-by: Jonah Palmer 
> ---
Acked-by: Srujana Challa 

>  hw/block/vhost-user-blk.c| 1 +
>  hw/net/vhost_net.c   | 2 ++
>  hw/scsi/vhost-scsi.c | 1 +
>  hw/scsi/vhost-user-scsi.c| 1 +
>  hw/virtio/vhost-user-fs.c| 2 +-
>  hw/virtio/vhost-user-vsock.c | 1 +
>  net/vhost-vdpa.c | 1 +
>  7 files changed, 8 insertions(+), 1 deletion(-)
> 
> diff --git a/hw/block/vhost-user-blk.c b/hw/block/vhost-user-blk.c index
> 6a856ad51a..983c0657da 100644
> --- a/hw/block/vhost-user-blk.c
> +++ b/hw/block/vhost-user-blk.c
> @@ -51,6 +51,7 @@ static const int user_feature_bits[] = {
>  VIRTIO_F_RING_PACKED,
>  VIRTIO_F_IOMMU_PLATFORM,
>  VIRTIO_F_RING_RESET,
> +VIRTIO_F_NOTIFICATION_DATA,
>  VHOST_INVALID_FEATURE_BIT
>  };
> 
> diff --git a/hw/net/vhost_net.c b/hw/net/vhost_net.c index
> e8e1661646..bb1f975b39 100644
> --- a/hw/net/vhost_net.c
> +++ b/hw/net/vhost_net.c
> @@ -48,6 +48,7 @@ static const int kernel_feature_bits[] = {
>  VIRTIO_F_IOMMU_PLATFORM,
>  VIRTIO_F_RING_PACKED,
>  VIRTIO_F_RING_RESET,
> +VIRTIO_F_NOTIFICATION_DATA,
>  VIRTIO_NET_F_HASH_REPORT,
>  VHOST_INVALID_FEATURE_BIT
>  };
> @@ -55,6 +56,7 @@ static const int kernel_feature_bits[] = {
>  /* Features supported by others. */
>  static const int user_feature_bits[] = {
>  VIRTIO_F_NOTIFY_ON_EMPTY,
> +VIRTIO_F_NOTIFICATION_DATA,
>  VIRTIO_RING_F_INDIRECT_DESC,
>  VIRTIO_RING_F_EVENT_IDX,
> 
> diff --git a/hw/scsi/vhost-scsi.c b/hw/scsi/vhost-scsi.c index
> ae26bc19a4..3d5fe0994d 100644
> --- a/hw/scsi/vhost-scsi.c
> +++ b/hw/scsi/vhost-scsi.c
> @@ -38,6 +38,7 @@ static const int kernel_feature_bits[] = {
>  VIRTIO_RING_F_EVENT_IDX,
>  VIRTIO_SCSI_F_HOTPLUG,
>  VIRTIO_F_RING_RESET,
> +VIRTIO_F_NOTIFICATION_DATA,
>  VHOST_INVALID_FEATURE_BIT
>  };
> 
> diff --git a/hw/scsi/vhost-user-scsi.c b/hw/scsi/vhost-user-scsi.c index
> a63b1f4948..0b050805a8 100644
> --- a/hw/scsi/vhost-user-scsi.c
> +++ b/hw/scsi/vhost-user-scsi.c
> @@ -36,6 +36,7 @@ static const int user_feature_bits[] = {
>  VIRTIO_RING_F_EVENT_IDX,
>  VIRTIO_SCSI_F_HOTPLUG,
>  VIRTIO_F_RING_RESET,
> +VIRTIO_F_NOTIFICATION_DATA,
>  VHOST_INVALID_FEATURE_BIT
>  };
> 
> diff --git a/hw/virtio/vhost-user-fs.c b/hw/virtio/vhost-user-fs.c index
> cca2cd41be..ae48cc1c96 100644
> --- a/hw/virtio/vhost-user-fs.c
> +++ b/hw/virtio/vhost-user-fs.c
> @@ -33,7 +33,7 @@ static const int user_feature_bits[] = {
>  VIRTIO_F_RING_PACKED,
>  VIRTIO_F_IOMMU_PLATFORM,
>  VIRTIO_F_RING_RESET,
> -
> +VIRTIO_F_NOTIFICATION_DATA,
>  VHOST_INVALID_FEATURE_BIT
>  };
> 
> diff --git a/hw/virtio/vhost-user-vsock.c b/hw/virtio/vhost-user-vsock.c index
> 9431b9792c..802b44a07d 100644
> --- a/hw/virtio/vhost-user-vsock.c
> +++ b/hw/virtio/vhost-user-vsock.c
> @@ -21,6 +21,7 @@ static const int user_feature_bits[] = {
>  VIRTIO_RING_F_INDIRECT_DESC,
>  VIRTIO_RING_F_EVENT_IDX,
>  VIRTIO_F_NOTIFY_ON_EMPTY,
> +VIRTIO_F_NOTIFICATION_DATA,
>  VHOST_INVALID_FEATURE_BIT
>  };
> 
> diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c index
> 2a9ddb4552..5583ce5279 100644
> --- a/net/vhost-vdpa.c
> +++ b/net/vhost-vdpa.c
> @@ -61,6 +61,7 @@ const int vdpa_feature_bits[] = {
>  VIRTIO_F_RING_PACKED,
>  VIRTIO_F_RING_RESET,
>  VIRTIO_F_VERSION_1,
> +VIRTIO_F_NOTIFICATION_DATA,
>  VIRTIO_NET_F_CSUM,
>  VIRTIO_NET_F_CTRL_GUEST_OFFLOADS,
>  VIRTIO_NET_F_CTRL_MAC_ADDR,
> --
> 2.39.3



Re: [PATCH 1/2] target/s390x: Use mutable temporary value for op_ts

2024-03-18 Thread Richard Henderson

On 3/18/24 06:26, Ilya Leoshkevich wrote:

From: Ido Plat 

Otherwise TCG would assume the register that holds t1 would be constant
and reuse whenever it needs the value within it.

Reviewed-by: Ilya Leoshkevich 
[iii: Adjust a newline and capitalization]
Signed-off-by: Ido Plat 
---
  target/s390x/tcg/translate.c | 3 ++-
  1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/target/s390x/tcg/translate.c b/target/s390x/tcg/translate.c
index 0d0c672c959..3fdddac7684 100644
--- a/target/s390x/tcg/translate.c
+++ b/target/s390x/tcg/translate.c
@@ -4781,8 +4781,9 @@ static DisasJumpType op_trXX(DisasContext *s, DisasOps *o)
  
  static DisasJumpType op_ts(DisasContext *s, DisasOps *o)

  {
-TCGv_i32 t1 = tcg_constant_i32(0xff);
+TCGv_i32 t1 = tcg_temp_new_i32();
  
+tcg_gen_movi_i32(t1, 0xff);

  tcg_gen_atomic_xchg_i32(t1, o->in2, t1, get_mem_index(s), MO_UB);


Better as

TCGv_i32 ff = tcg_constant_i32(0xff);
TCGv_i32 t1 = tcg_temp_new_i32();

tcg_gen_atomic_xchg_i32(t1, o->in2, ff, get_mem_index(s), MO_UB);

With that,
Reviewed-by: Richard Henderson 


r~



Re: [PATCH 0/3] 64 Bit support for hppa gdbstub

2024-03-18 Thread Sven Schnelle
Richard Henderson  writes:

> On 3/17/24 20:32, Sven Schnelle wrote:
>> Hi Richard,
>> Sven Schnelle  writes:
>> 
>>> Hi List,
>>>
>>> this patchset allows to debug the hppa target when running in wide (64 bit)
>>> mode. gdb needs a small patch to switch to 64 bit mode. I pushed the
>>> patch to 
>>> https://github.com/bminor/binutils-gdb/commit/fd8662ec282d688d1f8100b4365823e57516857b
>>> With this patch gdb will switch to the appropriate mode when connecting
>>> to qemu/gdbstub.
>>>
>>> Sven Schnelle (3):
>>>Revert "target/hppa: Drop attempted gdbstub support for hppa64"
>>>target/hppa: add 64 bit support to gdbstub
>>>target/hppa: mask CR_SAR register writes to 5/6 bit in gdbstub
>>>
>>>   target/hppa/gdbstub.c | 66 +--
>>>   1 file changed, 45 insertions(+), 21 deletions(-)
>> gentle ping - if i followed correctly only one patch was reviewed so
>> far.
>
> We can't really proceed until you get your gdb patch reviewed upstream.
> All that will happen in the meantime is that qemu make-check will fail.

I see. Thanks!



Re: [PATCH 0/3] 64 Bit support for hppa gdbstub

2024-03-18 Thread Richard Henderson

On 3/17/24 20:32, Sven Schnelle wrote:

Hi Richard,

Sven Schnelle  writes:


Hi List,

this patchset allows to debug the hppa target when running in wide (64 bit)
mode. gdb needs a small patch to switch to 64 bit mode. I pushed the
patch to 
https://github.com/bminor/binutils-gdb/commit/fd8662ec282d688d1f8100b4365823e57516857b
With this patch gdb will switch to the appropriate mode when connecting
to qemu/gdbstub.

Sven Schnelle (3):
   Revert "target/hppa: Drop attempted gdbstub support for hppa64"
   target/hppa: add 64 bit support to gdbstub
   target/hppa: mask CR_SAR register writes to 5/6 bit in gdbstub

  target/hppa/gdbstub.c | 66 +--
  1 file changed, 45 insertions(+), 21 deletions(-)


gentle ping - if i followed correctly only one patch was reviewed so far.


We can't really proceed until you get your gdb patch reviewed upstream.
All that will happen in the meantime is that qemu make-check will fail.


r~



Re: [PULL 0/4] machine development tool

2024-03-18 Thread Vladimir Sementsov-Ogievskiy

On 08.03.24 06:47, Peter Xu wrote:

On Thu, Mar 07, 2024 at 12:06:59PM +0300, Maksim Davydov wrote:


On 3/6/24 04:57, Peter Xu wrote:

On Tue, Mar 05, 2024 at 03:43:41PM +0100, Markus Armbruster wrote:

Peter Maydell  writes:


On Mon, 4 Mar 2024 at 13:52, Maksim Davydov  wrote:

The following changes since commit e1007b6bab5cf97705bf4f2aaec1f607787355b8:

Merge tag 'pull-request-2024-03-01' ofhttps://gitlab.com/thuth/qemu  into 
staging (2024-03-01 10:14:32 +)

are available in the Git repository at:

https://gitlab.com/davydov-max/qemu.git  tags/pull-compare-mt-2024-03-04

for you to fetch changes up to 7693a2e8518811a907d73a85807ee71dac8fabcb:

scripts: add script to compare compatibility properties (2024-03-04 
14:10:53 +0300)


Please note. This is the first pull request from me.
My public GPG key is available here
https://keys.openpgp.org/vks/v1/by-fingerprint/CDB5BEEF8837142579F5CDFE8E927E10F72F78D4


scripts: add a new script for machine development



Hi; I would prefer this to go through some existing submaintainer
tree if possible, please.

Migration?  QOM?  Not sure.  Cc'ing the maintainers anyway.

Yeah this seems like migration relevant.. however now I'm slightly confused
on when this script should be useful.

According to:

https://lore.kernel.org/qemu-devel/20240222153912.646053-5-davydov-...@yandex-team.ru/

  This script runs QEMU to obtain compat_props of machines and
  default values of different types of drivers to produce comparison
  table. This table can be used to compare machine types to choose
  the most suitable machine or compare binaries to be sure that
  migration to the newer version will save all device
  properties. Also the json or csv format of this table can be used
  to check does a new machine affect the previous ones by comparing
  tables with and without the new machine.

In regards to "choose the most suitable machine": why do you need to choose
a machine?

If it's pretty standalone setup, shouldn't we always try to use the latest
machine type if possible (as normally compat props are only used to keep
compatible with old machine types, and the default should always be
preferred). If it's a cluster setup, IMHO it should depend on the oldest
QEMU version that plans to be supported.  I don't see how such comparison
helps yet in either of the cases..

In regards to "compare binaries to be sure that migration to the newer
version will save all device properties": do we even support migrating
_between_ machine types??

Sololy relying on compat properties to detect device compatibility is also
not reliable.  For example, see VMStateField.field_exists() or similarly,
VMStateDescription.needed(), which are hooks that device can provide to
dynamically decide what device state to be saved/loaded.  Such things are
not reflected in compat properties, so even if compat properties of all
devices are the same between two machine types, it may not mean that the
migration stream will always be compatible.

Thanks,


In fact, the last commit describes the meaning of this series best. Perhaps
it should have been moved to the cover letter:
Often, many teams have their own "machines" inherited from "upstream" to
manage default values of devices. This is done because of the limitations
imposed by the compatibility requirements or the desire to help their
customers better configure their devices. And since machine type has
a hard-to-read structure, it is very easy to make a mistake when
transferring
default values from one machine to another. For example, when some property
is set for the entire abstract class x86_64-cpu (which will be applied to
all
models), and then rolled back for a specific model. The situation is about
the same with changing the default values of devices: if the value changes
in the description of the device itself, then you need to make sure that
nothing changes when using the current machine.
Therefore, there was a desire to make a dev tool that will help quickly
expand
the definition of a machine or compare several machines from different
binary
files. It can be used when rebasing to a new version of qemu and/or for
automatic tests.


OK, thanks.

So is it a migration compatibility issue that you care (migrating VMs from
your old downstream binary to new downstream binary should always succeed),
or perhaps you care more on making sure the features you wanted, i.e., you
want to make sure some specific devices that you care will have the
properties that you expect?


Actually both things.

1. We need a tool to analyze, what exactly changes between MT-s. Do we want to 
move on new upstream MT or not, how much it is different from our downstream MT 
and so on.
2. It also could be used to check, 

Re: [PATCH V6] target/loongarch: Fix tlb huge page loading issue

2024-03-18 Thread Richard Henderson

On 3/17/24 21:03, Xianglai Li wrote:

When we use qemu tcg simulation, the page size of bios is 4KB.
When using the level 2 super huge page (page size is 1G) to create the page 
table,
it is found that the content of the corresponding address space is abnormal,
resulting in the bios can not start the operating system and graphical 
interface normally.

The lddir and ldpte instruction emulation has
a problem with the use of super huge page processing above level 2.
The page size is not correctly calculated,
resulting in the wrong page size of the table entry found by tlb.

Signed-off-by: Xianglai Li
---
  target/loongarch/cpu-csr.h|   3 +
  target/loongarch/internals.h  |   5 --
  target/loongarch/tcg/tlb_helper.c | 113 +-
  3 files changed, 82 insertions(+), 39 deletions(-)


Reviewed-by: Richard Henderson 

r~



Re: [PATCH v2 1/1] cxl/mem: Fix for the index of Clear Event Record Handle

2024-03-18 Thread fan
On Mon, Mar 18, 2024 at 10:29:28AM +0800, Yuquan Wang wrote:
> The dev_dbg info for Clear Event Records mailbox command would report
> the handle of the next record to clear not the current one.
> 
> This was because the index 'i' had incremented before printing the
> current handle value.
> 
> Signed-off-by: Yuquan Wang 
> ---
>  drivers/cxl/core/mbox.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/drivers/cxl/core/mbox.c b/drivers/cxl/core/mbox.c
> index 9adda4795eb7..b810a6aa3010 100644
> --- a/drivers/cxl/core/mbox.c
> +++ b/drivers/cxl/core/mbox.c
> @@ -915,7 +915,7 @@ static int cxl_clear_event_record(struct cxl_memdev_state 
> *mds,
>  
>   payload->handles[i++] = gen->hdr.handle;
>   dev_dbg(mds->cxlds.dev, "Event log '%d': Clearing %u\n", log,
> - le16_to_cpu(payload->handles[i]));
> + le16_to_cpu(payload->handles[i-1]));

LGTM except for the space issue mentioned by Jonathan.

After the fix,
Reviewed-by: Fan Ni 

Fan
>  
>   if (i == max_handles) {
>   payload->nr_recs = i;
> -- 
> 2.34.1
> 



Re: [PATCH v2] ppc/pnv: I2C controller is not user creatable

2024-03-18 Thread Philippe Mathieu-Daudé

On 18/3/24 16:58, Cédric Le Goater wrote:

The I2C controller is a subunit of the processor. Make it so and avoid
QEMU crashes.

   $ build/qemu-system-ppc64 -S -machine powernv9 -device pnv-i2c
   qemu-system-ppc64: ../hw/ppc/pnv_i2c.c:521: pnv_i2c_realize: Assertion 
`i2c->chip' failed.
   Aborted (core dumped)

Fixes: 263b81ee15af ("ppc/pnv: Add an I2C controller model")
Cc: Glenn Miles 
Reported-by: Thomas Huth 
Reviewed-by: Thomas Huth 
Signed-off-by: Cédric Le Goater 
---
  hw/ppc/pnv_i2c.c | 3 +++
  1 file changed, 3 insertions(+)


Reviewed-by: Philippe Mathieu-Daudé 




Re: [PATCH 12/22] plugins: Remove plugin helpers

2024-03-18 Thread Alex Bennée
Richard Henderson  writes:

> These placeholder helpers are no longer required.
>
> Signed-off-by: Richard Henderson 

Reviewed-by: Alex Bennée 

-- 
Alex Bennée
Virtualisation Tech Lead @ Linaro



Re: [PATCH v3 0/4] tests/avocado: update sbsa-ref firmware to latest

2024-03-18 Thread Philippe Mathieu-Daudé

On 18/3/24 17:30, Philippe Mathieu-Daudé wrote:

Hi Marcin,

On 18/3/24 15:08, Marcin Juszkiewicz wrote:

Updating sbsa-ref firmware for QEMU CI was manual task. Now it is
replaced by CI job run on CodeLinaro Gitlab instance.

This patchset updates to current state:

- Trusted Firmware v2.10.2 (latest LTS)
- Tianocore EDK2 stable202402 (latest release)

And Tianocore EDK2-platforms commit 085c2fb (edk2-platforms does not
have releases).

Firmware images were built using Debian 'bookworm' cross gcc 12.2.0
compiler.

And while I am in that file I dropped use of 'virtio-rng-pci' device as
sbsa-ref is supposed to emulate physical hardware.

Added 'max' tests with 'pauth=off' and 'pauth-impdef=on' variants.

(01/11) test_sbsaref_edk2_firmware: PASS (2.51 s)
(02/11) test_sbsaref_alpine_linux_cortex_a57: PASS (23.72 s)
(03/11) test_sbsaref_alpine_linux_neoverse_n1: PASS (23.70 s)
(04/11) test_sbsaref_alpine_linux_max_pauth_off: PASS (23.00 s)
(05/11) test_sbsaref_alpine_linux_max_pauth_impdef: PASS (29.03 s)
(06/11) test_sbsaref_alpine_linux_max: PASS (80.69 s)


This one is timeouting for me, should we skip it with
AVOCADO_TIMEOUT_EXPECTED? (See below)


Well AVOCADO_TIMEOUT_EXPECTED is present but apparently ignored :/


(07/11) test_sbsaref_openbsd73_cortex_a57: PASS (16.05 s)
(08/11) test_sbsaref_openbsd73_neoverse_n1: PASS (15.97 s)
(09/11) test_sbsaref_openbsd73_max_pauth_off: PASS (16.22 s)
(10/11) test_sbsaref_openbsd73_max_pauth_impdef: PASS (16.11 s)
(11/11) test_sbsaref_openbsd73_max: PASS (16.08 s)

Signed-off-by: Marcin Juszkiewicz 
---
Changes in v3:
- left OpenBSD at 7.3 (7.4+ is known to not boot)
   https://gitlab.com/qemu-project/qemu/-/issues/2224
   https://marc.info/?l=openbsd-arm=171050428327850=2
- added pauth variants of 'max' to OpenBSD tests
- Link to v2: 
https://lore.kernel.org/r/20240314-sbsa-ref-firmware-update-v2-0-b557c5655...@linaro.org


Changes in v2:
- disabled 'max' tests on OpenBSD
- moved tags to 'one tag per line'
- added 'os:linux' tags to Alpine ones
- Link to v1: 
https://lore.kernel.org/r/20240313-sbsa-ref-firmware-update-v1-0-e166703c5...@linaro.org


---
Marcin Juszkiewicz (4):
   tests/avocado: update sbsa-ref firmware
   tests/avocado: drop virtio-rng from sbsa-ref tests
   tests/avocado: sbsa-ref: add Alpine tests for misc 'max' setup
   tests/avocado: sbsa-ref: add OpenBSD tests for misc 'max' setup


$ make check-avocado AVOCADO_TAGS='machine:sbsa-ref'
ninja: no work to do.
JOB ID : 76d5dc90c6f70f0801c5269ff1c1db6c0d2cb27b
JOB LOG    : 
build/system_arm/tests/results/job-2024-03-18T15.54-76d5dc9/job.log
  (1/7) 
tests/avocado/machine_aarch64_sbsaref.py:Aarch64SbsarefMachine.test_sbsaref_edk2_firmware: PASS (4.96 s)
  (2/7) 
tests/avocado/machine_aarch64_sbsaref.py:Aarch64SbsarefMachine.test_sbsaref_alpine_linux_cortex_a57: PASS (52.67 s)
  (3/7) 
tests/avocado/machine_aarch64_sbsaref.py:Aarch64SbsarefMachine.test_sbsaref_alpine_linux_neoverse_n1: PASS (51.01 s)
  (4/7) 
tests/avocado/machine_aarch64_sbsaref.py:Aarch64SbsarefMachine.test_sbsaref_alpine_linux_max: INTERRUPTED: Test interrupted by SIGTERM\nRunner error occurred: Timeout reached\nOriginal status: ERROR\n{'name': '4-tests/avocado/machine_aarch64_sbsaref.py:Aarch64SbsarefMachine.test_sbsaref_alpine_linux_max', 'logdir': 'build/system_... (180.50 s)
  (5/7) 
tests/avocado/machine_aarch64_sbsaref.py:Aarch64SbsarefMachine.test_sbsaref_openbsd73_cortex_a57: PASS (21.15 s)
  (6/7) 
tests/avocado/machine_aarch64_sbsaref.py:Aarch64SbsarefMachine.test_sbsaref_openbsd73_neoverse_n1: PASS (20.88 s)
  (7/7) 
tests/avocado/machine_aarch64_sbsaref.py:Aarch64SbsarefMachine.test_sbsaref_openbsd73_max: PASS (20.70 s)
RESULTS    : PASS 6 | ERROR 0 | FAIL 0 | SKIP 0 | WARN 0 | INTERRUPT 1 | 
CANCEL 0

JOB TIME   : 355.21 s
make: *** [tests/Makefile.include:139: check-avocado] Error 8

Looking at debug.log:

15:56:17 DEBUG| Opening console file
15:56:17 DEBUG| Opening console socket
15:56:17 DEBUG| NOTICE:  Booting Trusted Firmware
15:56:17 DEBUG| NOTICE:  BL1: v2.9(release):v2.9.0-764-g7c3ff62d2
15:56:17 DEBUG| NOTICE:  BL1: Built : 12:32:06, Sep 27 2023
15:56:17 DEBUG| NOTICE:  BL1: Booting BL2
15:56:17 DEBUG| NOTICE:  BL2: v2.9(release):v2.9.0-764-g7c3ff62d2
15:56:17 DEBUG| NOTICE:  BL2: Built : 12:32:06, Sep 27 2023
15:56:17 DEBUG| NOTICE:  BL1: Booting BL31
15:56:17 DEBUG| NOTICE:  BL31: v2.9(release):v2.9.0-764-g7c3ff62d2
15:56:17 DEBUG| NOTICE:  BL31: Built : 12:32:06, Sep 27 2023
15:56:17 DEBUG| UEFI firmware (version 1.0 built at 15:45:23 on Sep 20 
2023)

...
15:57:48 DEBUG| * Mounting security filesystem ... [ ok ]
15:57:49 DEBUG| * Mounting debug filesystem ... [ ok ]
15:57:49 DEBUG| * Mounting persistent storage (pstore) filesystem ... [ 
ok ]

15:57:50 DEBUG| * Mounting efivarfs filesystem ... [ ok ]
15:57:51 DEBUG| * Starting busybox mdev ... [ ok ]
15:58:05 DEBUG| * Scanning hardware for mdev ... [ ok ]
15:59:05 DEBUG| * Loading hardware drivers ... [ ok ]
15:59:05 DEBUG| * WARNING: 

Re: [PATCH v4 14/25] memory: Add Error** argument to the global_dirty_log routines

2024-03-18 Thread Peter Xu
On Mon, Mar 18, 2024 at 05:08:13PM +0100, Cédric Le Goater wrote:
> > > --- a/migration/ram.c
> > > +++ b/migration/ram.c
> > > @@ -2836,18 +2836,31 @@ static void 
> > > migration_bitmap_clear_discarded_pages(RAMState *rs)
> > >   static void ram_init_bitmaps(RAMState *rs)
> > >   {
> > > +Error *local_err = NULL;
> > > +bool ret = true;
> > > +
> > >   qemu_mutex_lock_ramlist();
> > >   WITH_RCU_READ_LOCK_GUARD() {
> > >   ram_list_init_bitmaps();
> 
> btw, should we use bitmap_try_new() to create the bitmaps instead of
> bitmap_new() which can abort() ?

I'm not sure how much it'll help in reality; if allocation can fail here I
would expect qemu crash sooner or later.. but I agree the try_new() seems
reasonable too to be used here if this can fail now, after all migration is
extra feature on top of VM's emulation functions, so it's optional.

Thanks,

-- 
Peter Xu




Re: [PATCH v3 3/4] tests/avocado: sbsa-ref: add Alpine tests for misc 'max' setup

2024-03-18 Thread Philippe Mathieu-Daudé

On 18/3/24 15:08, Marcin Juszkiewicz wrote:

PAuth makes run timeout on CI so add tests using 'max' without it
and with impdef one.

Signed-off-by: Marcin Juszkiewicz 
---
  tests/avocado/machine_aarch64_sbsaref.py | 18 ++
  1 file changed, 18 insertions(+)

diff --git a/tests/avocado/machine_aarch64_sbsaref.py 
b/tests/avocado/machine_aarch64_sbsaref.py
index 259225f15f..cf8954d02e 100644
--- a/tests/avocado/machine_aarch64_sbsaref.py
+++ b/tests/avocado/machine_aarch64_sbsaref.py
@@ -140,18 +140,36 @@ def boot_alpine_linux(self, cpu):
  def test_sbsaref_alpine_linux_cortex_a57(self):
  """
  :avocado: tags=cpu:cortex-a57
+:avocado: tags=os:linux
  """
  self.boot_alpine_linux("cortex-a57")
  
  def test_sbsaref_alpine_linux_neoverse_n1(self):

  """
  :avocado: tags=cpu:neoverse-n1
+:avocado: tags=os:linux
  """
  self.boot_alpine_linux("neoverse-n1")
  
+def test_sbsaref_alpine_linux_max_pauth_off(self):

+"""
+:avocado: tags=cpu:max
+:avocado: tags=os:linux
+"""
+self.boot_alpine_linux("max,pauth=off")
+
+def test_sbsaref_alpine_linux_max_pauth_impdef(self):
+"""
+:avocado: tags=cpu:max
+:avocado: tags=os:linux
+"""
+self.boot_alpine_linux("max,pauth-impdef=on")
+
+@skipUnless(os.getenv('AVOCADO_TIMEOUT_EXPECTED'), 'Test might timeout')


Odd, this tag doesn't seem processed in my test. Anyhow,

Tested-by: Philippe Mathieu-Daudé 
Reviewed-by: Philippe Mathieu-Daudé 


  def test_sbsaref_alpine_linux_max(self):
  """
  :avocado: tags=cpu:max
+:avocado: tags=os:linux
  """
  self.boot_alpine_linux("max")
  






Re: [PATCH v3 4/4] tests/avocado: sbsa-ref: add OpenBSD tests for misc 'max' setup

2024-03-18 Thread Philippe Mathieu-Daudé

On 18/3/24 15:08, Marcin Juszkiewicz wrote:

PAuth makes run timeout on CI so add tests using 'max' without
it and with impdef one.

Signed-off-by: Marcin Juszkiewicz 
---
  tests/avocado/machine_aarch64_sbsaref.py | 20 +++-
  1 file changed, 19 insertions(+), 1 deletion(-)

diff --git a/tests/avocado/machine_aarch64_sbsaref.py 
b/tests/avocado/machine_aarch64_sbsaref.py
index cf8954d02e..98c76c1ff7 100644
--- a/tests/avocado/machine_aarch64_sbsaref.py
+++ b/tests/avocado/machine_aarch64_sbsaref.py
@@ -203,18 +203,36 @@ def boot_openbsd73(self, cpu):
  def test_sbsaref_openbsd73_cortex_a57(self):
  """
  :avocado: tags=cpu:cortex-a57
+:avocado: tags=os:openbsd
  """
  self.boot_openbsd73("cortex-a57")
  
  def test_sbsaref_openbsd73_neoverse_n1(self):

  """
  :avocado: tags=cpu:neoverse-n1
+:avocado: tags=os:openbsd
  """
  self.boot_openbsd73("neoverse-n1")
  
+def test_sbsaref_openbsd73_max_pauth_off(self):

+"""
+:avocado: tags=cpu:max
+:avocado: tags=os:openbsd
+"""
+self.boot_openbsd73("max,pauth=off")
+
+@skipUnless(os.getenv('AVOCADO_TIMEOUT_EXPECTED'), 'Test might timeout')


This one worked well,


+def test_sbsaref_openbsd73_max_pauth_impdef(self):
+"""
+:avocado: tags=cpu:max
+:avocado: tags=os:openbsd
+"""
+self.boot_openbsd73("max,pauth-impdef=on")
+
+@skipUnless(os.getenv('AVOCADO_TIMEOUT_EXPECTED'), 'Test might timeout')


and also this one. Do we still expect timeout?


  def test_sbsaref_openbsd73_max(self):
  """
  :avocado: tags=cpu:max
+:avocado: tags=os:openbsd
  """
  self.boot_openbsd73("max")
-



Tested-by: Philippe Mathieu-Daudé 
Reviewed-by: Philippe Mathieu-Daudé 




Re: [PATCH 1/2] target/s390x: Use mutable temporary value for op_ts

2024-03-18 Thread Thomas Huth

On 18/03/2024 17.26, Ilya Leoshkevich wrote:

From: Ido Plat 

Otherwise TCG would assume the register that holds t1 would be constant
and reuse whenever it needs the value within it.

Reviewed-by: Ilya Leoshkevich 
[iii: Adjust a newline and capitalization]
Signed-off-by: Ido Plat 


 Hi Ilya,

please add your Signed-off-by as well when the patch is sent out by you.

 Thanks,
  Thomas





Re: [PATCH v3 1/4] tests/avocado: update sbsa-ref firmware

2024-03-18 Thread Philippe Mathieu-Daudé

On 18/3/24 15:08, Marcin Juszkiewicz wrote:

We now have CI job to build those and publish in space with
readable urls.

Firmware is built using Debian 'bookworm' cross toolchain (gcc 12.2.0).

Used versions:

- Trusted Firmware v2.10.2
- Tianocore EDK2 stable202402
- Tianocore EDK2 Platforms code commit 085c2fb

Signed-off-by: Marcin Juszkiewicz 
---
  tests/avocado/machine_aarch64_sbsaref.py | 40 +---
  1 file changed, 21 insertions(+), 19 deletions(-)


Tested-by: Philippe Mathieu-Daudé 




Re: [PATCH v3 0/4] tests/avocado: update sbsa-ref firmware to latest

2024-03-18 Thread Philippe Mathieu-Daudé

Hi Marcin,

On 18/3/24 15:08, Marcin Juszkiewicz wrote:

Updating sbsa-ref firmware for QEMU CI was manual task. Now it is
replaced by CI job run on CodeLinaro Gitlab instance.

This patchset updates to current state:

- Trusted Firmware v2.10.2 (latest LTS)
- Tianocore EDK2 stable202402 (latest release)

And Tianocore EDK2-platforms commit 085c2fb (edk2-platforms does not
have releases).

Firmware images were built using Debian 'bookworm' cross gcc 12.2.0
compiler.

And while I am in that file I dropped use of 'virtio-rng-pci' device as
sbsa-ref is supposed to emulate physical hardware.

Added 'max' tests with 'pauth=off' and 'pauth-impdef=on' variants.

(01/11) test_sbsaref_edk2_firmware: PASS (2.51 s)
(02/11) test_sbsaref_alpine_linux_cortex_a57: PASS (23.72 s)
(03/11) test_sbsaref_alpine_linux_neoverse_n1: PASS (23.70 s)
(04/11) test_sbsaref_alpine_linux_max_pauth_off: PASS (23.00 s)
(05/11) test_sbsaref_alpine_linux_max_pauth_impdef: PASS (29.03 s)
(06/11) test_sbsaref_alpine_linux_max: PASS (80.69 s)


This one is timeouting for me, should we skip it with
AVOCADO_TIMEOUT_EXPECTED? (See below)


(07/11) test_sbsaref_openbsd73_cortex_a57: PASS (16.05 s)
(08/11) test_sbsaref_openbsd73_neoverse_n1: PASS (15.97 s)
(09/11) test_sbsaref_openbsd73_max_pauth_off: PASS (16.22 s)
(10/11) test_sbsaref_openbsd73_max_pauth_impdef: PASS (16.11 s)
(11/11) test_sbsaref_openbsd73_max: PASS (16.08 s)

Signed-off-by: Marcin Juszkiewicz 
---
Changes in v3:
- left OpenBSD at 7.3 (7.4+ is known to not boot)
   https://gitlab.com/qemu-project/qemu/-/issues/2224
   https://marc.info/?l=openbsd-arm=171050428327850=2
- added pauth variants of 'max' to OpenBSD tests
- Link to v2: 
https://lore.kernel.org/r/20240314-sbsa-ref-firmware-update-v2-0-b557c5655...@linaro.org

Changes in v2:
- disabled 'max' tests on OpenBSD
- moved tags to 'one tag per line'
- added 'os:linux' tags to Alpine ones
- Link to v1: 
https://lore.kernel.org/r/20240313-sbsa-ref-firmware-update-v1-0-e166703c5...@linaro.org

---
Marcin Juszkiewicz (4):
   tests/avocado: update sbsa-ref firmware
   tests/avocado: drop virtio-rng from sbsa-ref tests
   tests/avocado: sbsa-ref: add Alpine tests for misc 'max' setup
   tests/avocado: sbsa-ref: add OpenBSD tests for misc 'max' setup


$ make check-avocado AVOCADO_TAGS='machine:sbsa-ref'
ninja: no work to do.
JOB ID : 76d5dc90c6f70f0801c5269ff1c1db6c0d2cb27b
JOB LOG: 
build/system_arm/tests/results/job-2024-03-18T15.54-76d5dc9/job.log
 (1/7) 
tests/avocado/machine_aarch64_sbsaref.py:Aarch64SbsarefMachine.test_sbsaref_edk2_firmware: 
PASS (4.96 s)
 (2/7) 
tests/avocado/machine_aarch64_sbsaref.py:Aarch64SbsarefMachine.test_sbsaref_alpine_linux_cortex_a57: 
PASS (52.67 s)
 (3/7) 
tests/avocado/machine_aarch64_sbsaref.py:Aarch64SbsarefMachine.test_sbsaref_alpine_linux_neoverse_n1: 
PASS (51.01 s)
 (4/7) 
tests/avocado/machine_aarch64_sbsaref.py:Aarch64SbsarefMachine.test_sbsaref_alpine_linux_max: 
INTERRUPTED: Test interrupted by SIGTERM\nRunner error occurred: Timeout 
reached\nOriginal status: ERROR\n{'name': 
'4-tests/avocado/machine_aarch64_sbsaref.py:Aarch64SbsarefMachine.test_sbsaref_alpine_linux_max', 
'logdir': 'build/system_... (180.50 s)
 (5/7) 
tests/avocado/machine_aarch64_sbsaref.py:Aarch64SbsarefMachine.test_sbsaref_openbsd73_cortex_a57: 
PASS (21.15 s)
 (6/7) 
tests/avocado/machine_aarch64_sbsaref.py:Aarch64SbsarefMachine.test_sbsaref_openbsd73_neoverse_n1: 
PASS (20.88 s)
 (7/7) 
tests/avocado/machine_aarch64_sbsaref.py:Aarch64SbsarefMachine.test_sbsaref_openbsd73_max: 
PASS (20.70 s)
RESULTS: PASS 6 | ERROR 0 | FAIL 0 | SKIP 0 | WARN 0 | INTERRUPT 1 | 
CANCEL 0

JOB TIME   : 355.21 s
make: *** [tests/Makefile.include:139: check-avocado] Error 8

Looking at debug.log:

15:56:17 DEBUG| Opening console file
15:56:17 DEBUG| Opening console socket
15:56:17 DEBUG| NOTICE:  Booting Trusted Firmware
15:56:17 DEBUG| NOTICE:  BL1: v2.9(release):v2.9.0-764-g7c3ff62d2
15:56:17 DEBUG| NOTICE:  BL1: Built : 12:32:06, Sep 27 2023
15:56:17 DEBUG| NOTICE:  BL1: Booting BL2
15:56:17 DEBUG| NOTICE:  BL2: v2.9(release):v2.9.0-764-g7c3ff62d2
15:56:17 DEBUG| NOTICE:  BL2: Built : 12:32:06, Sep 27 2023
15:56:17 DEBUG| NOTICE:  BL1: Booting BL31
15:56:17 DEBUG| NOTICE:  BL31: v2.9(release):v2.9.0-764-g7c3ff62d2
15:56:17 DEBUG| NOTICE:  BL31: Built : 12:32:06, Sep 27 2023
15:56:17 DEBUG| UEFI firmware (version 1.0 built at 15:45:23 on Sep 20 2023)
...
15:57:48 DEBUG| * Mounting security filesystem ... [ ok ]
15:57:49 DEBUG| * Mounting debug filesystem ... [ ok ]
15:57:49 DEBUG| * Mounting persistent storage (pstore) filesystem ... [ ok ]
15:57:50 DEBUG| * Mounting efivarfs filesystem ... [ ok ]
15:57:51 DEBUG| * Starting busybox mdev ... [ ok ]
15:58:05 DEBUG| * Scanning hardware for mdev ... [ ok ]
15:59:05 DEBUG| * Loading hardware drivers ... [ ok ]
15:59:05 DEBUG| * WARNING: clock skew detected!
15:59:07 DEBUG| * Setting the local clock based on last shutdown time 
... [ ok ]

15:59:11 DEBUG| * 

Re: [PATCH v4 13/25] memory: Add Error** argument to .log_global_start() handler

2024-03-18 Thread Peter Xu
On Mon, Mar 18, 2024 at 03:54:28PM +0100, Cédric Le Goater wrote:
> On 3/15/24 12:18, Peter Xu wrote:
> > > @@ -3009,13 +3045,16 @@ static void 
> > > listener_add_address_space(MemoryListener *listener,
> > >   {
> > >   FlatView *view;
> > >   FlatRange *fr;
> > > +Error *local_err = NULL;
> > >   if (listener->begin) {
> > >   listener->begin(listener);
> > >   }
> > >   if (global_dirty_tracking) {
> > >   if (listener->log_global_start) {
> > > -listener->log_global_start(listener);
> > > +if (!listener->log_global_start(listener, _err)) {
> > > +error_report_err(local_err);
> > > +}
> > IMHO we should assert here instead of error report.  We have this to guard
> > hot-plug during migration so I think the assert is justified:
> > 
> > qdev_device_add_from_qdict():
> > 
> >  if (!migration_is_idle()) {
> >  error_setg(errp, "device_add not allowed while migrating");
> >  return NULL;
> >  }
> > 
> > If it really happens it's a bug, as listener_add_address_space() will still
> > keep the rest things around even if the hook failed.  It'll start to be a
> > total mess..
> 
> It seems that adding a region listener while logging is active has been
> supported from the beginning, commit 7664e80c8470 ("memory: add API for
> observing  updates to the physical memory map"). Can it happen ? if not
> we could simply remove the  log_global_start() call.

IMHO we'd better keep it for the sake of logic completeness, even though I
don't know when it'll be useful..

I think it's safe to assert because log_global_start() should only be
triggered by either vhost/vfio with current code base when reaching here.
It doesn't mean that in the future all log_global_start() hooks are based
on a device object. E.g., there's the other Xen user, it just won't trigger
either, afaict.  So the assert should be safe.

In the future maybe we could allow other things to trigger here besides
device, but obviously we're not ready for failing it.  Instead of adding
the failure handling which will never be used for now, IIUC it's simpler we
just provide an assert until someone add a real user of such.

Thanks,

-- 
Peter Xu




[PATCH 2/2] tests/tcg/s390x: Test TEST AND SET

2024-03-18 Thread Ilya Leoshkevich
Add a small test to prevent regressions.

Signed-off-by: Ilya Leoshkevich 
---
 tests/tcg/s390x/Makefile.target |  1 +
 tests/tcg/s390x/ts.c| 35 +
 2 files changed, 36 insertions(+)
 create mode 100644 tests/tcg/s390x/ts.c

diff --git a/tests/tcg/s390x/Makefile.target b/tests/tcg/s390x/Makefile.target
index e2aba2ec274..a8f86c94498 100644
--- a/tests/tcg/s390x/Makefile.target
+++ b/tests/tcg/s390x/Makefile.target
@@ -47,6 +47,7 @@ TESTS+=add-logical-with-carry
 TESTS+=lae
 TESTS+=cvd
 TESTS+=cvb
+TESTS+=ts
 
 cdsg: CFLAGS+=-pthread
 cdsg: LDFLAGS+=-pthread
diff --git a/tests/tcg/s390x/ts.c b/tests/tcg/s390x/ts.c
new file mode 100644
index 000..441faf30d98
--- /dev/null
+++ b/tests/tcg/s390x/ts.c
@@ -0,0 +1,35 @@
+/*
+ * Test the TEST AND SET instruction.
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+#include 
+#include 
+
+static int ts(char *p)
+{
+int cc;
+
+asm("ts %[p]\n"
+"ipm %[cc]"
+: [cc] "=r" (cc)
+, [p] "+Q" (*p)
+: : "cc");
+
+return (cc >> 28) & 3;
+}
+
+int main(void)
+{
+char c;
+
+c = 0x80;
+assert(ts() == 1);
+assert(c == 0xff);
+
+c = 0x7f;
+assert(ts() == 0);
+assert(c == 0xff);
+
+return EXIT_SUCCESS;
+}
-- 
2.44.0




[PATCH 1/2] target/s390x: Use mutable temporary value for op_ts

2024-03-18 Thread Ilya Leoshkevich
From: Ido Plat 

Otherwise TCG would assume the register that holds t1 would be constant
and reuse whenever it needs the value within it.

Reviewed-by: Ilya Leoshkevich 
[iii: Adjust a newline and capitalization]
Signed-off-by: Ido Plat 
---
 target/s390x/tcg/translate.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/target/s390x/tcg/translate.c b/target/s390x/tcg/translate.c
index 0d0c672c959..3fdddac7684 100644
--- a/target/s390x/tcg/translate.c
+++ b/target/s390x/tcg/translate.c
@@ -4781,8 +4781,9 @@ static DisasJumpType op_trXX(DisasContext *s, DisasOps *o)
 
 static DisasJumpType op_ts(DisasContext *s, DisasOps *o)
 {
-TCGv_i32 t1 = tcg_constant_i32(0xff);
+TCGv_i32 t1 = tcg_temp_new_i32();
 
+tcg_gen_movi_i32(t1, 0xff);
 tcg_gen_atomic_xchg_i32(t1, o->in2, t1, get_mem_index(s), MO_UB);
 tcg_gen_extract_i32(cc_op, t1, 7, 1);
 set_cc_static(s);
-- 
2.44.0




Re: [PATCH 9/9] docs/system: Add documentation on support for IGVM

2024-03-18 Thread Daniel P . Berrangé
On Mon, Mar 18, 2024 at 03:59:31PM +, Roy Hopkins wrote:
> On Fri, 2024-03-01 at 17:10 +, Daniel P. Berrangé wrote:
> > On Tue, Feb 27, 2024 at 02:50:15PM +, Roy Hopkins wrote:
> > > IGVM support has been implemented for Confidential Guests that support
> > > AMD SEV and AMD SEV-ES. Add some documentation that gives some
> > > background on the IGVM format and how to use it to configure a
> > > confidential guest.
> > > 
> > > Signed-off-by: Roy Hopkins 
> > > ---
> > >  docs/system/igvm.rst  | 58 +++
> > >  docs/system/index.rst |  1 +
> > >  2 files changed, 59 insertions(+)
> > >  create mode 100644 docs/system/igvm.rst
> > 
> > 
> > > +Firmware Images with IGVM
> > > +-
> > > +
> > > +When an IGVM filename is specified for a Confidential Guest Support 
> > > object
> > > it
> > > +overrides the default handling of system firmware: the firmware image, 
> > > such
> > > as
> > > +an OVMF binary should be contained as a payload of the IGVM file and not
> > > +provided as a flash drive. The default QEMU firmware is not automatically
> > > mapped
> > > +into guest memory.
> > 
> > IIUC, in future the IGVM file could contain both the OVMF and SVSM
> > binaries ?
> > 
> > I'm also wondering if there can be dependancies between the IGVM
> > file and the broader QEMU configuration ?  eg if SVSM gains suupport
> > for data persistence, potentially we might need some pflash device
> > exposed as storage for SVSM to use. Would such a dependancy be
> > something expressed in the IGVM file, or would it be knowledge that
> > is out of band ?
> > 
> Yes, the IGVM file can indeed contain both OVMF and SVSM binaries. In fact, 
> that
> is exactly what we are doing with the COCONUT-SVSM project. See [1] for the 
> IGVM
> builder we use to package OVMF, bootloader components and the SVSM ELF binary.
> 
> Data persistence is something that is definitely going to be needed in the 
> SVSM.
> At present, this cannot be configured using any of the directives in the IGVM
> specification but instead requires QEMU to be configured correctly to support
> the application embedded within the IGVM file itself. You could however 
> populate
> metadata pages using IGVM that describe the storage that is _expected_ to be
> present, and validate that within the firmware itself. 
> 
> The real value from IGVM comes from the ability to describe the initial memory
> and initial CPU state which all forms part of the launch measurement and 
> initial
> boot procedure, allowing the expected launch measurement to be calculated 
> from a
> single IGVM file for multiple virtualisation stacks or configurations. Thus,
> most of the directives in the IGVM file directly have an effect on the launch
> measurement. I'm not sure configuring a storage device or other hardware
> configuration fits well with this.

Yeah, I can understand if IGVM scope should be limited to just memory
and CPU setup.

If we use the firmeware descriptor files, we could define capabilities
in that to express a need for a particular type of persistent storage
to back the vTPM. So having this info in IGVM files isn't critical.

> > Finally, if we think of the IGVM file as simply yet another firmware
> > file format, then it raises of question of integration into the
> > QEMU firmware descriptors.
> > 
> > Right now when defining a guest in libvirt if you can say 'type=bios'
> > or 'type=uefi', and libvirt consults the firmware descriptors to find
> > the binary to use.
> > 
> > If the OS distro provides IGVM files instead of traditional raw OVMF
> > binaries for SEV/TDX/etc, then from libvirt's POV I think having this
> > expressed in the firmware descriptors is highly desirable.
> > 
> 
> Whether IGVM is just another firmware file format or not, it certainly is used
> mutually exclusively with other firmware files. Integration with firmware
> descriptors does seem to make sense. 
> 
> One further question if this is the case, would we want to switch from
> specifying an "igvm-file" as a parameter on the "ConfidentialGuestSupport"
> object to providing the file using the "-bios" parameter, or maybe even a
> dedicated "-igvm" parameter?

If the IGVM format is flexible enough that it could be used for any VM
type, even non-confidential VMs, then having its config be separate from
ConfidentialGuestSUpport would make sense. If it is fundamentally tied
to CVMs, then just a property is fine I guess.

Probably best to stay away from -bios, to avoid overloading new semantics
onto a long standing argument.

With regards,
Daniel
-- 
|: https://berrange.com  -o-https://www.flickr.com/photos/dberrange :|
|: https://libvirt.org -o-https://fstop138.berrange.com :|
|: https://entangle-photo.org-o-https://www.instagram.com/dberrange :|




[PULL 21/24] tests: acpi/smbios: whitelist expected blobs

2024-03-18 Thread Michael S. Tsirkin
From: Igor Mammedov 

Signed-off-by: Igor Mammedov 
Acked-by: Ani Sinha 
Message-Id: <20240314152302.2324164-19-imamm...@redhat.com>
Reviewed-by: Michael S. Tsirkin 
Signed-off-by: Michael S. Tsirkin 
---
 tests/qtest/bios-tables-test-allowed-diff.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tests/qtest/bios-tables-test-allowed-diff.h 
b/tests/qtest/bios-tables-test-allowed-diff.h
index dfb8523c8b..81148a604f 100644
--- a/tests/qtest/bios-tables-test-allowed-diff.h
+++ b/tests/qtest/bios-tables-test-allowed-diff.h
@@ -1 +1,2 @@
 /* List of comma-separated changed AML files to ignore */
+"tests/data/acpi/q35/SSDT.dimmpxm",
-- 
MST




[PULL 19/24] smbios: in case of entry point is 'auto' try to build v2 tables 1st

2024-03-18 Thread Michael S. Tsirkin
From: Igor Mammedov 

QEMU for some time now uses SMBIOS 3.0 for PC/Q35 machines by
default, however Windows has a bug in locating SMBIOS 3.0
entrypoint and fails to find tables when booted on SeaBIOS
(on UEFI SMBIOS 3.0 tables work fine since firmware hands
over tables in another way)

Missing SMBIOS tables may lead to some issues for guest
though (worst are: possible reactiveation, inability to
get virtio drivers from 'Windows Update')

It's unclear  at this point if MS will fix the issue on their
side. So instead of it (or rather in addition) this patch
will try to workaround the issue.

aka, use smbios-entry-point-type=auto to make QEMU try
generating conservative SMBIOS 2.0 tables and if that
fails (due to limits/requested configuration) fallback
to SMBIOS 3.0 tables.

With this in place majority of users will use SMBIOS 2.0
tables which work fine with (Windows + legacy BIOS).
The configurations that is not to possible to describe
with SMBIOS 2.0 will switch automatically to SMBIOS 3.0
(which will trigger Windows bug but there is nothing
QEMU can do here, so go and aks Microsoft to real fix).

Signed-off-by: Igor Mammedov 
Reviewed-by: Ani Sinha 
Tested-by: Fiona Ebner 
Message-Id: <20240314152302.2324164-17-imamm...@redhat.com>
Reviewed-by: Michael S. Tsirkin 
Signed-off-by: Michael S. Tsirkin 
---
 hw/smbios/smbios.c | 52 +++---
 1 file changed, 49 insertions(+), 3 deletions(-)

diff --git a/hw/smbios/smbios.c b/hw/smbios/smbios.c
index 48713aa505..b0467347c5 100644
--- a/hw/smbios/smbios.c
+++ b/hw/smbios/smbios.c
@@ -1082,7 +1082,7 @@ static void smbios_entry_point_setup(SmbiosEntryPointType 
ep_type)
 }
 }
 
-void smbios_get_tables(MachineState *ms,
+static bool smbios_get_tables_ep(MachineState *ms,
SmbiosEntryPointType ep_type,
const struct smbios_phys_mem_area *mem_array,
const unsigned int mem_array_size,
@@ -1091,6 +1091,7 @@ void smbios_get_tables(MachineState *ms,
Error **errp)
 {
 unsigned i, dimm_cnt, offset;
+ERRP_GUARD();
 
 assert(ep_type == SMBIOS_ENTRY_POINT_TYPE_32 ||
ep_type == SMBIOS_ENTRY_POINT_TYPE_64);
@@ -1177,11 +1178,56 @@ void smbios_get_tables(MachineState *ms,
 abort();
 }
 
-return;
+return true;
 err_exit:
 g_free(smbios_tables);
 smbios_tables = NULL;
-return;
+return false;
+}
+
+void smbios_get_tables(MachineState *ms,
+   SmbiosEntryPointType ep_type,
+   const struct smbios_phys_mem_area *mem_array,
+   const unsigned int mem_array_size,
+   uint8_t **tables, size_t *tables_len,
+   uint8_t **anchor, size_t *anchor_len,
+   Error **errp)
+{
+Error *local_err = NULL;
+bool is_valid;
+ERRP_GUARD();
+
+switch (ep_type) {
+case SMBIOS_ENTRY_POINT_TYPE_AUTO:
+case SMBIOS_ENTRY_POINT_TYPE_32:
+is_valid = smbios_get_tables_ep(ms, SMBIOS_ENTRY_POINT_TYPE_32,
+mem_array, mem_array_size,
+tables, tables_len,
+anchor, anchor_len,
+_err);
+if (is_valid || ep_type != SMBIOS_ENTRY_POINT_TYPE_AUTO) {
+break;
+}
+/*
+ * fall through in case AUTO endpoint is selected and
+ * SMBIOS 2.x tables can't be generated, to try if SMBIOS 3.x
+ * tables would work
+ */
+case SMBIOS_ENTRY_POINT_TYPE_64:
+error_free(local_err);
+local_err = NULL;
+is_valid = smbios_get_tables_ep(ms, SMBIOS_ENTRY_POINT_TYPE_64,
+mem_array, mem_array_size,
+tables, tables_len,
+anchor, anchor_len,
+_err);
+break;
+default:
+abort();
+}
+if (!is_valid) {
+error_propagate(errp, local_err);
+}
 }
 
 static void save_opt(const char **dest, QemuOpts *opts, const char *name)
-- 
MST




  1   2   3   4   >