date:20201202

[PATCH v2 4/4] block/iscsi: Use lock guard macros

2020-12-02 Thread Gan Qixin

Replace manual lock()/unlock() calls with lock guard macros
(QEMU_LOCK_GUARD/WITH_QEMU_LOCK_GUARD) in block/iscsi.

Signed-off-by: Gan Qixin 
---
Cc: Kevin Wolf 
Cc: Paolo Bonzini 
Cc: Markus Armbruster 
---
 block/iscsi.c | 50 --
 1 file changed, 24 insertions(+), 26 deletions(-)

diff --git a/block/iscsi.c b/block/iscsi.c
index e30a7e3606..7d4b3b56d5 100644
--- a/block/iscsi.c
+++ b/block/iscsi.c
@@ -322,25 +322,23 @@ iscsi_aio_cancel(BlockAIOCB *blockacb)
 IscsiAIOCB *acb = (IscsiAIOCB *)blockacb;
 IscsiLun *iscsilun = acb->iscsilun;
 
-qemu_mutex_lock(>mutex);
+WITH_QEMU_LOCK_GUARD(>mutex) {
 
-/* If it was cancelled or completed already, our work is done here */
-if (acb->cancelled || acb->status != -EINPROGRESS) {
-qemu_mutex_unlock(>mutex);
-return;
-}
+/* If it was cancelled or completed already, our work is done here */
+if (acb->cancelled || acb->status != -EINPROGRESS) {
+return;
+}
 
-acb->cancelled = true;
+acb->cancelled = true;
 
-qemu_aio_ref(acb); /* released in iscsi_abort_task_cb() */
+qemu_aio_ref(acb); /* released in iscsi_abort_task_cb() */
 
-/* send a task mgmt call to the target to cancel the task on the target */
-if (iscsi_task_mgmt_abort_task_async(iscsilun->iscsi, acb->task,
- iscsi_abort_task_cb, acb) < 0) {
-qemu_aio_unref(acb); /* since iscsi_abort_task_cb() won't be called */
+/* send a task mgmt call to the target to cancel the task on the 
target */
+if (iscsi_task_mgmt_abort_task_async(iscsilun->iscsi, acb->task,
+ iscsi_abort_task_cb, acb) < 0) {
+qemu_aio_unref(acb); /* since iscsi_abort_task_cb() won't be 
called */
+}
 }
-
-qemu_mutex_unlock(>mutex);
 }
 
 static const AIOCBInfo iscsi_aiocb_info = {
@@ -375,22 +373,22 @@ static void iscsi_timed_check_events(void *opaque)
 {
 IscsiLun *iscsilun = opaque;
 
-qemu_mutex_lock(>mutex);
+WITH_QEMU_LOCK_GUARD(>mutex) {
+/* check for timed out requests */
+iscsi_service(iscsilun->iscsi, 0);
 
-/* check for timed out requests */
-iscsi_service(iscsilun->iscsi, 0);
+if (iscsilun->request_timed_out) {
+iscsilun->request_timed_out = false;
+iscsi_reconnect(iscsilun->iscsi);
+}
 
-if (iscsilun->request_timed_out) {
-iscsilun->request_timed_out = false;
-iscsi_reconnect(iscsilun->iscsi);
+/*
+ * newer versions of libiscsi may return zero events. Ensure we are
+ * able to return to service once this situation changes.
+ */
+iscsi_set_events(iscsilun);
 }
 
-/* newer versions of libiscsi may return zero events. Ensure we are able
- * to return to service once this situation changes. */
-iscsi_set_events(iscsilun);
-
-qemu_mutex_unlock(>mutex);
-
 timer_mod(iscsilun->event_timer,
   qemu_clock_get_ms(QEMU_CLOCK_REALTIME) + EVENT_INTERVAL);
 }
-- 
2.27.0

[PATCH v2 1/4] block/accounting: Use lock guard macros

2020-12-02 Thread Gan Qixin

Replace manual lock()/unlock() calls with lock guard macros
(QEMU_LOCK_GUARD/WITH_QEMU_LOCK_GUARD) in block/accounting.

Signed-off-by: Gan Qixin 
Reviewed-by: Paolo Bonzini 
---
Cc: Kevin Wolf 
Cc: Markus Armbruster 
---
 block/accounting.c | 32 +++-
 1 file changed, 15 insertions(+), 17 deletions(-)

diff --git a/block/accounting.c b/block/accounting.c
index 8d41c8a83a..2030851d79 100644
--- a/block/accounting.c
+++ b/block/accounting.c
@@ -199,29 +199,27 @@ static void block_account_one_io(BlockAcctStats *stats, 
BlockAcctCookie *cookie,
 return;
 }
 
-qemu_mutex_lock(>lock);
-
-if (failed) {
-stats->failed_ops[cookie->type]++;
-} else {
-stats->nr_bytes[cookie->type] += cookie->bytes;
-stats->nr_ops[cookie->type]++;
-}
+WITH_QEMU_LOCK_GUARD(>lock) {
+if (failed) {
+stats->failed_ops[cookie->type]++;
+} else {
+stats->nr_bytes[cookie->type] += cookie->bytes;
+stats->nr_ops[cookie->type]++;
+}
 
-block_latency_histogram_account(>latency_histogram[cookie->type],
-latency_ns);
+
block_latency_histogram_account(>latency_histogram[cookie->type],
+latency_ns);
 
-if (!failed || stats->account_failed) {
-stats->total_time_ns[cookie->type] += latency_ns;
-stats->last_access_time_ns = time_ns;
+if (!failed || stats->account_failed) {
+stats->total_time_ns[cookie->type] += latency_ns;
+stats->last_access_time_ns = time_ns;
 
-QSLIST_FOREACH(s, >intervals, entries) {
-timed_average_account(>latency[cookie->type], latency_ns);
+QSLIST_FOREACH(s, >intervals, entries) {
+timed_average_account(>latency[cookie->type], latency_ns);
+}
 }
 }
 
-qemu_mutex_unlock(>lock);
-
 cookie->type = BLOCK_ACCT_NONE;
 }
 
-- 
2.27.0

[PATCH v2 3/4] block/throttle-groups: Use lock guard macros

2020-12-02 Thread Gan Qixin

Replace manual lock()/unlock() calls with lock guard macros
(QEMU_LOCK_GUARD/WITH_QEMU_LOCK_GUARD) in block/throttle-groups.

Signed-off-by: Gan Qixin 
---
Cc: Kevin Wolf 
Cc: Paolo Bonzini 
Cc: Markus Armbruster 
---
 block/throttle-groups.c | 48 -
 1 file changed, 23 insertions(+), 25 deletions(-)

diff --git a/block/throttle-groups.c b/block/throttle-groups.c
index e2f2813c0f..abd16ed9db 100644
--- a/block/throttle-groups.c
+++ b/block/throttle-groups.c
@@ -546,7 +546,7 @@ void throttle_group_register_tgm(ThrottleGroupMember *tgm,
 tgm->aio_context = ctx;
 qatomic_set(>restart_pending, 0);
 
-qemu_mutex_lock(>lock);
+QEMU_LOCK_GUARD(>lock);
 /* If the ThrottleGroup is new set this ThrottleGroupMember as the token */
 for (i = 0; i < 2; i++) {
 if (!tg->tokens[i]) {
@@ -565,8 +565,6 @@ void throttle_group_register_tgm(ThrottleGroupMember *tgm,
 qemu_co_mutex_init(>throttled_reqs_lock);
 qemu_co_queue_init(>throttled_reqs[0]);
 qemu_co_queue_init(>throttled_reqs[1]);
-
-qemu_mutex_unlock(>lock);
 }
 
 /* Unregister a ThrottleGroupMember from its group, removing it from the list,
@@ -594,25 +592,25 @@ void throttle_group_unregister_tgm(ThrottleGroupMember 
*tgm)
 /* Wait for throttle_group_restart_queue_entry() coroutines to finish */
 AIO_WAIT_WHILE(tgm->aio_context, qatomic_read(>restart_pending) > 0);
 
-qemu_mutex_lock(>lock);
-for (i = 0; i < 2; i++) {
-assert(tgm->pending_reqs[i] == 0);
-assert(qemu_co_queue_empty(>throttled_reqs[i]));
-assert(!timer_pending(tgm->throttle_timers.timers[i]));
-if (tg->tokens[i] == tgm) {
-token = throttle_group_next_tgm(tgm);
-/* Take care of the case where this is the last tgm in the group */
-if (token == tgm) {
-token = NULL;
+WITH_QEMU_LOCK_GUARD(>lock) {
+for (i = 0; i < 2; i++) {
+assert(tgm->pending_reqs[i] == 0);
+assert(qemu_co_queue_empty(>throttled_reqs[i]));
+assert(!timer_pending(tgm->throttle_timers.timers[i]));
+if (tg->tokens[i] == tgm) {
+token = throttle_group_next_tgm(tgm);
+/* Take care of the case where this is the last tgm in the 
group */
+if (token == tgm) {
+token = NULL;
+}
+tg->tokens[i] = token;
 }
-tg->tokens[i] = token;
 }
-}
 
-/* remove the current tgm from the list */
-QLIST_REMOVE(tgm, round_robin);
-throttle_timers_destroy(>throttle_timers);
-qemu_mutex_unlock(>lock);
+/* remove the current tgm from the list */
+QLIST_REMOVE(tgm, round_robin);
+throttle_timers_destroy(>throttle_timers);
+}
 
 throttle_group_unref(>ts);
 tgm->throttle_state = NULL;
@@ -638,14 +636,14 @@ void 
throttle_group_detach_aio_context(ThrottleGroupMember *tgm)
 assert(qemu_co_queue_empty(>throttled_reqs[1]));
 
 /* Kick off next ThrottleGroupMember, if necessary */
-qemu_mutex_lock(>lock);
-for (i = 0; i < 2; i++) {
-if (timer_pending(tt->timers[i])) {
-tg->any_timer_armed[i] = false;
-schedule_next_request(tgm, i);
+WITH_QEMU_LOCK_GUARD(>lock) {
+for (i = 0; i < 2; i++) {
+if (timer_pending(tt->timers[i])) {
+tg->any_timer_armed[i] = false;
+schedule_next_request(tgm, i);
+}
 }
 }
-qemu_mutex_unlock(>lock);
 
 throttle_timers_detach_aio_context(tt);
 tgm->aio_context = NULL;
-- 
2.27.0

[PATCH v2 0/4] Use lock guard macros in block

2020-12-02 Thread Gan Qixin

v1->v2:

-Patch1:
Add Paolo Bonzini reviewed tag and delete the .c suffix in the commit
message.

-Patch2:
Add Paolo Bonzini reviewed tag and delete the .c suffix in the commit
message.

-Patch3:
Delete the .c suffix in the commit.
Changes suggested by Kevin Wolf: Fix wrong indentation format.

-Patch4:
Delete the .c suffix in the commit.
Changes suggested by Kevin Wolf: Replace QEMU_LOCK_GUARD with
WITH_QEMU_LOCK_GUARD, and delete the redundant qemu_mutex_unlock().

Gan Qixin (4):
  block/accounting: Use lock guard macros
  block/curl: Use lock guard macros
  block/throttle-groups: Use lock guard macros
  block/iscsi: Use lock guard macros

 block/accounting.c  | 32 +-
 block/curl.c| 28 +++
 block/iscsi.c   | 50 -
 block/throttle-groups.c | 48 +++
 4 files changed, 76 insertions(+), 82 deletions(-)

-- 
2.27.0

[PATCH v2 2/4] block/curl: Use lock guard macros

2020-12-02 Thread Gan Qixin

Replace manual lock()/unlock() calls with lock guard macros
(QEMU_LOCK_GUARD/WITH_QEMU_LOCK_GUARD) in block/curl.

Signed-off-by: Gan Qixin 
Reviewed-by: Paolo Bonzini 
---
Cc: Kevin Wolf 
Cc: Markus Armbruster 
---
 block/curl.c | 28 ++--
 1 file changed, 14 insertions(+), 14 deletions(-)

diff --git a/block/curl.c b/block/curl.c
index 4f907c47be..d24a4c5897 100644
--- a/block/curl.c
+++ b/block/curl.c
@@ -564,23 +564,23 @@ static void curl_detach_aio_context(BlockDriverState *bs)
 BDRVCURLState *s = bs->opaque;
 int i;
 
-qemu_mutex_lock(>mutex);
-for (i = 0; i < CURL_NUM_STATES; i++) {
-if (s->states[i].in_use) {
-curl_clean_state(>states[i]);
+WITH_QEMU_LOCK_GUARD(>mutex) {
+for (i = 0; i < CURL_NUM_STATES; i++) {
+if (s->states[i].in_use) {
+curl_clean_state(>states[i]);
+}
+if (s->states[i].curl) {
+curl_easy_cleanup(s->states[i].curl);
+s->states[i].curl = NULL;
+}
+g_free(s->states[i].orig_buf);
+s->states[i].orig_buf = NULL;
 }
-if (s->states[i].curl) {
-curl_easy_cleanup(s->states[i].curl);
-s->states[i].curl = NULL;
+if (s->multi) {
+curl_multi_cleanup(s->multi);
+s->multi = NULL;
 }
-g_free(s->states[i].orig_buf);
-s->states[i].orig_buf = NULL;
-}
-if (s->multi) {
-curl_multi_cleanup(s->multi);
-s->multi = NULL;
 }
-qemu_mutex_unlock(>mutex);
 
 timer_del(>timer);
 }
-- 
2.27.0

[Bug 1906608] [NEW] [Feature request]For some ehci controller, qemu should implement using portsc[26-27] to detect the speed of device.

2020-12-02 Thread ruimeiyan

Public bug reported:

For some EHCI controllers, for example the EHCI controller on fsl_imx6,
portsc[26-27] is used to decide whether a full-speed or a high-speed device
was connected, so hub-ehci.c should set portsc[26-27] to return the
right speed.

line:1001 in hub-ehci.c
if (dev && dev->attached && (dev->speedmask & USB_SPEED_MASK_HIGH)) {
val |= PORTSC_PED;
}

below is the spec for fsl_imx6 USB PART.
PORTSC:27–26 :PSPD
Port Speed - Read Only.
This register field indicates the speed at which the port is operating.
00 Full Speed
01 Low Speed
10 High Speed
11 Undefined

** Affects: qemu
 Importance: Undecided
 Status: New

-- 
You received this bug notification because you are a member of qemu-
devel-ml, which is subscribed to QEMU.
https://bugs.launchpad.net/bugs/1906608

Title:
   [Feature request]For some ehci controller, qemu should implement
  using portsc[26-27]  to detect the speed of device.

Status in QEMU:
  New

Bug description:
  For some EHCI controllers, for example the EHCI controller on fsl_imx6,
  portsc[26-27] is used to decide whether a full-speed or a high-speed device
  was connected, so hub-ehci.c should set portsc[26-27] to return the
  right speed.

  line:1001 in hub-ehci.c
  if (dev && dev->attached && (dev->speedmask & USB_SPEED_MASK_HIGH)) {
  val |= PORTSC_PED;
  }

  below is the spec for fsl_imx6 USB PART.
  PORTSC:27–26 :PSPD
  Port Speed - Read Only.
  This register field indicates the speed at which the port is operating.
  00 Full Speed
  01 Low Speed
  10 High Speed
  11 Undefined

To manage notifications about this bug go to:
https://bugs.launchpad.net/qemu/+bug/1906608/+subscriptions

RE: [PATCH 4/4] block/iscsi.c: Use lock guard macros

2020-12-02 Thread ganqixin

> -Original Message-
> From: Kevin Wolf [mailto:kw...@redhat.com]
> Sent: Wednesday, December 2, 2020 7:12 PM
> To: ganqixin 
> Cc: qemu-devel@nongnu.org; qemu-triv...@nongnu.org;
> pbonz...@redhat.com; mre...@redhat.com; stefa...@redhat.com;
> dnbrd...@gmail.com; Zhanghailiang ;
> Chenqun (kuhn) 
> Subject: Re: [PATCH 4/4] block/iscsi.c: Use lock guard macros
> 
> Am 09.11.2020 um 16:43 hat Gan Qixin geschrieben:
> > Replace manual lock()/unlock() calls with lock guard macros
> > (QEMU_LOCK_GUARD/WITH_QEMU_LOCK_GUARD) in block/iscsi.c.
> >
> > Signed-off-by: Gan Qixin 
> > ---
> >  block/iscsi.c | 28 +---
> >  1 file changed, 13 insertions(+), 15 deletions(-)
> >
> > diff --git a/block/iscsi.c b/block/iscsi.c index
> > e30a7e3606..f5f657b582 100644
> > --- a/block/iscsi.c
> > +++ b/block/iscsi.c
> > @@ -322,7 +322,7 @@ iscsi_aio_cancel(BlockAIOCB *blockacb)
> >  IscsiAIOCB *acb = (IscsiAIOCB *)blockacb;
> >  IscsiLun *iscsilun = acb->iscsilun;
> >
> > -qemu_mutex_lock(>mutex);
> > +QEMU_LOCK_GUARD(>mutex);
> >
> >  /* If it was cancelled or completed already, our work is done here */
> >  if (acb->cancelled || acb->status != -EINPROGRESS) {
>qemu_mutex_unlock(>mutex);
>return;
>}
> 
> I don't think this qemu_mutex_unlock() is right any more now.

You are right, I ignored this qemu_mutex_unlock(). I will correct it and 
resubmit. :)

Thanks,
Gan Qixin

> 
> Kevin

RE: [PATCH v2 01/12] pc-dimm: put it into the 'storage' category

2020-12-02 Thread ganqixin

> -Original Message-
> From: Pankaj Gupta [mailto:pankaj.gupta.li...@gmail.com]
> Sent: Monday, November 30, 2020 6:19 PM
> To: ganqixin 
> Cc: Qemu Developers ; qemu-triv...@nongnu.org;
> Chenqun (kuhn) ; th...@redhat.com;
> Zhanghailiang ; Michael S . Tsirkin
> 
> Subject: Re: [PATCH v2 01/12] pc-dimm: put it into the 'storage' category
> 
> > The category of the pc-dimm device is not set, put it into the 'storage'
> > category.
> >
> > Signed-off-by: Gan Qixin 
> > ---
> > Cc: Michael S. Tsirkin 
> > ---
> >  hw/mem/pc-dimm.c | 1 +
> >  1 file changed, 1 insertion(+)
> >
> > diff --git a/hw/mem/pc-dimm.c b/hw/mem/pc-dimm.c index
> > 2ffc986734..017146e3d1 100644
> > --- a/hw/mem/pc-dimm.c
> > +++ b/hw/mem/pc-dimm.c
> > @@ -282,6 +282,7 @@ static void pc_dimm_class_init(ObjectClass *oc,
> void *data)
> >  mdc->get_plugged_size = memory_device_get_region_size;
> >  mdc->get_memory_region = pc_dimm_md_get_memory_region;
> >  mdc->fill_device_info = pc_dimm_md_fill_device_info;
> > +set_bit(DEVICE_CATEGORY_STORAGE, dc->categories);
> 
> Any reason why pc-dimm would be in the storage category?

Hi Pankaj,
Thanks for your reply. As far as I know, pc-dimm is a dimm device for memory 
hotplug, described as a "DIMM memory module" in "-device help". 
This device looks related to storage, so I put it into the "storage" category 
to make it easy to find. I'm not sure if this is appropriate, do you have any 
better ideas?

Gan Qixin

> 
> >  }
> >
> >  static TypeInfo pc_dimm_info = {
> > --
> > 2.23.0
> >
> >

Re: [PATCH 00/18] qapi/qom: QAPIfy object-add

2020-12-02 Thread Gerd Hoffmann

  Hi,

> It would be much nicer to do the wrapper the other way round, i.e.
> setting properties before the device is realized would update a
> configuration struct and realize would then call .create() with that
> struct. To me, this sounds much harder, though also a more useful state.

Well, in some places we already have separate config structs.  We have
NICConf for example, which is typically used like this:

struct USBNetState {
   USBDevice dev;
   [ ... ]
   NICConf conf;
   [ ... ]
};

and

static Property net_properties[] = {
DEFINE_NIC_PROPERTIES(USBNetState, conf),
DEFINE_PROP_END_OF_LIST(),
};

So I think we could:

  (1) move *all* properties into structs.
  (2) generate those structs from qapi schemas.
  (3) generate Property lists (or functions with
  object_class_property_add_*() calls) from qapi
  schema.

We could then convert devices one-by-one without breaking anything
or needing two code paths essentially doing the same thing in two
different ways.

take care,
  Gerd

Re: [PATCH] tests/acceptance: fix timeout for vm.wait

2020-12-02 Thread Pavel Dovgalyuk


On 02.12.2020 18:22, John Snow wrote:

On 12/2/20 1:31 AM, Pavel Dovgalyuk wrote:


This patch adds timeout parameter to vm.wait() calls, because the 
default

value is just 30 seconds, and tests may last for more time.



This doesn't sound right -- the timeout isn't meant to be for the 
entire duration of the test, the timeout is from the time of issuing 
a shutdown command until the time the VM actually shuts down. 
Ideally, that should not take a particularly long time in a 
well-behaved test.


Why is it lasting longer than 30 seconds?


These are complex Linux boot tests.
Such loading process could take more than 30 seconds.
E.g., BootLinux tests have timeout of 900 seconds.


This timeout should only count towards the time spent *shutting down*, 
not the time to run the entire test. 30 seconds used to be enough time 
for this to happen on gitlab, if it's taking longer than that I am 
worried that something has gone wrong.


Where were the failures observed, and on what tests? Are there logs I 
can review?


I've got your point. You were right.
The problem was with new long-lasting record/replay tests:

if record:
cloudinit.wait_for_phone_home(('0.0.0.0', self.phone_home_port),
  self.name)
vm.shutdown()
logger.info('finished the recording with log size %s bytes'
% os.path.getsize(replay_path))
else:
vm.wait(None)
logger.info('successfully fihished the replay')


Replay phase here waits for shutdown for the whole period of Linux boot 
and execution. We don't check any VM output and just wait for finishing

the replay.

Smaller RR tests include "self.wait_for_console_pattern" during replay 
and therefore can't have problems with this timeout.


Pavel Dovgalyuk

Re: [PATCH v1 1/1] security-process: update process information

2020-12-02 Thread P J P

+-- On Wed, 2 Dec 2020, Daniel P. Berrangé wrote --+
| > +- If issue is found to be less severe, an upstream public bug (or an
| > +  issue) will be created immediately.
| 
| No need to repeat "or an issue". I think it would read more clearly as
| 
|- If the severity of the issue is sufficiently low, an upstream public bug
|  may be created immediately.

* Let's settle on public GitLab issues, shall we? 

* Tomorrow after an issue triage if someone asks where should they create a 
  public tracker, it's better to have one definite answer, instead of choose 
  either LaunchPad or GitLab issues.

* OR is it better to have both? i.e. file a public tracker anywhere as per one's 
  convenience?

* One GitLab is good I think.


Thank you.
--
Prasad J Pandit / Red Hat Product Security Team
8685 545E B54C 486B C6EB 271E E285 8B5A F050 DE8D

Re: [PATCH v1 1/1] security-process: update process information

2020-12-02 Thread P J P

  Hello Dan, Stefano,

+-- On Wed, 2 Dec 2020, Stefano Stabellini wrote --+
| On Wed, 2 Dec 2020, Daniel P. Berrangé wrote:
| > > +  any third parties, including Xen Security Project, without your prior
| > > +  permission.
| > 
| > Why this explicit note about the Xen project ?  What if we decide to want
| > a member of the Xen security team on the QEMU security mailing list so that
| > we can collaborate on triage ?

* While that's fair point, what I think it means is, even if members from 
  other communities are present on the qemu-security list, any explicit 
  communication and/or sharing of issue details/information/reproducers etc.  
  across communities, with non-members will not happen without prior 
  permission from the reporter(s).

* Besides, that is not new text, it is from the current process page

  -> https://www.qemu.org/contribute/security-process/


| this is not an issue because the individual (probably me) of course
| would not report anything to the Xen security team without prior
| permission.

 +1000..., appreciate it.:)

| >  Any non-public information you share about security issues, is kept
| >  confidential between members of the QEMU security team, and a minimal
| >  number of supporting staff in their affiliated companies.  Information
| >  will not be disclosed to other third party organizations/individuals
| >  without prior permission from the reporter
| 
| Sounds good to me

Same here, will fix it.

Thank you.
--
Prasad J Pandit / Red Hat Product Security Team
8685 545E B54C 486B C6EB 271E E285 8B5A F050 DE8D

Re: [PATCH v3 07/10] arm: Add Hypervisor.framework build target

2020-12-02 Thread Roman Bolshakov

On Wed, Dec 02, 2020 at 08:04:05PM +0100, Alexander Graf wrote:
> Now that we have all logic in place that we need to handle 
> Hypervisor.framework
> on Apple Silicon systems, let's add CONFIG_HVF for aarch64 as well so that we
> can build it.
> 
> Signed-off-by: Alexander Graf 
> 
> ---
> 
> v1 -> v2:
> 
>   - Fix build on 32bit arm
> ---
>  meson.build| 11 ++-
>  target/arm/hvf/meson.build |  3 +++
>  target/arm/meson.build |  2 ++
>  3 files changed, 15 insertions(+), 1 deletion(-)
>  create mode 100644 target/arm/hvf/meson.build
> 
> diff --git a/meson.build b/meson.build
> index 2a7ff5560c..bff3fe7089 100644
> --- a/meson.build
> +++ b/meson.build
> @@ -74,16 +74,25 @@ else
>  endif
>  
>  accelerator_targets = { 'CONFIG_KVM': kvm_targets }
> +
> +if cpu in ['x86', 'x86_64']
> +  hvf_targets = ['i386-softmmu', 'x86_64-softmmu']

i386-softmmu had issues with hvf. Perhaps better to leave it disabled
for the target as it was before.

Thanks,
Roman

> +elif cpu in ['aarch64']
> +  hvf_targets = ['aarch64-softmmu']
> +else
> +  hvf_targets = []
> +endif
> +
>  if cpu in ['x86', 'x86_64', 'arm', 'aarch64']
># i368 emulator provides xenpv machine type for multiple architectures
>accelerator_targets += {
>  'CONFIG_XEN': ['i386-softmmu', 'x86_64-softmmu'],
> +'CONFIG_HVF': hvf_targets,
>}
>  endif
>  if cpu in ['x86', 'x86_64']
>accelerator_targets += {
>  'CONFIG_HAX': ['i386-softmmu', 'x86_64-softmmu'],
> -'CONFIG_HVF': ['x86_64-softmmu'],
>  'CONFIG_WHPX': ['i386-softmmu', 'x86_64-softmmu'],
>}
>  endif
> diff --git a/target/arm/hvf/meson.build b/target/arm/hvf/meson.build
> new file mode 100644
> index 00..855e6cce5a
> --- /dev/null
> +++ b/target/arm/hvf/meson.build
> @@ -0,0 +1,3 @@
> +arm_softmmu_ss.add(when: [hvf, 'CONFIG_HVF'], if_true: files(
> +  'hvf.c',
> +))
> diff --git a/target/arm/meson.build b/target/arm/meson.build
> index f5de2a77b8..95bebae216 100644
> --- a/target/arm/meson.build
> +++ b/target/arm/meson.build
> @@ -56,5 +56,7 @@ arm_softmmu_ss.add(files(
>'psci.c',
>  ))
>  
> +subdir('hvf')
> +
>  target_arch += {'arm': arm_ss}
>  target_softmmu_arch += {'arm': arm_softmmu_ss}
> -- 
> 2.24.3 (Apple Git-128)
>

Re: [PATCH v3 06/10] hvf: Add Apple Silicon support

2020-12-02 Thread Roman Bolshakov

On Wed, Dec 02, 2020 at 08:04:04PM +0100, Alexander Graf wrote:
> With Apple Silicon available to the masses, it's a good time to add support
> for driving its virtualization extensions from QEMU.
> 
> This patch adds all necessary architecture specific code to get basic VMs
> working. It's still pretty raw, but definitely functional.
> 

That's very cool, Alex!

>
> [...]
> diff --git a/accel/hvf/hvf-cpus.c b/accel/hvf/hvf-cpus.c
> index a423f629d5..e613c22ad0 100644
> --- a/accel/hvf/hvf-cpus.c
> +++ b/accel/hvf/hvf-cpus.c
> @@ -60,6 +60,10 @@
>  
>  #include 
>  

On an older laptop with 10.15 I've noticed this causes a build failure.
Here's layout of Hypervisor.framework on 10.15:

 Hypervisor.framework find .
 .
 ./Versions
 ./Versions/A
 ./Versions/A/Hypervisor.tbd
 ./Versions/A/Headers
 ./Versions/A/Headers/hv_arch_vmx.h
 ./Versions/A/Headers/hv_error.h
 ./Versions/A/Headers/hv_types.h
 ./Versions/A/Headers/hv.h
 ./Versions/A/Headers/hv_arch_x86.h
 ./Versions/A/Headers/hv_vmx.h
 ./Versions/Current
 ./module.map
 ./Hypervisor.tbd
 ./Headers

The issue also exists in another patch in the series:
  "hvf: Move common code out"

> +#ifdef __aarch64__
> +#define HV_VM_DEFAULT NULL
> +#endif
> +

I don't see if it's used anywhere.

>  /* Memory slots */
>  
>  struct mac_slot {
> [...]
>

Side question. I have very little knowledge of ARM but it seems much
leaner compared to x86 trap/emulation layer. Is it a consequence of
load/store architecture and it's expected to be that small on ARM?

I have only noticed MMIO, system registers (access to them apparently
leads to a trap), kick and PSCI traps (which sounds somewhat similar to
Intel MPSpec/APIC) and no system instruction traps (except WFI in the
next patch).

Thanks,
Roman

Re: [PATCH v1 1/1] security-process: update process information

2020-12-02 Thread P J P

  Hello Dan,

+-- On Wed, 2 Dec 2020, Daniel P. Berrangé wrote --+
| > +- If issue is found to be less severe, an upstream public bug (or an
| > +  issue) will be created immediately.
| 
| No need to repeat "or an issue". I think it would read more clearly as
| 
|- If the severity of the issue is sufficiently low, an upstream public bug
|  may be created immediately.

  Okay.

| > +- If issue is found to be severe, an embargo process below is followed,
| > +  and public bug (or an issue) will be opened at the end of the set
| > +  embargo period.
| 
|- If the severity of the issue requires co-ordinated disclosure at a future
|  date, then the embargo process below is followed, and public bug will be
|  opened at the end of the set embargo period.

  Okay.
  
| Somewhere around here is probably a good place to link to:
| 
|   https://www.qemu.org/docs/master/system/security.html
| 
| which describes why we'll consider some things to be not security issues

  Towards the end, there's a section about 'How impact & severity of an issue 
is decided', above link will fit in there good I think.

 
| > -If a security issue is reported that is not already publicly disclosed, an
| > -embargo date may be assigned and communicated to the reporter. Embargo
| > -periods will be negotiated by mutual agreement between members of the 
security
| > -team and other relevant parties to the problem. Members of the security 
contact
| > -list agree not to publicly disclose any details of the security issue until
| > -the embargo date expires.
| > +* If a security issue is reported that is not already public and is severe
| > +  enough, an embargo date may be assigned and communicated to the
| > +  reporter(s).
| 
| 
|   * If a security issue is reported that is not already public and its
| severity requires coordinated disclosure, an embargo date may be
| assigned and communicated to the reporter(s).
...
|   "The preferred embargo period will be upto 2 weeks, however, longer
|embargoes can be negotiated if the severity of the issues requires it."

Okay, will add above changes.

Thank you.
--
Prasad J Pandit / Red Hat Product Security Team
8685 545E B54C 486B C6EB 271E E285 8B5A F050 DE8D

Re: [PATCH v1 1/1] security-process: update process information

2020-12-02 Thread P J P

+-- On Wed, 2 Dec 2020, Philippe Mathieu-Daudé wrote --+
| Maybe:
| 
|  0) **Acknowledge reception**
|- A non-automated response email is sent to acknowledge the
|  reception of the request.
|  This is the starting date for the maximum **60 days** required
|  to process the issue, including bullets 1) and 2).
| 
| > +- Create an upstream fix patch
| 
|  with the proper Buglink/CVE/Reported-by tags.
| 
|- Participate in the review process until the patch is merged.
|  Test the fix updates with the private reproducer if required.
|- Close the upstream [bug] with 'Fix released', including the
|  commit SHA-1 of the fix.
... 
| >  Email sent to us is read and acknowledged with a non-automated response. 
For
| >  issues that are complicated and require significant attention, we will 
open an
| 
|^^^ You can remove that, as now covered by bullet 0).

Okay, will do. Thank you.
--
Prasad J Pandit / Red Hat Product Security Team
8685 545E B54C 486B C6EB 271E E285 8B5A F050 DE8D

[Bug 1906516] Re: [RISCV] sfence.vma need to end the translation block

2020-12-02 Thread jinyan

** Description changed:

  QEMU emulator version 5.0.0
  
  sfence.vma will flush the tlb, so after this instruction, the translation 
block should end. The following code will only work in single step mode:
  ```
  relocate:
-   li a0, OFFSET
+  li a0, OFFSET
  
-   la t0, 1f
-   add t0, t0, a0
-   csrw stvec, t0
+  la t0, 1f
+  add t0, t0, a0
+  csrw stvec, t0
  
- la t0, early_pgtbl
-   srl t0, t0, PAGE_SHIFT
-   li t1, SATP_SV39
-   or t0, t1, t0
+ la t0, early_pgtbl
+  srl t0, t0, PAGE_SHIFT
+  li t1, SATP_SV39
+  or t0, t1, t0
  
- csrw satp, t0
+ csrw satp, t0
  1:
-   sfence.vma
-   la t0, trap_s
-   csrw stvec, t0
-   ret
+  sfence.vma
+  la t0, trap_s
+  csrw stvec, t0
+  ret
  ```
  
  In this code, I want to relocate pc to virtual address with the OFFSET
- prefix, before writing to satp, pc run at physic address, stvec has been
- set a label 1 with a virtual prefix and virtual address has been mapping
- in early_pgtbl, after writing satp, there will throw a page fault, and
- pc will set to virtual address of label 1.
+ prefix. Before writing to satp, pc run at physic address, stvec has been
+ set to label 1 with a virtual prefix and virtual address has been
+ mapping in early_pgtbl, after writing satp, qemu will throw a page
+ fault, and pc will set to virtual address of label 1.
  
  The problem is that, in this situation, the translation block will not
  end after sfence.vma, and stvec will be set to trap_s,
  
  ```
  
  IN:
  Priv: 1; Virt: 0
  0x80dc:  00a080b3  add ra,ra,a0
  0x80e0:  7297  auipc   t0,28672# 
0x800070e0
  0x80e4:  f2028293  addit0,t0,-224
  0x80e8:  00c2d293  srlit0,t0,12
  0x80ec:  fff0031b  addiw   t1,zero,-1
  0x80f0:  03f31313  sllit1,t1,63
  0x80f4:  005362b3  or  t0,t1,t0
  0x80f8:  18029073  csrrw   zero,satp,t0
  
  
  IN:
  Priv: 1; Virt: 0
  0x80fc:  1273  sfence.vma  zero,zero
  0x8100:  0297  auipc   t0,0# 
0x8100
  0x8104:  1c828293  addit0,t0,456
  0x8108:  10529073  csrrw   zero,stvec,t0
  
  riscv_raise_exception: 12
  riscv_raise_exception: 12
  riscv_raise_exception: 12
  riscv_raise_exception: 12
  ...
  ```
  
  So, the program will crash, and the program will work in single step mode:
  ```
  
  IN:
  Priv: 1; Virt: 0
  0x80f8:  18029073  csrrw   zero,satp,t0
  
  
  IN:
  Priv: 1; Virt: 0
  0x80fc:  1273  sfence.vma  zero,zero
  
  riscv_raise_exception: 12
  
  IN:
  Priv: 1; Virt: 0
  0x80fc:  1273  sfence.vma  zero,zero
  
  
  IN:
  Priv: 1; Virt: 0
  0x8100:  0297  auipc   t0,0# 
0x8100
  
  ```
  The pc will set to label 1, instead of trap_s.
  
  I try to patch the code in fence.i in trans_rvi.inc.c to sfence.vma:
  ```
- tcg_gen_movi_tl(cpu_pc, ctx->pc_succ_insn);
- exit_tb(ctx);
- ctx->base.is_jmp = DISAS_NORETURN;
+ tcg_gen_movi_tl(cpu_pc, ctx->pc_succ_insn);
+ exit_tb(ctx);
+ ctx->base.is_jmp = DISAS_NORETURN;
  ```
  This code can help to end the translation block, but since I'm not a qemu guy, I'm 
not sure if this is a correct method.

-- 
You received this bug notification because you are a member of qemu-
devel-ml, which is subscribed to QEMU.
https://bugs.launchpad.net/bugs/1906516

Title:
  [RISCV] sfence.vma need to end the translation block

Status in QEMU:
  New

Bug description:
  QEMU emulator version 5.0.0

  sfence.vma will flush the tlb, so after this instruction, the translation 
block should end. The following code will only work in single step mode:
  ```
  relocate:
   li a0, OFFSET

   la t0, 1f
   add t0, t0, a0
   csrw stvec, t0

  la t0, early_pgtbl
   srl t0, t0, PAGE_SHIFT
   li t1, SATP_SV39
   or t0, t1, t0

  csrw satp, t0
  1:
   sfence.vma
   la t0, trap_s
   csrw stvec, t0
   ret
  ```

  In this code, I want to relocate pc to virtual address with the OFFSET
  prefix. Before writing to satp, pc run at physic address, stvec has
  been set to label 1 with a virtual prefix and virtual address has been
  mapping in early_pgtbl, after writing satp, qemu will throw a page
  fault, and pc will set to virtual address of label 1.

  The problem is that, in this situation, the translation block will not
  end after sfence.vma, and stvec will be set to trap_s,

  ```
  
  IN:
  Priv: 1; Virt: 0
  0x80dc:  00a080b3  add ra,ra,a0

Re: [PATCH 0/9] target/mips: Simplify MSA TCG logic

2020-12-02 Thread Jiaxun Yang





在 2020/12/3 上午2:44, Philippe Mathieu-Daudé 写道:

I converted MSA opcodes to decodetree. To keep the series
small I split it in 2, this is the non-decodetree specific
patches (so non-decodetree experts can review it ;) ).

First we stop using env->insn_flags to check for MSAi
presence, then we restrict TCG functions to DisasContext*.


Hi Philippe,

For the whole series,
Reviewed-by: Jiaxun Yang 


I'm just curious about how would you deal with so many condition flags
with decodetree?

Unlike other ISAs, MIPS have so many flavors, every ISA level (MIPS-III 
R2 R5 R6)
has its own instructions, and in my understanding decodetree file won't 
generate
these switches. I was trying to do the same thing but soon find out 
we'll have around

20 decodetree for MIPS.

Thanks.

- Jiaxun



Based-on: <20201130102228.2395100-1-f4...@amsat.org>
"target/mips: Allow executing MSA instructions on Loongson-3A4000"

Philippe Mathieu-Daudé (9):
   target/mips: Introduce ase_msa_available() helper
   target/mips: Simplify msa_reset()
   target/mips: Use CP0_Config3 to set MIPS_HFLAG_MSA
   target/mips: Simplify MSA TCG logic
   target/mips: Remove now unused ASE_MSA definition
   target/mips: Alias MSA vector registers on FPU scalar registers
   target/mips: Extract msa_translate_init() from mips_tcg_init()
   target/mips: Remove CPUMIPSState* argument from gen_msa*() methods
   target/mips: Explode gen_msa_branch() as gen_msa_BxZ_V/BxZ()

  target/mips/internal.h   |   8 +-
  target/mips/mips-defs.h  |   1 -
  target/mips/kvm.c|  12 +-
  target/mips/translate.c  | 206 ++-
  target/mips/translate_init.c.inc |  12 +-
  5 files changed, 138 insertions(+), 101 deletions(-)

Re: [PATCH v2] gitlab-ci.yml: Add openSUSE Leap 15.2 for gitlab CI/CD

2020-12-02 Thread AL Yu-Chen Cho

On Mon, 2020-11-30 at 10:14 +0100, Philippe Mathieu-Daudé wrote:
> On 11/30/20 5:26 AM, Cho, Yu-Chen wrote:
> > v2:
> > Drop some package from dockerfile to make docker image more light.
> > 
> > v1:
> > Add build-system-opensuse jobs and opensuse-leap.docker dockerfile.
> > Use openSUSE Leap 15.2 container image in the gitlab-CI.
> > 
> > Signed-off-by: Cho, Yu-Chen 
> > ---
> >  .gitlab-ci.d/containers.yml   |  5 ++
> >  .gitlab-ci.yml| 30 +++
> >  tests/docker/dockerfiles/opensuse-leap.docker | 54
> > +++
> >  3 files changed, 89 insertions(+)
> >  create mode 100644 tests/docker/dockerfiles/opensuse-leap.docker
> > 
> > diff --git a/.gitlab-ci.d/containers.yml b/.gitlab-
> > ci.d/containers.yml
> > index 892ca8d838..910754a699 100644
> > --- a/.gitlab-ci.d/containers.yml
> > +++ b/.gitlab-ci.d/containers.yml
> > @@ -246,3 +246,8 @@ amd64-ubuntu-container:
> ><<: *container_job_definition
> >variables:
> >  NAME: ubuntu
> > +
> > +amd64-opensuse-leap-container:
> > +  <<: *container_job_definition
> > +  variables:
> > +NAME: opensuse-leap
> > diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
> > index d0173e82b1..6a256fe07b 100644
> > --- a/.gitlab-ci.yml
> > +++ b/.gitlab-ci.yml
> > @@ -195,6 +195,36 @@ acceptance-system-centos:
> >  MAKE_CHECK_ARGS: check-acceptance
> ><<: *acceptance_definition
> >  
> 
> What about adding in a comment who is the maintainer
> of these jobs? Some sort of contact in case there is
> a OpenSUSE specific issue for example.
> 

I am glad to be a maintainer or reviewer for the openSUSE specific
issue.


> See:
> https://www.mail-archive.com/qemu-devel@nongnu.org/msg758968.html
> 
> > +build-system-opensuse:
> > +  <<: *native_build_job_definition
> > +  variables:
> > +IMAGE: opensuse-leap
> > +TARGETS: s390x-softmmu x86_64-softmmu aarch64-softmmu
> > +MAKE_CHECK_ARGS: check-build
> > +  artifacts:
> > +expire_in: 2 days
> > +paths:
> > +  - build
> > +
> > +check-system-opensuse:
> > +  <<: *native_test_job_definition
> > +  needs:
> > +- job: build-system-opensuse
> > +  artifacts: true
> > +  variables:
> > +IMAGE: opensuse-leap
> > +MAKE_CHECK_ARGS: check
> > +
> > +acceptance-system-opensuse:
> > +  <<: *native_test_job_definition
> > +  needs:
> > +- job: build-system-opensuse
> > +  artifacts: true
> > +  variables:
> > +IMAGE: opensuse-leap
> > +MAKE_CHECK_ARGS: check-acceptance
> > +  <<: *acceptance_definition
> > +
> >  build-disabled:
> ><<: *native_build_job_definition
> >variables:

Re: [PATCH 0/9] target/mips: Simplify MSA TCG logic

2020-12-02 Thread Jiaxun Yang





在 2020/12/3 上午2:44, Philippe Mathieu-Daudé 写道:

I converted MSA opcodes to decodetree. To keep the series
small I split it in 2, this is the non-decodetree specific
patches (so non-decodetree experts can review it ;) ).

First we stop using env->insn_flags to check for MSAi
presence, then we restrict TCG functions to DisasContext*.


Hi Philippe,

For the whole series,
Reviewed-by: Jiaxun Yang 


I'm just curious about how would you deal with so many condition flags
with decodetree?

Unlike other ISAs, MIPS have so many flavors, every ISA level (MIPS-III 
R2 R5 R6)
has its own instructions, and in my understanding decodetree file won't 
generate
these switches. I was trying to do the same thing but soon find out 
we'll have around

20 decodetree for MIPS.

Thanks.

- Jiaxun



Based-on: <20201130102228.2395100-1-f4...@amsat.org>
"target/mips: Allow executing MSA instructions on Loongson-3A4000"

Philippe Mathieu-Daudé (9):
   target/mips: Introduce ase_msa_available() helper
   target/mips: Simplify msa_reset()
   target/mips: Use CP0_Config3 to set MIPS_HFLAG_MSA
   target/mips: Simplify MSA TCG logic
   target/mips: Remove now unused ASE_MSA definition
   target/mips: Alias MSA vector registers on FPU scalar registers
   target/mips: Extract msa_translate_init() from mips_tcg_init()
   target/mips: Remove CPUMIPSState* argument from gen_msa*() methods
   target/mips: Explode gen_msa_branch() as gen_msa_BxZ_V/BxZ()

  target/mips/internal.h   |   8 +-
  target/mips/mips-defs.h  |   1 -
  target/mips/kvm.c|  12 +-
  target/mips/translate.c  | 206 ++-
  target/mips/translate_init.c.inc |  12 +-
  5 files changed, 138 insertions(+), 101 deletions(-)

Re: [PATCH v1 1/1] security-process: update process information

2020-12-02 Thread Stefano Stabellini

On Wed, 2 Dec 2020, Daniel P. Berrangé wrote:
> On Mon, Nov 30, 2020 at 07:19:07PM +0530, P J P wrote:
> > From: Prasad J Pandit 
> > 
> > We are about to introduce a qemu-security mailing list to report
> > and triage QEMU security issues.
> > 
> > Update the QEMU security process web page with new mailing list
> > and triage details.
> > 
> > Signed-off-by: Prasad J Pandit 
> > ---
> >  contribute/security-process.md | 134 -
> >  1 file changed, 80 insertions(+), 54 deletions(-)
> 
> > +* List members follow a **responsible disclosure** policy. Any non-public
> > +  information you share about security issues, is kept confidential within 
> > the
> > +  respective affiliated companies. Such information shall not be passed on 
> > to
> > +  any third parties, including Xen Security Project, without your prior
> > +  permission.
> 
> Why this explicit note about the Xen project ?  What if we decide to want
> a member of the Xen security team on the QEMU security mailing list so that
> we can collaborate on triage ?

Hi Daniel,

this is not an issue because the individual (probably me) of course
would not report anything to the Xen security team without prior
permission.

Also note that the Xen case is one of the easiest because the Xen
security policy gives full powers to the discoverer: the discoverer
chooses both when to disclose and to whom and the Xen security team will
abide.


> Perhaps
> 
>  Any non-public information you share about security issues, is kept
>  confidential between members of the QEMU security team, and a minimal
>  number of supporting staff in their affiliated companies.  Information
>  will not be disclosed to other third party organizations/individuals
>  without prior permission from the reporter

Sounds good to me

Re: [PATCH v3 6/6] linux-user: Add support for MIPS Loongson 2F/3E

2020-12-02 Thread chen huacai

Hi, Philippe,

On Wed, Dec 2, 2020 at 5:16 PM Philippe Mathieu-Daudé  wrote:
>
> On 12/2/20 2:01 AM, chen huacai wrote:
> > Hi, Philippe,
> >
> > On Wed, Dec 2, 2020 at 3:31 AM Philippe Mathieu-Daudé  
> > wrote:
> >>
> >> Userland ELF binaries using Longsoon SIMD instructions have the
> >> HWCAP_LOONGSON_MMI bit set [1].
> >> Binaries compiled for Longsoon 3E [2] have the HWCAP_LOONGSON_EXT
> >> bit set for the LQ / SQ instructions.
> > What is Loongson-3E? I think you want to say Loongson-3A?
>
> Yes =) I have been confused because I looked at the INSN_LOONGSON2E
> and INSN_LOONGSON2F definitions earlier.
>
> Are you OK with this patch if I change
> - 3E -> 3A in subject and body
> - Longsoon -> Loongson in body?
That's OK.

Huacai
>
> As you maybe noticed, since Loongson is currently the single MIPS
> area with contributions, I am trying to strengthen it and ease its
> maintenance by adding (and running) more tests.
>
> >
> > Huacai
> >>
> >> [1] commit 8e2d5831e4b ("target/mips: Legalize Loongson insn flags")
> >> [2] commit af868995e1b ("target/mips: Add Loongson-3 CPU definition")
> >>
> >> Reviewed-by: Richard Henderson 
> >> Signed-off-by: Philippe Mathieu-Daudé 
> >> ---
> >>  linux-user/elfload.c | 2 ++
> >>  1 file changed, 2 insertions(+)
> >>
> >> diff --git a/linux-user/elfload.c b/linux-user/elfload.c
> >> index 2ba42d8e4bd..5a39a7dc021 100644
> >> --- a/linux-user/elfload.c
> >> +++ b/linux-user/elfload.c
> >> @@ -1023,6 +1023,8 @@ static uint32_t get_elf_hwcap(void)
> >>
> >>  GET_FEATURE_REG_EQU(CP0_Config0, CP0C0_AR, 3, 2, HWCAP_MIPS_R6);
> >>  GET_FEATURE_REG_SET(CP0_Config3, 1 << CP0C3_MSAP, HWCAP_MIPS_MSA);
> >> +GET_FEATURE_INSN(ASE_LMMI, HWCAP_LOONGSON_MMI);
> >> +GET_FEATURE_INSN(ASE_LEXT, HWCAP_LOONGSON_EXT);
> >>
> >>  return hwcaps;
> >>  }
> >> --
> >> 2.26.2
> >



-- 
Huacai Chen

Re: [PATCH] scsi: allow user to set werror as report

2020-12-02 Thread Zihao Chang

Ping? This is a fix patch which has been reviewed, whose tree should it go via?

Thanks
Zihao

On 2020/11/3 22:03, Zihao Chang wrote:
> 
> 
> On 2020/11/3 18:52, Fam Zheng wrote:
>> On Tue, 2020-11-03 at 14:12 +0800, Zihao Chang wrote:
>>> 'enospc' is the default for -drive, but qemu allows user to set
>>> drive option werror. If werror of scsi-generic is set to 'report'
>>> by user, qemu will not allow vm to start.
>>>
>>> This patch allow user to set werror as 'report' for scsi-generic.
>>>
>>> Signed-off-by: Zihao Chang 
>>> ---
>>>  hw/scsi/scsi-generic.c | 3 ++-
>>>  1 file changed, 2 insertions(+), 1 deletion(-)
>>>
>>> diff --git a/hw/scsi/scsi-generic.c b/hw/scsi/scsi-generic.c
>>> index 2cb23ca891..2730e37d63 100644
>>> --- a/hw/scsi/scsi-generic.c
>>> +++ b/hw/scsi/scsi-generic.c
>>> @@ -664,7 +664,8 @@ static void scsi_generic_realize(SCSIDevice *s,
>>> Error **errp)
>>>  return;
>>>  }
>>>  
>>> -if (blk_get_on_error(s->conf.blk, 0) !=
>>> BLOCKDEV_ON_ERROR_ENOSPC) {
>>> +if (blk_get_on_error(s->conf.blk, 0) != BLOCKDEV_ON_ERROR_ENOSPC
>>> &&
>>> +blk_get_on_error(s->conf.blk, 0) !=
>>> BLOCKDEV_ON_ERROR_REPORT) {
>>>  error_setg(errp, "Device doesn't support drive option
>>> werror");
>>>  return;
>>>  }
>>
>> Accepting the report sounds sane to me, it matches what we actually
>> (always) do. Is the idea to allow users to spell it out explicitly in
>> the command line?
>>
> Actually, qemu supports user to spell it out explicitly in the command
> line like "enospc", "report" & "action". This patch just allows user to
> set werror as "report" for scsi-generic, which is a common scenario.
> 
>> Reviewed-by: Fam Zheng 
>>
>> .
>>

[Bug 1906156] Re: Host OS Reboot Required, for Guest kext to Load (Fully)

2020-12-02 Thread Russell Morris

Sorry for the delay on updating this - but pulling my hair out (and I'm
short enough of that already ... LOL). I can't get Ubuntu to let me run
the custom qemu executable. Really is looking like apparmor. Fighting
with that, but struggling to have it let me run it :-(.

Thanks.

-- 
You received this bug notification because you are a member of qemu-
devel-ml, which is subscribed to QEMU.
https://bugs.launchpad.net/bugs/1906156

Title:
  Host OS Reboot Required, for Guest kext to Load (Fully)

Status in QEMU:
  Incomplete

Bug description:
  Hi,

  Finding this one a bit odd, but I am loading a driver (kext) in a
  macOS guest ... and it works, on the first VM (domain) startup after a
  full / clean host OS boot (or reboot). However, if I even reboot the
  guest OS, then the driver load fails => can be "corrected" by a full
  host OS reboot (which seems very extreme).

  Is this a known issue, and/or is there a workaround?

  FYI, running,
  QEMU emulator version 5.0.0 (Debian 1:5.0-5ubuntu9.1)
  Copyright (c) 2003-2020 Fabrice Bellard and the QEMU Project developers

  This is for a macOS guest, on a Linux host.

  Thanks!

To manage notifications about this bug go to:
https://bugs.launchpad.net/qemu/+bug/1906156/+subscriptions

Re: [PATCH v10 0/9] pci_expander_brdige:acpi: Support pxb-pcie for ARM

2020-12-02 Thread Jiahui Cen

Hi Michael,

On 2020/12/2 17:53, Michael S. Tsirkin wrote:
> On Thu, Nov 19, 2020 at 09:48:32AM +0800, Jiahui Cen wrote:
>> Changes with v9
>> v9->v10:
>> Refactor patch2 to drop useless macros and variables.
>> Split patch2 into two patches.
> 
> I tagged this for after the release. To help make sure this is
> not lost pls ping me after the release.

OK. I'll ping you after the release.

Thanks,
Jiahui.

>> Changes with v8
>> v8->v9:
>> Rebase to master
>>
>> Changes with v7
>> v7->v8:
>> Fix the error:no member named 'fw_cfg' in 'struct PCMachineState'
>>
>> Changes with v6
>> v6->v7:
>> Refactor fw_cfg_write_extra_pci_roots
>> Add API PCI_GET_PCIE_HOST_STATE
>> Fix typos
>>
>> Changes with v5
>> v5->v6: stat crs_range_insert in aml_build.h
>>
>> Changes with v4
>> v4->v5: Not using specific resources for PXB.
>> Instead, the resources for pxb are composed of the bar space of the
>> pci-bridge/pcie-root-port behind it and the config space of devices
>> behind it.
>>
>> Only if the bios(uefi for arm) support multiple roots,
>> configure space of devices behind pxbs could be obtained.
>> The newest uefi work is updated for discussion by the following link:
>> https://edk2.groups.io/g/devel/topic/78135572#67173
>> [PATCH v2 0/4] Add extra pci roots support for Arm
>>
>> Currently pxb-pcie is not supported by arm,
>> the reason for it is pxb-pcie is not described in DSDT table
>> and only one main host bridge is described in acpi tables,
>> which means it is not possible to present different io numas
>> for different devices.
>>
>> This series of patches make arm to support PXB-PCIE.
>>
>> Users can configure pxb-pcie with certain numa, Example command
>> is:
>>
>>-device pxb-pcie,id=pci.7,bus_nr=128,numa_node=0,bus=pcie.0,addr=0x9
>>
>> Jiahui Cen (2):
>>   fw_cfg: Refactor extra pci roots addition
>>   hw/arm/virt: Write extra pci roots into fw_cfg
>>
>> Yubo Miao (7):
>>   acpi/gpex: Extract two APIs from acpi_dsdt_add_pci
>>   acpi: Extract crs build form acpi_build.c
>>   acpi/gpex: Build tables for pxb
>>   acpi: Align the size to 128k
>>   unit-test: The files changed.
>>   unit-test: Add testcase for pxb
>>   unit-test: Add the binary file and clear diff.h
>>
>>  hw/acpi/aml-build.c| 285 +++
>>  hw/arm/virt-acpi-build.c   |  31 ++-
>>  hw/arm/virt.c  |   7 +-
>>  hw/i386/acpi-build.c   | 293 
>>  hw/i386/pc.c   |  18 +-
>>  hw/nvram/fw_cfg.c  |  23 ++
>>  hw/pci-host/gpex-acpi.c| 166 +++
>>  include/hw/acpi/aml-build.h|  22 ++
>>  include/hw/arm/virt.h  |   1 +
>>  include/hw/nvram/fw_cfg.h  |   9 +
>>  include/hw/pci-host/gpex.h |   1 +
>>  tests/data/acpi/virt/DSDT.pxb  | Bin 0 -> 7802 bytes
>>  tests/qtest/bios-tables-test.c |  58 +++-
>>  13 files changed, 545 insertions(+), 369 deletions(-)
>>  create mode 100644 tests/data/acpi/virt/DSDT.pxb
>>
>> -- 
>> 2.28.0
> 
> .
>

Re: [PATCH for-6.0] hw/ppc: Do not re-read the clock on pre_save if doing savevm

2020-12-02 Thread David Gibson

On Wed, Dec 02, 2020 at 06:28:26PM +0100, Greg Kurz wrote:
> A guest with enough RAM, eg. 128G, is likely to detect savevm downtime
> and to complain about stalled CPUs. This happens because we re-read
> the timebase just before migrating it and we thus don't account for
> all the time between VM stop and pre-save.
> 
> A very similar situation was already addressed for live migration of
> paused guests (commit d14f33976282). Extend the logic to do the same
> with savevm.
> 
> Fixes: https://bugzilla.redhat.com/show_bug.cgi?id=1893787
> Signed-off-by: Greg Kurz 

Applied to ppc-for-6.0, thanks.

> ---
>  hw/ppc/ppc.c |5 +++--
>  1 file changed, 3 insertions(+), 2 deletions(-)
> 
> diff --git a/hw/ppc/ppc.c b/hw/ppc/ppc.c
> index 1b9827207676..5cbbff1f8d0c 100644
> --- a/hw/ppc/ppc.c
> +++ b/hw/ppc/ppc.c
> @@ -1027,7 +1027,8 @@ static void timebase_save(PPCTimebase *tb)
>   */
>  tb->guest_timebase = ticks + first_ppc_cpu->env.tb_env->tb_offset;
>  
> -tb->runstate_paused = runstate_check(RUN_STATE_PAUSED);
> +tb->runstate_paused =
> +runstate_check(RUN_STATE_PAUSED) || 
> runstate_check(RUN_STATE_SAVE_VM);
>  }
>  
>  static void timebase_load(PPCTimebase *tb)
> @@ -1088,7 +1089,7 @@ static int timebase_pre_save(void *opaque)
>  {
>  PPCTimebase *tb = opaque;
>  
> -/* guest_timebase won't be overridden in case of paused guest */
> +/* guest_timebase won't be overridden in case of paused guest or savevm 
> */
>  if (!tb->runstate_paused) {
>  timebase_save(tb);
>  }
> 
> 

-- 
David Gibson| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au  | minimalist, thank you.  NOT _the_ _other_
| _way_ _around_!
http://www.ozlabs.org/~dgibson


signature.asc
Description: PGP signature

Re: [PATCH] xive: Add more trace events

2020-12-02 Thread David Gibson

On Fri, Nov 27, 2020 at 02:36:54PM +0100, Cédric Le Goater wrote:
> These are useful to understand IRQ requests from the OS.
> 
> Signed-off-by: Cédric Le Goater 
> ---
> 
> David, Feel free to merge with the previous.

Done, thanks.

> 
>  hw/intc/spapr_xive.c | 6 ++
>  hw/intc/spapr_xive_kvm.c | 5 +
>  hw/intc/trace-events | 7 +++
>  3 files changed, 18 insertions(+)
> 
> diff --git a/hw/intc/spapr_xive.c b/hw/intc/spapr_xive.c
> index 644cc85cbdc9..db6d0e7a3a12 100644
> --- a/hw/intc/spapr_xive.c
> +++ b/hw/intc/spapr_xive.c
> @@ -563,6 +563,8 @@ static int spapr_xive_claim_irq(SpaprInterruptController 
> *intc, int lisn,
>  
>  assert(lisn < xive->nr_irqs);
>  
> +trace_spapr_xive_claim_irq(lisn, lsi);
> +
>  if (xive_eas_is_valid(>eat[lisn])) {
>  error_setg(errp, "IRQ %d is not free", lisn);
>  return -EBUSY;
> @@ -588,6 +590,8 @@ static void spapr_xive_free_irq(SpaprInterruptController 
> *intc, int lisn)
>  SpaprXive *xive = SPAPR_XIVE(intc);
>  assert(lisn < xive->nr_irqs);
>  
> +trace_spapr_xive_free_irq(lisn);
> +
>  xive->eat[lisn].w &= cpu_to_be64(~EAS_VALID);
>  }
>  
> @@ -654,6 +658,8 @@ static void spapr_xive_set_irq(SpaprInterruptController 
> *intc, int irq, int val)
>  {
>  SpaprXive *xive = SPAPR_XIVE(intc);
>  
> +trace_spapr_xive_set_irq(irq, val);
> +
>  if (spapr_xive_in_kernel(xive)) {
>  kvmppc_xive_source_set_irq(>source, irq, val);
>  } else {
> diff --git a/hw/intc/spapr_xive_kvm.c b/hw/intc/spapr_xive_kvm.c
> index e8667ce5f621..acc8c3650c4c 100644
> --- a/hw/intc/spapr_xive_kvm.c
> +++ b/hw/intc/spapr_xive_kvm.c
> @@ -20,6 +20,7 @@
>  #include "hw/ppc/spapr_xive.h"
>  #include "hw/ppc/xive.h"
>  #include "kvm_ppc.h"
> +#include "trace.h"
>  
>  #include 
>  
> @@ -163,6 +164,8 @@ int kvmppc_xive_cpu_connect(XiveTCTX *tctx, Error **errp)
>  
>  vcpu_id = kvm_arch_vcpu_id(tctx->cs);
>  
> +trace_kvm_xive_cpu_connect(vcpu_id);
> +
>  ret = kvm_vcpu_enable_cap(tctx->cs, KVM_CAP_PPC_IRQ_XIVE, 0, xive->fd,
>vcpu_id, 0);
>  if (ret < 0) {
> @@ -308,6 +311,8 @@ uint64_t kvmppc_xive_esb_rw(XiveSource *xsrc, int srcno, 
> uint32_t offset,
>  return xive_esb_rw(xsrc, srcno, offset, data, 1);
>  }
>  
> +trace_kvm_xive_source_reset(srcno);
> +
>  /*
>   * Special Load EOI handling for LSI sources. Q bit is never set
>   * and the interrupt should be re-triggered if the level is still
> diff --git a/hw/intc/trace-events b/hw/intc/trace-events
> index 77addc100f72..8ed397a0d587 100644
> --- a/hw/intc/trace-events
> +++ b/hw/intc/trace-events
> @@ -205,6 +205,9 @@ bcm2835_ic_set_gpu_irq(int irq, int level) "GPU irq #%d 
> level %d"
>  bcm2835_ic_set_cpu_irq(int irq, int level) "CPU irq #%d level %d"
>  
>  # spapr_xive.c
> +spapr_xive_claim_irq(uint32_t lisn, bool lsi) "lisn=0x%x lsi=%d"
> +spapr_xive_free_irq(uint32_t lisn) "lisn=0x%x"
> +spapr_xive_set_irq(uint32_t lisn, uint32_t val) "lisn=0x%x val=%d"
>  spapr_xive_get_source_info(uint64_t flags, uint64_t lisn) "flags=0x%"PRIx64" 
> lisn=0x%"PRIx64
>  spapr_xive_set_source_config(uint64_t flags, uint64_t lisn, uint64_t target, 
> uint64_t priority, uint64_t eisn) "flags=0x%"PRIx64" lisn=0x%"PRIx64" 
> target=0x%"PRIx64" priority=0x%"PRIx64" eisn=0x%"PRIx64
>  spapr_xive_get_source_config(uint64_t flags, uint64_t lisn) 
> "flags=0x%"PRIx64" lisn=0x%"PRIx64
> @@ -217,6 +220,10 @@ spapr_xive_esb(uint64_t flags, uint64_t lisn, uint64_t 
> offset, uint64_t data) "f
>  spapr_xive_sync(uint64_t flags, uint64_t lisn) "flags=0x%"PRIx64" 
> lisn=0x%"PRIx64
>  spapr_xive_reset(uint64_t flags) "flags=0x%"PRIx64
>  
> +# spapr_xive_kvm.c
> +kvm_xive_cpu_connect(uint32_t id) "connect CPU%d to KVM device"
> +kvm_xive_source_reset(uint32_t srcno) "IRQ 0x%x"
> +
>  # xive.c
>  xive_tctx_accept(uint32_t index, uint8_t ring, uint8_t ipb, uint8_t pipr, 
> uint8_t cppr, uint8_t nsr) "target=%d ring=0x%x IBP=0x%02x PIPR=0x%02x 
> CPPR=0x%02x NSR=0x%02x ACK"
>  xive_tctx_notify(uint32_t index, uint8_t ring, uint8_t ipb, uint8_t pipr, 
> uint8_t cppr, uint8_t nsr) "target=%d ring=0x%x IBP=0x%02x PIPR=0x%02x 
> CPPR=0x%02x NSR=0x%02x raise !"

-- 
David Gibson| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au  | minimalist, thank you.  NOT _the_ _other_
| _way_ _around_!
http://www.ozlabs.org/~dgibson


signature.asc
Description: PGP signature

Re: [PATCH v3 05/10] hvf: arm: Mark CPU as dirty on reset

2020-12-02 Thread Roman Bolshakov

On Wed, Dec 02, 2020 at 08:04:03PM +0100, Alexander Graf wrote:
> When clearing internal state of a CPU, we should also make sure that HVF
> knows about it and can push the new values down to vcpu state.
> 

I'm sorry if I'm asking something dumb. But isn't
cpu_synchronize_all_post_reset() supposed to push QEMU state into HVF
(or any other accel) after reset?

For x86 it used to work:

  static void do_hvf_cpu_synchronize_post_reset(CPUState *cpu,
run_on_cpu_data arg)
  {
  hvf_put_registers(cpu);   

 cpu->vcpu_dirty = false;
  }

Thanks,
Roman

> Make sure that with HVF enabled, we tell it that it should synchronize
> CPU state on next entry after a reset.
> 
> This fixes PSCI handling, because now newly pushed state such as X0 and
> PC on remote CPU enablement also get pushed into HVF.
> 
> Signed-off-by: Alexander Graf 
> ---
>  target/arm/arm-powerctl.c | 1 +
>  target/arm/cpu.c  | 2 ++
>  2 files changed, 3 insertions(+)
> 
> diff --git a/target/arm/arm-powerctl.c b/target/arm/arm-powerctl.c
> index b75f813b40..a49a5b32e6 100644
> --- a/target/arm/arm-powerctl.c
> +++ b/target/arm/arm-powerctl.c
> @@ -15,6 +15,7 @@
>  #include "arm-powerctl.h"
>  #include "qemu/log.h"
>  #include "qemu/main-loop.h"
> +#include "sysemu/hw_accel.h"
>  
>  #ifndef DEBUG_ARM_POWERCTL
>  #define DEBUG_ARM_POWERCTL 0
> diff --git a/target/arm/cpu.c b/target/arm/cpu.c
> index db6f7c34ed..9a501ea4bd 100644
> --- a/target/arm/cpu.c
> +++ b/target/arm/cpu.c
> @@ -411,6 +411,8 @@ static void arm_cpu_reset(DeviceState *dev)
>  #ifndef CONFIG_USER_ONLY
>  if (kvm_enabled()) {
>  kvm_arm_reset_vcpu(cpu);
> +} else if (hvf_enabled()) {
> +s->vcpu_dirty = true;
>  }
>  #endif
>  
> -- 
> 2.24.3 (Apple Git-128)
>

Re: [PATCH v3 04/10] arm: Set PSCI to 0.2 for HVF

2020-12-02 Thread Roman Bolshakov

On Wed, Dec 02, 2020 at 08:04:02PM +0100, Alexander Graf wrote:
> In Hypervisor.framework, we just pass PSCI calls straight on to the QEMU 
> emulation
> of it. That means, if TCG is compatible with PSCI 0.2, so are we. Let's 
> transpose
> that fact in code too.
> 
> Signed-off-by: Alexander Graf 
> ---
>  target/arm/cpu.c | 4 
>  1 file changed, 4 insertions(+)
> 
> diff --git a/target/arm/cpu.c b/target/arm/cpu.c
> index 07492e9f9a..db6f7c34ed 100644
> --- a/target/arm/cpu.c
> +++ b/target/arm/cpu.c
> @@ -1062,6 +1062,10 @@ static void arm_cpu_initfn(Object *obj)
>  if (tcg_enabled()) {
>  cpu->psci_version = 2; /* TCG implements PSCI 0.2 */
>  }
> +
> +if (hvf_enabled()) {
> +cpu->psci_version = 2; /* HVF uses TCG's PSCI */
> +}

If HVF is piggybacking on TCG and they're both the same versions would
it be better if:

> - if (tcg_enabled()) {
> -cpu->psci_version = 2; /* TCG implements PSCI 0.2 */
> + if (tcg_enabled() || hvf_enabled()) {
> +cpu->psci_version = 2; /* TCG implements PSCI 0.2 */
>  }

That'd avoid a case where they get out of sync.

Thanks,
Roman

>  }
>  
>  static Property arm_cpu_gt_cntfrq_property =
> -- 
> 2.24.3 (Apple Git-128)
>

Re: [PATCH v3 03/10] hvf: Introduce hvf vcpu struct

2020-12-02 Thread Roman Bolshakov

On Wed, Dec 02, 2020 at 08:04:01PM +0100, Alexander Graf wrote:
> We will need more than a single field for hvf going forward. To keep
> the global vcpu struct uncluttered, let's allocate a special hvf vcpu
> struct, similar to how hax does it.
> 

Reviewed-by: Roman Bolshakov 
Tested-by: Roman Bolshakov 

Thanks,
Roman

Re: [PATCH v3 02/10] hvf: Move common code out

2020-12-02 Thread Roman Bolshakov

On Wed, Dec 02, 2020 at 08:04:00PM +0100, Alexander Graf wrote:
> Until now, Hypervisor.framework has only been available on x86_64 systems.
> With Apple Silicon shipping now, it extends its reach to aarch64. To
> prepare for support for multiple architectures, let's move common code out
> into its own accel directory.
> 
> [...]
>
> diff --git a/include/sysemu/hvf_int.h b/include/sysemu/hvf_int.h
> new file mode 100644
> index 00..de9bad23a8
> --- /dev/null
> +++ b/include/sysemu/hvf_int.h
> @@ -0,0 +1,69 @@
> +/*
> + * QEMU Hypervisor.framework (HVF) support
> + *
> + * This work is licensed under the terms of the GNU GPL, version 2 or later.
> + * See the COPYING file in the top-level directory.
> + *
> + */
> +
> +/* header to be included in HVF-specific code */
> +
> +#ifndef HVF_INT_H
> +#define HVF_INT_H
> +
> +#include 
> +

From here

> +#define HVF_MAX_VCPU 0x10
> +
> +extern struct hvf_state hvf_global;
> +
> +struct hvf_vm {
> +int id;
> +struct hvf_vcpu_state *vcpus[HVF_MAX_VCPU];
> +};
> +
> +struct hvf_state {
> +uint32_t version;
> +struct hvf_vm *vm;
> +uint64_t mem_quota;
> +};
> +

and down to here, it doesn't seem to be a common code, rather unused
definitions. They can be safely dropped:

diff --git a/target/i386/hvf/hvf-i386.h b/target/i386/hvf/hvf-i386.h
index e0edffd077..e31938e5ff 100644
--- a/target/i386/hvf/hvf-i386.h
+++ b/target/i386/hvf/hvf-i386.h
@@ -21,21 +21,6 @@
 #include "cpu.h"
 #include "x86.h"

-#define HVF_MAX_VCPU 0x10
-
-extern struct hvf_state hvf_global;
-
-struct hvf_vm {
-int id;
-struct hvf_vcpu_state *vcpus[HVF_MAX_VCPU];
-};
-
-struct hvf_state {
-uint32_t version;
-struct hvf_vm *vm;
-uint64_t mem_quota;
-};
-
 /* hvf_slot flags */
 #define HVF_SLOT_LOG (1 << 0)

@@ -75,7 +60,6 @@ hvf_slot *hvf_find_overlap_slot(uint64_t, uint64_t);

 /* Host specific functions */
 int hvf_inject_interrupt(CPUArchState *env, int vector);
-int hvf_vcpu_run(struct hvf_vcpu_state *vcpu);
 #endif

 #endif

Re: [PATCH for-6.0 v2 1/3] spapr: Improve naming of some vCPU id related items

2020-12-02 Thread David Gibson

On Mon, Nov 30, 2020 at 05:52:56PM +0100, Greg Kurz wrote:
> The machine tells the IC backend the number of vCPU ids it will be
> exposed to, in order to:
> - fill the "ibm,interrupt-server-ranges" property in the DT (XICS)
> - size the VP block used by the in-kernel chip (XICS-on-XIVE, XIVE)
> 
> The current "nr_servers" and "spapr_max_server_number" naming can
> mislead people into thinking it is about a quantity of CPUs. Make
> it clear this is all about vCPU ids.
> 
> Signed-off-by: Greg Kurz 

I know it seems very finicky, but can you please
s/max_vcpu_ids/max_vcpu_id/g

At least to me, "max_vcpu_ids" has some of the same confusion as the
existing code - it reads like the maximum *number* of IDs, rather than
the maximum *value* of IDs.

> ---
>  include/hw/ppc/spapr.h  |  2 +-
>  include/hw/ppc/spapr_irq.h  |  8 
>  include/hw/ppc/spapr_xive.h |  2 +-
>  include/hw/ppc/xics_spapr.h |  2 +-
>  hw/intc/spapr_xive.c|  8 
>  hw/intc/spapr_xive_kvm.c|  4 ++--
>  hw/intc/xics_kvm.c  |  4 ++--
>  hw/intc/xics_spapr.c|  8 
>  hw/ppc/spapr.c  |  8 
>  hw/ppc/spapr_irq.c  | 18 +-
>  10 files changed, 32 insertions(+), 32 deletions(-)
> 
> diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
> index b7ced9faebf5..dc99d45e2852 100644
> --- a/include/hw/ppc/spapr.h
> +++ b/include/hw/ppc/spapr.h
> @@ -849,7 +849,7 @@ int spapr_hpt_shift_for_ramsize(uint64_t ramsize);
>  int spapr_reallocate_hpt(SpaprMachineState *spapr, int shift, Error **errp);
>  void spapr_clear_pending_events(SpaprMachineState *spapr);
>  void spapr_clear_pending_hotplug_events(SpaprMachineState *spapr);
> -int spapr_max_server_number(SpaprMachineState *spapr);
> +int spapr_max_vcpu_ids(SpaprMachineState *spapr);
>  void spapr_store_hpte(PowerPCCPU *cpu, hwaddr ptex,
>uint64_t pte0, uint64_t pte1);
>  void spapr_mce_req_event(PowerPCCPU *cpu, bool recovered);
> diff --git a/include/hw/ppc/spapr_irq.h b/include/hw/ppc/spapr_irq.h
> index c22a72c9e270..2e53fc9e6cbb 100644
> --- a/include/hw/ppc/spapr_irq.h
> +++ b/include/hw/ppc/spapr_irq.h
> @@ -43,7 +43,7 @@ DECLARE_CLASS_CHECKERS(SpaprInterruptControllerClass, 
> SPAPR_INTC,
>  struct SpaprInterruptControllerClass {
>  InterfaceClass parent;
>  
> -int (*activate)(SpaprInterruptController *intc, uint32_t nr_servers,
> +int (*activate)(SpaprInterruptController *intc, uint32_t max_vcpu_ids,
>  Error **errp);
>  void (*deactivate)(SpaprInterruptController *intc);
>  
> @@ -62,7 +62,7 @@ struct SpaprInterruptControllerClass {
>  /* These methods should only be called on the active intc */
>  void (*set_irq)(SpaprInterruptController *intc, int irq, int val);
>  void (*print_info)(SpaprInterruptController *intc, Monitor *mon);
> -void (*dt)(SpaprInterruptController *intc, uint32_t nr_servers,
> +void (*dt)(SpaprInterruptController *intc, uint32_t max_vcpu_ids,
> void *fdt, uint32_t phandle);
>  int (*post_load)(SpaprInterruptController *intc, int version_id);
>  };
> @@ -74,7 +74,7 @@ int spapr_irq_cpu_intc_create(struct SpaprMachineState 
> *spapr,
>  void spapr_irq_cpu_intc_reset(struct SpaprMachineState *spapr, PowerPCCPU 
> *cpu);
>  void spapr_irq_cpu_intc_destroy(struct SpaprMachineState *spapr, PowerPCCPU 
> *cpu);
>  void spapr_irq_print_info(struct SpaprMachineState *spapr, Monitor *mon);
> -void spapr_irq_dt(struct SpaprMachineState *spapr, uint32_t nr_servers,
> +void spapr_irq_dt(struct SpaprMachineState *spapr, uint32_t max_vcpu_ids,
>void *fdt, uint32_t phandle);
>  
>  uint32_t spapr_irq_nr_msis(struct SpaprMachineState *spapr);
> @@ -105,7 +105,7 @@ typedef int 
> (*SpaprInterruptControllerInitKvm)(SpaprInterruptController *,
>  
>  int spapr_irq_init_kvm(SpaprInterruptControllerInitKvm fn,
> SpaprInterruptController *intc,
> -   uint32_t nr_servers,
> +   uint32_t max_vcpu_ids,
> Error **errp);
>  
>  /*
> diff --git a/include/hw/ppc/spapr_xive.h b/include/hw/ppc/spapr_xive.h
> index 26c8d90d7196..643129b13536 100644
> --- a/include/hw/ppc/spapr_xive.h
> +++ b/include/hw/ppc/spapr_xive.h
> @@ -79,7 +79,7 @@ int spapr_xive_end_to_target(uint8_t end_blk, uint32_t 
> end_idx,
>  /*
>   * KVM XIVE device helpers
>   */
> -int kvmppc_xive_connect(SpaprInterruptController *intc, uint32_t nr_servers,
> +int kvmppc_xive_connect(SpaprInterruptController *intc, uint32_t 
> max_vcpu_ids,
>  Error **errp);
>  void kvmppc_xive_disconnect(SpaprInterruptController *intc);
>  void kvmppc_xive_reset(SpaprXive *xive, Error **errp);
> diff --git a/include/hw/ppc/xics_spapr.h b/include/hw/ppc/xics_spapr.h
> index de752c0d2c7e..5c0e9430a964 100644
> --- a/include/hw/ppc/xics_spapr.h
> +++ b/include/hw/ppc/xics_spapr.h
> @@ -35,7 +35,7 @@
>

Re: [PATCH v1 1/1] hvf: arm: Properly sync guest time on migration

2020-12-02 Thread Peter Collingbourne

On Wed, Dec 2, 2020 at 3:26 PM Frank Yang  wrote:
>
>
>
> On Wed, Dec 2, 2020 at 2:57 PM Alexander Graf  wrote:
>>
>>
>> On 02.12.20 23:46, Frank Yang wrote:
>>
>>
>>
>> On Wed, Dec 2, 2020 at 2:28 PM Alexander Graf  wrote:
>>>
>>>
>>> On 02.12.20 23:19, Frank Yang wrote:
>>>
>>>
>>> From downstream: 
>>> https://android-review.googlesource.com/c/platform/external/qemu/+/1515002
>>>
>>> Based on v3 of Alexander Graf's patches
>>>
>>> https://patchew.org/QEMU/20201202190408.2041-1-ag...@csgraf.de
>>>
>>> We need to adjust CNTVOFF_EL2 so that time doesnt warp.  Even though we
>>> can set separate CNTVOFF_EL2 values per vCPU, it just is not worth the
>>> require effort to do that accurately---with individual values, even if
>>> they are a tiny bit off it can result in a lockup due to inconsistent
>>> time differences between vCPUs. So just use a global approximate value
>>> for now.
>>>
>>> Not tested in upstream yet, but Android emulator snapshots work without
>>> time warp now.
>>>
>>> Signed-off-by: Lingfeng Yang 
>>>
>>>
>>> If we just always make CNTV start at the same 0 as QEMU_CLOCK_VIRTUAL, we 
>>> should be able to just recover the offset after migration by looking at 
>>> QEMU_CLOCK_VIRTUAL to set CNTVOFF, right?
>>>
>>> That would end up much easier than this patch I hope.
>>>
>>>
>>
>> The virtual clock interfaces/implementations in QEMU seem complex to me 
>> relative to the fix needed here and they don't seem to compute ticks with 
>> mach_absolute_time() (which in this case we want since we want to compute in 
>> timer ticks instead of having to mess with ns / cycle conversions). I do 
>> agree this patch does seem more complicated on the surface though versus 
>> "just" setting cntvoff directly to some value. Maybe we should simplify the 
>> QEMU_CLOCK_VIRTUAL implementation first to maintain CNTVOFF_EL2/CNTV using 
>> mach_absolute_time() first?
>>
>>
>> So QEMU_CLOCK_VIRTUAL calls cpu_get_clock() which just adds an offset to 
>> gettimeofday(). This offset is already part of the live migration stream[1]. 
>> So if you just configure CNTVOFF_EL2 based on QEMU_CLOCK_VIRTUAL adjusted by 
>> the clock frequency on vcpu init, you should have everything you need. You 
>> can do that on every CPU init even, as the virtual clock will just be 0 on 
>> start.
>>
>> The only thing we need to change then is to move the WFI from a direct call 
>> to mach_absolute_time() to also check the virtual clock instead. I would 
>> hope that gettimeofday() calls mach_absolute_time() in the background too to 
>> speed it up.
>>
> Sounds plausible, but I noticed that we also have cpu_ticks_offset as part of 
> the migration stream, and I prefer mach_absolute_time() (ticks) instead of 
> seconds in WFI as well as it makes the calculation more accurate (ticks 
> against ticks instead of conversion between ns and ticks).
>
> Should we look at integrating this with cpu_ticks_offset instead?

Seems plausible to me. As far as I can tell the intent is that
cpu_get_host_ticks() does not increment while asleep (e.g. on x86 it
uses RDTSC which as far as I know does not increment while asleep), so
we could provide an implementation on Mac that calls
mach_absolute_time().

Peter

Re: [PATCH v3 01/10] hvf: Add hypervisor entitlement to output binaries

2020-12-02 Thread Roman Bolshakov

On Wed, Dec 02, 2020 at 08:03:59PM +0100, Alexander Graf wrote:
> In macOS 11, QEMU only gets access to Hypervisor.framework if it has the
> respective entitlement. Add an entitlement template and automatically self
> sign and apply the entitlement in the build.
> 
> Signed-off-by: Alexander Graf 
> 
> ---
> 
> v1 -> v2:
> 
>   - Make safe to ctrl-C
> ---
>  accel/hvf/entitlements.plist |  8 
>  meson.build  | 30 ++
>  scripts/entitlement.sh   | 13 +
>  3 files changed, 47 insertions(+), 4 deletions(-)
>  create mode 100644 accel/hvf/entitlements.plist
>  create mode 100755 scripts/entitlement.sh
> 
> diff --git a/accel/hvf/entitlements.plist b/accel/hvf/entitlements.plist
> new file mode 100644
> index 00..154f3308ef
> --- /dev/null
> +++ b/accel/hvf/entitlements.plist
> @@ -0,0 +1,8 @@
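> +<?xml version="1.0" encoding="UTF-8"?>
> +<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
> +<plist version="1.0">
> +<dict>
> +    <key>com.apple.security.hypervisor</key>
> +    <true/>
> +</dict>
> +</plist>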
> diff --git a/meson.build b/meson.build
> index 5062407c70..2a7ff5560c 100644
> --- a/meson.build
> +++ b/meson.build
> @@ -1844,9 +1844,14 @@ foreach target : target_dirs
>  }]
>endif
>foreach exe: execs
> -emulators += {exe['name']:
> - executable(exe['name'], exe['sources'],
> -   install: true,
> +exe_name = exe['name']
> +exe_sign = 'CONFIG_HVF' in config_target
> +if exe_sign
> +  exe_name += '-unsigned'
> +endif
> +
> +emulator = executable(exe_name, exe['sources'],
> +   install: not exe_sign,
> c_args: c_args,
> dependencies: arch_deps + deps + exe['dependencies'],
> objects: lib.extract_all_objects(recursive: true),
> @@ -1854,7 +1859,24 @@ foreach target : target_dirs
> link_depends: [block_syms, qemu_syms] + 
> exe.get('link_depends', []),
> link_args: link_args,
> gui_app: exe['gui'])
> -}
> +
> +if exe_sign
> +  exe_full = meson.current_build_dir() / exe['name']

It's defined but not used.

> +  emulators += {exe['name'] : custom_target(exe['name'],
> +   install: true,
> +   install_dir: get_option('bindir'),
> +   depends: emulator,
> +   output: exe['name'],
> +   command: [
> + meson.current_source_dir() / 'scripts/entitlement.sh',
> + meson.current_build_dir() / exe['name'] + '-unsigned',

exe_name might be used instead of:
exe['name'] + '-unsigned'

Thanks,
Roman

> + meson.current_build_dir() / exe['name'],
> + meson.current_source_dir() / 
> 'accel/hvf/entitlements.plist'
> +   ])
> +  }
> +else
> +  emulators += {exe['name']: emulator}
> +endif
>  
>  if 'CONFIG_TRACE_SYSTEMTAP' in config_host
>foreach stp: [
> diff --git a/scripts/entitlement.sh b/scripts/entitlement.sh
> new file mode 100755
> index 00..c540fa6435
> --- /dev/null
> +++ b/scripts/entitlement.sh
> @@ -0,0 +1,13 @@
> +#!/bin/sh -e
> +#
> +# Helper script for the build process to apply entitlements
> +
> +SRC="$1"
> +DST="$2"
> +ENTITLEMENT="$3"
> +
> +trap 'rm "$DST.tmp"' exit
> +cp -af "$SRC" "$DST.tmp"
> +codesign --entitlements "$ENTITLEMENT" --force -s - "$DST.tmp"
> +mv "$DST.tmp" "$DST"
> +trap '' exit
> -- 
> 2.24.3 (Apple Git-128)
>

Re: [PATCH v1 1/1] hvf: arm: Properly sync guest time on migration

2020-12-02 Thread Peter Collingbourne

On Wed, Dec 2, 2020 at 2:57 PM Alexander Graf  wrote:
>
>
> On 02.12.20 23:46, Frank Yang wrote:
>
>
>
> On Wed, Dec 2, 2020 at 2:28 PM Alexander Graf  wrote:
>>
>>
>> On 02.12.20 23:19, Frank Yang wrote:
>>
>>
>> From downstream: 
>> https://android-review.googlesource.com/c/platform/external/qemu/+/1515002
>>
>> Based on v3 of Alexander Graf's patches
>>
>> https://patchew.org/QEMU/20201202190408.2041-1-ag...@csgraf.de
>>
>> We need to adjust CNTVOFF_EL2 so that time doesnt warp.  Even though we
>> can set separate CNTVOFF_EL2 values per vCPU, it just is not worth the
>> require effort to do that accurately---with individual values, even if
>> they are a tiny bit off it can result in a lockup due to inconsistent
>> time differences between vCPUs. So just use a global approximate value
>> for now.
>>
>> Not tested in upstream yet, but Android emulator snapshots work without
>> time warp now.
>>
>> Signed-off-by: Lingfeng Yang 
>>
>>
>> If we just always make CNTV start at the same 0 as QEMU_CLOCK_VIRTUAL, we 
>> should be able to just recover the offset after migration by looking at 
>> QEMU_CLOCK_VIRTUAL to set CNTVOFF, right?
>>
>> That would end up much easier than this patch I hope.
>>
>>
>
> The virtual clock interfaces/implementations in QEMU seem complex to me 
> relative to the fix needed here and they don't seem to compute ticks with 
> mach_absolute_time() (which in this case we want since we want to compute in 
> timer ticks instead of having to mess with ns / cycle conversions). I do 
> agree this patch does seem more complicated on the surface though versus 
> "just" setting cntvoff directly to some value. Maybe we should simplify the 
> QEMU_CLOCK_VIRTUAL implementation first to maintain CNTVOFF_EL2/CNTV using 
> mach_absolute_time() first?
>
>
> So QEMU_CLOCK_VIRTUAL calls cpu_get_clock() which just adds an offset to 
> gettimeofday(). This offset is already part of the live migration stream[1]. 
> So if you just configure CNTVOFF_EL2 based on QEMU_CLOCK_VIRTUAL adjusted by 
> the clock frequency on vcpu init, you should have everything you need. You 
> can do that on every CPU init even, as the virtual clock will just be 0 on 
> start.
>
> The only thing we need to change then is to move the WFI from a direct call 
> to mach_absolute_time() to also check the virtual clock instead. I would hope 
> that gettimeofday() calls mach_absolute_time() in the background too to speed 
> it up.

I'm not sure that something based on gettimeofday() (or
clock_gettime(CLOCK_MONOTONIC) which it looks like cpu_get_clock() can
also call) will work. It will include time spent asleep so it won't
correspond to mach_absolute_time() aka guest CNTVCT_EL0.

Peter

Re: [PATCH v1 4/9] vfio: Support for RamDiscardMgr in the !vIOMMU case

2020-12-02 Thread Alex Williamson

On Thu, 19 Nov 2020 16:39:13 +0100
David Hildenbrand  wrote:

> Implement support for RamDiscardMgr, to prepare for virtio-mem
> support. Instead of mapping the whole memory section, we only map
> "populated" parts and update the mapping when notified about
> discarding/population of memory via the RamDiscardListener. Similarly, when
> syncing the dirty bitmaps, sync only the actually mapped (populated) parts
> by replaying via the notifier.
> 
> Small mapping granularity is problematic for vfio, because we might run out
> of mappings. Warn to at least make users aware that there is such a
> limitation and that we are dealing with a setup issue e.g., of
> virtio-mem devices.
> 
> Using virtio-mem with vfio is still blocked via
> ram_block_discard_disable()/ram_block_discard_require() after this patch.
> 
> Cc: Paolo Bonzini 
> Cc: "Michael S. Tsirkin" 
> Cc: Alex Williamson 
> Cc: Dr. David Alan Gilbert 
> Cc: Igor Mammedov 
> Cc: Pankaj Gupta 
> Cc: Peter Xu 
> Cc: Auger Eric 
> Cc: Wei Yang 
> Cc: teawater 
> Cc: Marek Kedzierski 
> Signed-off-by: David Hildenbrand 
> ---
>  hw/vfio/common.c  | 233 ++
>  include/hw/vfio/vfio-common.h |  12 ++
>  2 files changed, 245 insertions(+)
> 
> diff --git a/hw/vfio/common.c b/hw/vfio/common.c
> index c1fdbf17f2..d52e7356cb 100644
> --- a/hw/vfio/common.c
> +++ b/hw/vfio/common.c
...
> +static void vfio_register_ram_discard_notifier(VFIOContainer *container,
> +   MemoryRegionSection *section)
> +{
> +RamDiscardMgr *rdm = memory_region_get_ram_discard_mgr(section->mr);
> +RamDiscardMgrClass *rdmc = RAM_DISCARD_MGR_GET_CLASS(rdm);
> +MachineState *ms = MACHINE(qdev_get_machine());
> +uint64_t suggested_granularity;
> +VFIORamDiscardListener *vrdl;
> +int ret;
> +
> +vrdl = g_new0(VFIORamDiscardListener, 1);
> +vrdl->container = container;
> +vrdl->mr = section->mr;
> +vrdl->offset_within_region = section->offset_within_region;
> +vrdl->offset_within_address_space = section->offset_within_address_space;
> +vrdl->size = int128_get64(section->size);
> +vrdl->granularity = rdmc->get_min_granularity(rdm, section->mr);
> +
> +/* Ignore some corner cases not relevant in practice. */
> +g_assert(QEMU_IS_ALIGNED(vrdl->offset_within_region, TARGET_PAGE_SIZE));
> +g_assert(QEMU_IS_ALIGNED(vrdl->offset_within_address_space,
> + TARGET_PAGE_SIZE));
> +g_assert(QEMU_IS_ALIGNED(vrdl->size, TARGET_PAGE_SIZE));
> +
> +/*
> + * We assume initial RAM never has a RamDiscardMgr and that all memory
> + * to eventually get hotplugged later could be coordinated via a
> + * RamDiscardMgr ("worst case").
> + *
> + * We assume the Linux kernel is configured ("dma_entry_limit") for the
> + * maximum of 65535 mappings and that we can consume roughly half of that


s/maximum/default/

Deciding we should only use half of it seems arbitrary.


> + * for this purpose.
> + *
> + * In reality, we might also have RAM without a RamDiscardMgr in our 
> device
> + * memory region and might be able to consume more mappings.
> + */
> +suggested_granularity = pow2ceil((ms->maxram_size - ms->ram_size) / 
> 32768);
> +suggested_granularity = MAX(suggested_granularity, 1 * MiB);
> +if (vrdl->granularity < suggested_granularity) {
> +warn_report("%s: eventually problematic mapping granularity (%" 
> PRId64
> +" MiB) with coordinated discards (e.g., 'block-size' in"
> +" virtio-mem). Suggested minimum granularity: %" PRId64
> +" MiB", __func__, vrdl->granularity / MiB,
> +suggested_granularity / MiB);
> +}


Starting w/ kernel 5.10 we have a way to get the instantaneous count of
available DMA mappings, so we could avoid assuming 64k when that's
available (see ex. s390_pci_update_dma_avail()).  Thanks,

Alex

Re: [PATCH v1 1/9] memory: Introduce RamDiscardMgr for RAM memory regions

2020-12-02 Thread Alex Williamson

On Thu, 19 Nov 2020 16:39:10 +0100
David Hildenbrand  wrote:

> We have some special RAM memory regions (managed by virtio-mem), whereby
> the guest agreed to only use selected memory ranges. "unused" parts are
> discarded so they won't consume memory - to logically unplug these memory
> ranges. Before the VM is allowed to use such logically unplugged memory
> again, coordination with the hypervisor is required.
> 
> This results in "sparse" mmaps/RAMBlocks/memory regions, whereby only
> coordinated parts are valid to be used/accessed by the VM.
> 
> In most cases, we don't care about that - e.g., in KVM, we simply have a
> single KVM memory slot. However, in case of vfio, registering the
> whole region with the kernel results in all pages getting pinned, and
> therefore an unexpected high memory consumption - discarding of RAM in
> that context is broken.
> 
> Let's introduce a way to coordinate discarding/populating memory within a
> RAM memory region with such special consumers of RAM memory regions: they
> can register as listeners and get updates on memory getting discarded and
> populated. Using this machinery, vfio will be able to map only the
> currently populated parts, resulting in discarded parts not getting pinned
> and not consuming memory.
> 
> A RamDiscardMgr has to be set for a memory region before it is getting
> mapped, and cannot change while the memory region is mapped.
> 
> Note: At some point, we might want to let RAMBlock users (esp. vfio used
> for nvme://) consume this interface as well. We'll need RAMBlock notifier
> calls when a RAMBlock is getting mapped/unmapped (via the corresponding
> memory region), so we can properly register a listener there as well.
> 
> Cc: Paolo Bonzini 
> Cc: "Michael S. Tsirkin" 
> Cc: Alex Williamson 
> Cc: Dr. David Alan Gilbert 
> Cc: Igor Mammedov 
> Cc: Pankaj Gupta 
> Cc: Peter Xu 
> Cc: Auger Eric 
> Cc: Wei Yang 
> Cc: teawater 
> Cc: Marek Kedzierski 
> Signed-off-by: David Hildenbrand 
> ---
>  include/exec/memory.h | 225 ++
>  softmmu/memory.c  |  22 +
>  2 files changed, 247 insertions(+)
> 
> diff --git a/include/exec/memory.h b/include/exec/memory.h
> index 0f3e6bcd5e..468cbb53a4 100644
> --- a/include/exec/memory.h
> +++ b/include/exec/memory.h
...
> @@ -425,6 +501,120 @@ struct IOMMUMemoryRegionClass {
>   Error **errp);
>  };
>  
> +/*
> + * RamDiscardMgrClass:
> + *
> + * A #RamDiscardMgr coordinates which parts of specific RAM #MemoryRegion
> + * regions are currently populated to be used/accessed by the VM, notifying
> + * after parts were discarded (freeing up memory) and before parts will be
> + * populated (consuming memory), to be used/accessed by the VM.
> + *
> + * A #RamDiscardMgr can only be set for a RAM #MemoryRegion while the
> + * #MemoryRegion isn't mapped yet; it cannot change while the #MemoryRegion 
> is
> + * mapped.
> + *
> + * The #RamDiscardMgr is intended to be used by technologies that are
> + * incompatible with discarding of RAM (e.g., VFIO, which may pin all
> + * memory inside a #MemoryRegion), and require proper coordination to only
> + * map the currently populated parts, to hinder parts that are expected to
> + * remain discarded from silently getting populated and consuming memory.
> + * Technologies that support discarding of RAM don't have to bother and can
> + * simply map the whole #MemoryRegion.
> + *
> + * An example #RamDiscardMgr is virtio-mem, which logically (un)plugs
> + * memory within an assigned RAM #MemoryRegion, coordinated with the VM.
> + * Logically unplugging memory consists of discarding RAM. The VM agreed to 
> not
> + * access unplugged (discarded) memory - especially via DMA. virtio-mem will
> + * properly coordinate with listeners before memory is plugged (populated),
> + * and after memory is unplugged (discarded).
> + *
> + * Listeners are called in multiples of the minimum granularity and changes 
> are
> + * aligned to the minimum granularity within the #MemoryRegion. Listeners 
> have
> + * to prepare for memory becoming discarded in a different granularity than 
> it
> + * was populated and the other way around.
> + */
> +struct RamDiscardMgrClass {
> +/* private */
> +InterfaceClass parent_class;
> +
> +/* public */
> +
> +/**
> + * @get_min_granularity:
> + *
> + * Get the minimum granularity in which listeners will get notified
> + * about changes within the #MemoryRegion via the #RamDiscardMgr.
> + *
> + * @rdm: the #RamDiscardMgr
> + * @mr: the #MemoryRegion
> + *
> + * Returns the minimum granularity.
> + */
> +uint64_t (*get_min_granularity)(const RamDiscardMgr *rdm,
> +const MemoryRegion *mr);
> +
> +/**
> + * @is_populated:
> + *
> + * Check whether the given range within the #MemoryRegion is completely
> + * populated (i.e., no parts are currently discarded). There

Re: [PATCH v1 1/1] hvf: arm: Properly sync guest time on migration

2020-12-02 Thread Frank Yang

On Wed, Dec 2, 2020 at 2:57 PM Alexander Graf  wrote:

>
> On 02.12.20 23:46, Frank Yang wrote:
>
>
>
> On Wed, Dec 2, 2020 at 2:28 PM Alexander Graf  wrote:
>
>>
>> On 02.12.20 23:19, Frank Yang wrote:
>>
>>
>> From downstream:
>> https://android-review.googlesource.com/c/platform/external/qemu/+/1515002
>>
>> Based on v3 of Alexander Graf's patches
>>
>> https://patchew.org/QEMU/20201202190408.2041-1-ag...@csgraf.de
>>
>> We need to adjust CNTVOFF_EL2 so that time doesnt warp.  Even though we
>> can set separate CNTVOFF_EL2 values per vCPU, it just is not worth the
>> require effort to do that accurately---with individual values, even if
>> they are a tiny bit off it can result in a lockup due to inconsistent
>> time differences between vCPUs. So just use a global approximate value
>> for now.
>>
>> Not tested in upstream yet, but Android emulator snapshots work without
>> time warp now.
>>
>> Signed-off-by: Lingfeng Yang 
>>
>>
>> If we just always make CNTV start at the same 0 as QEMU_CLOCK_VIRTUAL, we
>> should be able to just recover the offset after migration by looking at
>> QEMU_CLOCK_VIRTUAL to set CNTVOFF, right?
>>
>> That would end up much easier than this patch I hope.
>>
>>
>>
> The virtual clock interfaces/implementations in QEMU seem complex to me
> relative to the fix needed here and they don't seem to compute ticks with
> mach_absolute_time() (which in this case we want since we want to compute
> in timer ticks instead of having to mess with ns / cycle conversions). I do
> agree this patch does seem more complicated on the surface though versus
> "just" setting cntvoff directly to some value. Maybe we should simplify the
> QEMU_CLOCK_VIRTUAL implementation first to maintain CNTVOFF_EL2/CNTV using
> mach_absolute_time() first?
>
>
> So QEMU_CLOCK_VIRTUAL calls cpu_get_clock() which just adds an offset to
> gettimeofday(). This offset is already part of the live migration
> stream[1]. So if you just configure CNTVOFF_EL2 based on QEMU_CLOCK_VIRTUAL
> adjusted by the clock frequency on vcpu init, you should have everything
> you need. You can do that on every CPU init even, as the virtual clock will
> just be 0 on start.
>
> The only thing we need to change then is to move the WFI from a direct
> call to mach_absolute_time() to also check the virtual clock instead. I
> would hope that gettimeofday() calls mach_absolute_time() in the background
> too to speed it up.
>
Sounds plausible, but I noticed that we also have cpu_ticks_offset as part
of the migration stream, and I prefer mach_absolute_time() (ticks) instead
of seconds in WFI as well as it makes the calculation more accurate (ticks
against ticks instead of conversion between ns and ticks).

Should we look at integrating this with cpu_ticks_offset instead?



>
> Alex
>
>
> [1]
> https://git.qemu.org/?p=qemu.git;a=blob;f=softmmu/cpu-timers.c;h=1eb7c675c18bda7773d4a9c549f0157c6e978a83;hb=HEAD#l229
>

Re: [PATCH] hw/block: m25p80: Fix fast read for SST flashes

2020-12-02 Thread Alistair Francis

On Wed, Dec 2, 2020 at 3:09 PM Bin Meng  wrote:
>
> Hi Alistair,
>
> On Thu, Dec 3, 2020 at 3:52 AM Alistair Francis  wrote:
> >
> > On Sun, Nov 29, 2020 at 6:55 PM Bin Meng  wrote:
> > >
> > > From: Bin Meng 
> > >
> > > SST flashes require a dummy byte after the address bits.
> > >
> > > Signed-off-by: Bin Meng 
> >
> > I couldn't find a datasheet that says this... But the actual code
> > change looks fine, so:
> >
>
> Please find the SST25VF016B datasheet at
> http://ww1.microchip.com/downloads/en/devicedoc/s71271_04.pdf. The
> fast read sequence is on page 11.

Ah cool. I thought it would be somewhere, I just couldn't find it.

Alistair

>
> > Acked-by: Alistair Francis 
> >
>
> Thanks!
>
> Regards,
> Bin

Re: [PATCH] hw/block: m25p80: Fix fast read for SST flashes

2020-12-02 Thread Bin Meng

Hi Alistair,

On Thu, Dec 3, 2020 at 3:52 AM Alistair Francis  wrote:
>
> On Sun, Nov 29, 2020 at 6:55 PM Bin Meng  wrote:
> >
> > From: Bin Meng 
> >
> > SST flashes require a dummy byte after the address bits.
> >
> > Signed-off-by: Bin Meng 
>
> I couldn't find a datasheet that says this... But the actual code
> change looks fine, so:
>

Please find the SST25VF016B datasheet at
http://ww1.microchip.com/downloads/en/devicedoc/s71271_04.pdf. The
fast read sequence is on page 11.

> Acked-by: Alistair Francis 
>

Thanks!

Regards,
Bin

Re: [PATCH v1 1/1] hvf: arm: Properly sync guest time on migration

2020-12-02 Thread Alexander Graf



On 02.12.20 23:46, Frank Yang wrote:



On Wed, Dec 2, 2020 at 2:28 PM Alexander Graf wrote:



On 02.12.20 23:19, Frank Yang wrote:


From downstream:
https://android-review.googlesource.com/c/platform/external/qemu/+/1515002


Based on v3 of Alexander Graf's patches

https://patchew.org/QEMU/20201202190408.2041-1-ag...@csgraf.de


We need to adjust CNTVOFF_EL2 so that time doesnt warp.  Even
though we
can set separate CNTVOFF_EL2 values per vCPU, it just is not
worth the
require effort to do that accurately---with individual values,
even if
they are a tiny bit off it can result in a lockup due to inconsistent
time differences between vCPUs. So just use a global approximate
value
for now.

Not tested in upstream yet, but Android emulator snapshots work
without
time warp now.

Signed-off-by: Lingfeng Yang <l...@google.com>



If we just always make CNTV start at the same 0 as
QEMU_CLOCK_VIRTUAL, we should be able to just recover the offset
after migration by looking at QEMU_CLOCK_VIRTUAL to set CNTVOFF,
right?

That would end up much easier than this patch I hope.



The virtual clock interfaces/implementations in QEMU seem complex to 
me relative to the fix needed here and they don't seem to compute 
ticks with mach_absolute_time() (which in this case we want since we 
want to compute in timer ticks instead of having to mess with ns / 
cycle conversions). I do agree this patch does seem more complicated 
on the surface though versus "just" setting cntvoff directly to some 
value. Maybe we should simplify the QEMU_CLOCK_VIRTUAL implementation 
first to maintain CNTVOFF_EL2/CNTV using mach_absolute_time() first?



So QEMU_CLOCK_VIRTUAL calls cpu_get_clock() which just adds an offset to 
gettimeofday(). This offset is already part of the live migration 
stream[1]. So if you just configure CNTVOFF_EL2 based on 
QEMU_CLOCK_VIRTUAL adjusted by the clock frequency on vcpu init, you 
should have everything you need. You can do that on every CPU init even, 
as the virtual clock will just be 0 on start.


The only thing we need to change then is to move the WFI from a direct 
call to mach_absolute_time() to also check the virtual clock instead. I 
would hope that gettimeofday() calls mach_absolute_time() in the 
background too to speed it up.



Alex


[1] 
https://git.qemu.org/?p=qemu.git;a=blob;f=softmmu/cpu-timers.c;h=1eb7c675c18bda7773d4a9c549f0157c6e978a83;hb=HEAD#l229

Re: [PATCH v1 1/1] hvf: arm: Properly sync guest time on migration

2020-12-02 Thread Frank Yang

On Wed, Dec 2, 2020 at 2:28 PM Alexander Graf  wrote:

>
> On 02.12.20 23:19, Frank Yang wrote:
>
>
> From downstream:
> https://android-review.googlesource.com/c/platform/external/qemu/+/1515002
>
> Based on v3 of Alexander Graf's patches
>
> https://patchew.org/QEMU/20201202190408.2041-1-ag...@csgraf.de
>
> We need to adjust CNTVOFF_EL2 so that time doesnt warp.  Even though we
> can set separate CNTVOFF_EL2 values per vCPU, it just is not worth the
> require effort to do that accurately---with individual values, even if
> they are a tiny bit off it can result in a lockup due to inconsistent
> time differences between vCPUs. So just use a global approximate value
> for now.
>
> Not tested in upstream yet, but Android emulator snapshots work without
> time warp now.
>
> Signed-off-by: Lingfeng Yang 
>
>
> If we just always make CNTV start at the same 0 as QEMU_CLOCK_VIRTUAL, we
> should be able to just recover the offset after migration by looking at
> QEMU_CLOCK_VIRTUAL to set CNTVOFF, right?
>
> That would end up much easier than this patch I hope.
>
>
>
The virtual clock interfaces/implementations in QEMU seem complex to me
relative to the fix needed here and they don't seem to compute ticks with
mach_absolute_time() (which in this case we want since we want to compute
in timer ticks instead of having to mess with ns / cycle conversions). I do
agree this patch does seem more complicated on the surface though versus
"just" setting cntvoff directly to some value. Maybe we should simplify the
QEMU_CLOCK_VIRTUAL implementation first to maintain CNTVOFF_EL2/CNTV using
mach_absolute_time() first?

> Alex
>
>
>

Re: [PATCH v1 1/1] hvf: arm: Properly sync guest time on migration

2020-12-02 Thread Alexander Graf



On 02.12.20 23:19, Frank Yang wrote:


From downstream: 
https://android-review.googlesource.com/c/platform/external/qemu/+/1515002 



Based on v3 of Alexander Graf's patches

https://patchew.org/QEMU/20201202190408.2041-1-ag...@csgraf.de 



We need to adjust CNTVOFF_EL2 so that time doesn't warp.  Even though we
can set separate CNTVOFF_EL2 values per vCPU, it just is not worth the
required effort to do that accurately---with individual values, even if
they are a tiny bit off it can result in a lockup due to inconsistent
time differences between vCPUs. So just use a global approximate value
for now.

Not tested in upstream yet, but Android emulator snapshots work without
time warp now.

Signed-off-by: Lingfeng Yang <l...@google.com>



If we just always make CNTV start at the same 0 as QEMU_CLOCK_VIRTUAL, 
we should be able to just recover the offset after migration by looking 
at QEMU_CLOCK_VIRTUAL to set CNTVOFF, right?


That would end up much easier than this patch I hope.


Alex

Re: [PATCH v1 1/1] hvf: arm: Properly sync guest time on migration

2020-12-02 Thread Frank Yang

We've gotten the Android Emulator snapshots working again on M1 and noticed
a time warp issue where the stopwatch app would, on a snapshot load, resume
including the time the emulator wasn't running. This seems to fix it. Now
we have snapshots mostly working (though file backed ram is a bit busted,
still working on that)

On Wed, Dec 2, 2020 at 2:19 PM Frank Yang  wrote:

>
> From downstream:
> https://android-review.googlesource.com/c/platform/external/qemu/+/1515002
>
> Based on v3 of Alexander Graf's patches
>
> https://patchew.org/QEMU/20201202190408.2041-1-ag...@csgraf.de
>
> We need to adjust CNTVOFF_EL2 so that time doesnt warp.  Even though we
> can set separate CNTVOFF_EL2 values per vCPU, it just is not worth the
> require effort to do that accurately---with individual values, even if
> they are a tiny bit off it can result in a lockup due to inconsistent
> time differences between vCPUs. So just use a global approximate value
> for now.
>
> Not tested in upstream yet, but Android emulator snapshots work without
> time warp now.
>
> Signed-off-by: Lingfeng Yang 
> ---
>  accel/hvf/hvf-cpus.c |  3 +++
>  include/sysemu/hvf_int.h |  4 
>  target/arm/hvf/hvf.c | 43 +++-
>  target/i386/hvf/hvf.c|  4 
>  4 files changed, 53 insertions(+), 1 deletion(-)
>
> diff --git a/accel/hvf/hvf-cpus.c b/accel/hvf/hvf-cpus.c
> index a981ccde70..484c7717f5 100644
> --- a/accel/hvf/hvf-cpus.c
> +++ b/accel/hvf/hvf-cpus.c
> @@ -456,6 +456,9 @@ static int hvf_accel_init(MachineState *ms)
>  hvf_state = s;
>  memory_listener_register(_memory_listener, _space_memory);
>  cpus_register_accel(_cpus);
> +
> +hvf_arch_init(s);
> +
>  return 0;
>  }
>
> diff --git a/include/sysemu/hvf_int.h b/include/sysemu/hvf_int.h
> index 13adf6ea77..08830782c9 100644
> --- a/include/sysemu/hvf_int.h
> +++ b/include/sysemu/hvf_int.h
> @@ -55,6 +55,9 @@ struct HVFState {
>  hvf_slot slots[32];
>  int num_slots;
>
> +#if defined(__aarch64__)
> +uint64_t ticks;
> +#endif
>  hvf_vcpu_caps *hvf_caps;
>  };
>  extern HVFState *hvf_state;
> @@ -73,5 +76,6 @@ void hvf_arch_vcpu_destroy(CPUState *cpu);
>  int hvf_vcpu_exec(CPUState *cpu);
>  hvf_slot *hvf_find_overlap_slot(uint64_t, uint64_t);
>  void hvf_kick_vcpu_thread(CPUState *cpu);
> +void hvf_arch_init(HVFState* s);
>
>  #endif
> diff --git a/target/arm/hvf/hvf.c b/target/arm/hvf/hvf.c
> index 9442e2f232..37380c6c53 100644
> --- a/target/arm/hvf/hvf.c
> +++ b/target/arm/hvf/hvf.c
> @@ -312,6 +312,10 @@ int hvf_put_registers(CPUState *cpu)
>  uint64_t val;
>  int i;
>
> +/* Sync up CNTVOFF_EL2 */
> +env->cp15.cntvoff_el2 = hvf_state->ticks;
> +hv_vcpu_set_vtimer_offset(cpu->hvf->fd, env->cp15.cntvoff_el2);
> +
>  for (i = 0; i < ARRAY_SIZE(hvf_reg_match); i++) {
>  val = *(uint64_t *)((void *)env + hvf_reg_match[i].offset);
>  ret = hv_vcpu_set_reg(cpu->hvf->fd, hvf_reg_match[i].reg, val);
> @@ -418,6 +422,8 @@ void hvf_arch_vcpu_destroy(CPUState *cpu)
>  {
>  }
>
> +static HVFState* hvf_state = 0;
> +
>  int hvf_arch_init_vcpu(CPUState *cpu)
>  {
>  ARMCPU *arm_cpu = ARM_CPU(cpu);
> @@ -795,7 +801,11 @@ int hvf_vcpu_exec(CPUState *cpu)
>  );
>  assert_hvf_ok(r);
>
> -int64_t ticks_to_sleep = cval - mach_absolute_time();
> +/* mach_absolute_time() is an absolute host tick number.
> We
> + * have set up the guest to use the host tick number
> offset
> + * by env->cp15.cntvoff_el2.
> + */
> +int64_t ticks_to_sleep = cval - (mach_absolute_time() -
> env->cp15.cntvoff_el2);
>  if (ticks_to_sleep < 0) {
>  break;
>  }
> @@ -855,3 +865,34 @@ int hvf_vcpu_exec(CPUState *cpu)
>  }
>  }
>  }
> +
> +static int hvf_mig_state_pre_save(void* opaque) {
> +struct HVFState* s = opaque;
> +s->ticks -= mach_absolute_time();
> +return 0;
> +}
> +
> +static int hvf_mig_state_post_load(void* opaque) {
> +struct HVFState* s = opaque;
> +m->ticks += mach_absolute_time();
> +return 0;
> +}
> +
> +
> +const VMStateDescription vmstate_hvf_migration = {
> +.name = "hvf-migration",
> +.version_id = 1,
> +.minimum_version_id = 1,
> +.pre_save = hvf_mig_state_pre_save,
> +.post_load = hvf_mig_state_post_load,
> +.fields = (VMStateField[]) {
> +VMSTATE_UINT64(ticks_to_save, HVFState),
> +VMSTATE_END_OF_LIST()
> +},
> +};
> +
> +void hvf_arch_init(HVFState* s) {
> +hvf_state = s;
> +hvf_state->ticks = 0;
> +vmstate_register(NULL, 0, _hvf_migration, hvf_state);
> +}
> diff --git a/target/i386/hvf/hvf.c b/target/i386/hvf/hvf.c
> index 08b4adecd9..7ca6387620 100644
> --- a/target/i386/hvf/hvf.c
> +++ b/target/i386/hvf/hvf.c
> @@ -557,3 +557,7 @@ int hvf_vcpu_exec(CPUState *cpu)
>
>  return ret;
>

[PATCH v1 1/1] hvf: arm: Properly sync guest time on migration

2020-12-02 Thread Frank Yang

>From downstream:
https://android-review.googlesource.com/c/platform/external/qemu/+/1515002

Based on v3 of Alexander Graf's patches

https://patchew.org/QEMU/20201202190408.2041-1-ag...@csgraf.de

We need to adjust CNTVOFF_EL2 so that time doesn't warp.  Even though we
can set separate CNTVOFF_EL2 values per vCPU, it just is not worth the
required effort to do that accurately---with individual values, even if
they are a tiny bit off it can result in a lockup due to inconsistent
time differences between vCPUs. So just use a global approximate value
for now.

Not tested in upstream yet, but Android emulator snapshots work without
time warp now.

Signed-off-by: Lingfeng Yang 
---
 accel/hvf/hvf-cpus.c |  3 +++
 include/sysemu/hvf_int.h |  4 
 target/arm/hvf/hvf.c | 43 +++-
 target/i386/hvf/hvf.c|  4 
 4 files changed, 53 insertions(+), 1 deletion(-)

diff --git a/accel/hvf/hvf-cpus.c b/accel/hvf/hvf-cpus.c
index a981ccde70..484c7717f5 100644
--- a/accel/hvf/hvf-cpus.c
+++ b/accel/hvf/hvf-cpus.c
@@ -456,6 +456,9 @@ static int hvf_accel_init(MachineState *ms)
 hvf_state = s;
 memory_listener_register(_memory_listener, _space_memory);
 cpus_register_accel(_cpus);
+
+hvf_arch_init(s);
+
 return 0;
 }

diff --git a/include/sysemu/hvf_int.h b/include/sysemu/hvf_int.h
index 13adf6ea77..08830782c9 100644
--- a/include/sysemu/hvf_int.h
+++ b/include/sysemu/hvf_int.h
@@ -55,6 +55,9 @@ struct HVFState {
 hvf_slot slots[32];
 int num_slots;

+#if defined(__aarch64__)
+uint64_t ticks;
+#endif
 hvf_vcpu_caps *hvf_caps;
 };
 extern HVFState *hvf_state;
@@ -73,5 +76,6 @@ void hvf_arch_vcpu_destroy(CPUState *cpu);
 int hvf_vcpu_exec(CPUState *cpu);
 hvf_slot *hvf_find_overlap_slot(uint64_t, uint64_t);
 void hvf_kick_vcpu_thread(CPUState *cpu);
+void hvf_arch_init(HVFState* s);

 #endif
diff --git a/target/arm/hvf/hvf.c b/target/arm/hvf/hvf.c
index 9442e2f232..37380c6c53 100644
--- a/target/arm/hvf/hvf.c
+++ b/target/arm/hvf/hvf.c
@@ -312,6 +312,10 @@ int hvf_put_registers(CPUState *cpu)
 uint64_t val;
 int i;

+/* Sync up CNTVOFF_EL2 */
+env->cp15.cntvoff_el2 = hvf_state->ticks;
+hv_vcpu_set_vtimer_offset(cpu->hvf->fd, env->cp15.cntvoff_el2);
+
 for (i = 0; i < ARRAY_SIZE(hvf_reg_match); i++) {
 val = *(uint64_t *)((void *)env + hvf_reg_match[i].offset);
 ret = hv_vcpu_set_reg(cpu->hvf->fd, hvf_reg_match[i].reg, val);
@@ -418,6 +422,8 @@ void hvf_arch_vcpu_destroy(CPUState *cpu)
 {
 }

+static HVFState* hvf_state = 0;
+
 int hvf_arch_init_vcpu(CPUState *cpu)
 {
 ARMCPU *arm_cpu = ARM_CPU(cpu);
@@ -795,7 +801,11 @@ int hvf_vcpu_exec(CPUState *cpu)
 );
 assert_hvf_ok(r);

-int64_t ticks_to_sleep = cval - mach_absolute_time();
+/* mach_absolute_time() is an absolute host tick number. We
+ * have set up the guest to use the host tick number offset
+ * by env->cp15.cntvoff_el2.
+ */
+int64_t ticks_to_sleep = cval - (mach_absolute_time() -
env->cp15.cntvoff_el2);
 if (ticks_to_sleep < 0) {
 break;
 }
@@ -855,3 +865,34 @@ int hvf_vcpu_exec(CPUState *cpu)
 }
 }
 }
+
+static int hvf_mig_state_pre_save(void* opaque) {
+struct HVFState* s = opaque;
+s->ticks -= mach_absolute_time();
+return 0;
+}
+
+static int hvf_mig_state_post_load(void* opaque) {
+struct HVFState* s = opaque;
+m->ticks += mach_absolute_time();
+return 0;
+}
+
+
+const VMStateDescription vmstate_hvf_migration = {
+.name = "hvf-migration",
+.version_id = 1,
+.minimum_version_id = 1,
+.pre_save = hvf_mig_state_pre_save,
+.post_load = hvf_mig_state_post_load,
+.fields = (VMStateField[]) {
+VMSTATE_UINT64(ticks_to_save, HVFState),
+VMSTATE_END_OF_LIST()
+},
+};
+
+void hvf_arch_init(HVFState* s) {
+hvf_state = s;
+hvf_state->ticks = 0;
+vmstate_register(NULL, 0, _hvf_migration, hvf_state);
+}
diff --git a/target/i386/hvf/hvf.c b/target/i386/hvf/hvf.c
index 08b4adecd9..7ca6387620 100644
--- a/target/i386/hvf/hvf.c
+++ b/target/i386/hvf/hvf.c
@@ -557,3 +557,7 @@ int hvf_vcpu_exec(CPUState *cpu)

 return ret;
 }
+
+void hvf_arch_init(HVFState* s) {
+(void)s;
+}
-- 
2.24.3 (Apple Git-128)

[PATCH-for-5.2? 0/1] Acceptance tests: bump Fedora to 32

2020-12-02 Thread Cleber Rosa

I believe this may be a candidate for "right now" because the code
changes here simply sync with external infrastructure changes (that
is, the retirement of Fedora 31 from the official repository
locations).

The following jobs contain a validation of this bump:

 - https://gitlab.com/cleber.gnu/qemu/-/jobs/886864642#L72
 - https://gitlab.com/cleber.gnu/qemu/-/jobs/886864633#L72

Thanks,
- Cleber.

Cleber Rosa (1):
  Acceptance tests: bump Fedora to 32

 tests/acceptance/boot_linux.py | 12 ++--
 1 file changed, 6 insertions(+), 6 deletions(-)

-- 
2.25.4

[PATCH-for-5.2? 1/1] Acceptance tests: bump Fedora to 32

2020-12-02 Thread Cleber Rosa

Fedora 31, which is currently in use, has been moved out of the standard
download locations that are supported by the functionality provided by
avocado.utils.vmimage.  So right now, the boot_linux.py tests will get
canceled by not being able to find those specific images.

Ideally, this would be bumped to version 33.  But, I've found issues
with the aarch64 images, with various systemd services failing to
start.  So to keep all archs consistent, I've settled on 32.

Signed-off-by: Cleber Rosa 
---
 tests/acceptance/boot_linux.py | 12 ++--
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/tests/acceptance/boot_linux.py b/tests/acceptance/boot_linux.py
index 1da4a53d6a..0824de008e 100644
--- a/tests/acceptance/boot_linux.py
+++ b/tests/acceptance/boot_linux.py
@@ -42,13 +42,13 @@ class BootLinuxBase(Test):
 vmimage.QEMU_IMG = qemu_img
 
 self.log.info('Downloading/preparing boot image')
-# Fedora 31 only provides ppc64le images
+# Fedora 32 only provides ppc64le images
 image_arch = self.arch
 if image_arch == 'ppc64':
 image_arch = 'ppc64le'
 try:
 boot = vmimage.get(
-'fedora', arch=image_arch, version='31',
+'fedora', arch=image_arch, version='32',
 checksum=self.chksum,
 algorithm='sha256',
 cache_dir=self.cache_dirs[0],
@@ -111,7 +111,7 @@ class BootLinuxX8664(BootLinux):
 :avocado: tags=arch:x86_64
 """
 
-chksum = 'e3c1b309d9203604922d6e255c2c5d098a309c2d46215d8fc026954f3c5c27a0'
+chksum = '423a4ce32fa32c50c11e3d3ff392db97a762533b81bef9d00599de518a7469c8'
 
 def test_pc_i440fx_tcg(self):
 """
@@ -161,7 +161,7 @@ class BootLinuxAarch64(BootLinux):
 :avocado: tags=machine:gic-version=2
 """
 
-chksum = '1e18d9c0cf734940c4b5d5ec592facaed2af0ad0329383d5639c997fdf16fe49'
+chksum = 'b367755c664a2d7a26955bbfff985855adfa2ca15e908baf15b4b176d68d3967'
 
 def add_common_args(self):
 self.vm.add_args('-bios',
@@ -217,7 +217,7 @@ class BootLinuxPPC64(BootLinux):
 :avocado: tags=arch:ppc64
 """
 
-chksum = '7c3528b85a3df4b2306e892199a9e1e43f991c506f2cc390dc4efa2026ad2f58'
+chksum = 'dd989a078d641713c55720ba3e4320b204ade6954e2bfe4570c8058dc36e2e5d'
 
 def test_pseries_tcg(self):
 """
@@ -235,7 +235,7 @@ class BootLinuxS390X(BootLinux):
 :avocado: tags=arch:s390x
 """
 
-chksum = '4caaab5a434fd4d1079149a072fdc7891e354f834d355069ca982fdcaf5a122d'
+chksum = '93e49b98fa016797a6864a95cbb7beaec86ffd61dbcd42a951158ae732dae1ec'
 
 @skipIf(os.getenv('GITLAB_CI'), 'Running on GitLab')
 def test_s390_ccw_virtio_tcg(self):
-- 
2.25.4

Re: [PATCH v3 3/3] arm/hvf: Add a WFI handler

2020-12-02 Thread Peter Collingbourne

On Wed, Dec 2, 2020 at 10:49 AM Alexander Graf  wrote:
>
>
> On 02.12.20 05:44, Peter Collingbourne wrote:
> > Sleep on WFI until the VTIMER is due but allow ourselves to be woken
> > up on IPI.
> >
> > Signed-off-by: Peter Collingbourne 
> > ---
> > v3:
> > - move the simplified locking to a separate patch
> > - spin on sleep <2ms
> >
> > v2:
> > - simplify locking further
> > - wait indefinitely on disabled or masked timers
> >
> >   accel/hvf/hvf-cpus.c |  4 +--
> >   include/sysemu/hvf_int.h |  1 +
> >   target/arm/hvf/hvf.c | 56 
> >   3 files changed, 59 insertions(+), 2 deletions(-)
> >
> > diff --git a/accel/hvf/hvf-cpus.c b/accel/hvf/hvf-cpus.c
> > index e613c22ad0..b2c8fb57f6 100644
> > --- a/accel/hvf/hvf-cpus.c
> > +++ b/accel/hvf/hvf-cpus.c
> > @@ -344,8 +344,8 @@ static int hvf_init_vcpu(CPUState *cpu)
> >   sigact.sa_handler = dummy_signal;
> >   sigaction(SIG_IPI, , NULL);
> >
> > -pthread_sigmask(SIG_BLOCK, NULL, );
> > -sigdelset(, SIG_IPI);
> > +pthread_sigmask(SIG_BLOCK, NULL, >hvf->unblock_ipi_mask);
> > +sigdelset(>hvf->unblock_ipi_mask, SIG_IPI);
>
>
> That turns set into an unused variable, no? I'll fix it up while
> applying though. The rest looks great, I'll push it as part of my next
> patch set.

Yes, thanks for spotting that, your fixup looks good.

Peter

Re: [DISCUSSION] How to set properties of non-pluggable devices?

2020-12-02 Thread BALATON Zoltan via


Hello,

On Wed, 2 Dec 2020, Doug Evans wrote:

Hi.

Suppose I want to set a property of a non-pluggable device that cannot be
set after the device has been realized (e.g., I can't use qmp to set the
property after QEMU has started).
Being non-pluggable means I can't use "-device foo,bar=baz" on the command
line.
[But I can use "-device foo,help" to list its properties :-)  (if I also
specify -M bar) ]

How do people do this?


I don't know but there's a -global option that may be what you need but I 
never know how to use it. You may be able to find examples in the doc dir 
or hopefully someone who knows it better will correct me.


Hope this helps or sorry if this is not the right answer.

Regards,
BALATON Zoltan


The device is part of a "machine" (board really), so I could add the
property to the machine to be passed on to the device when it's realized
(at least I think I can), but that doesn't feel right: The machine has lots
of devices -> it feels cleaner to associate the property with the device
and not the machine (lest the machine over time collect a myriad of random
properties to pass on to its devices). Things get a little complicated
because the machine can have multiple copies of a device: specifying the
device's name is insufficient.

The device has an object path: /machine/foo/bar/device[0]. There's also
/.../device[1].
IWBN to be able to do something along the lines of:
-device-property /device/path[,PROP1=VALUE1,...]
copying the syntax used for "-object".

It's perhaps even nicer if this could be accomplished with -device:
avoiding further confusion on what -device can and can't be used for (e.g.,
can I use -device-property to set a property that could also be set with
-device?).

If what I'm asking for is reasonable and isn't doable today (I'm certainly
willing to believe I'm missing something), I'm happy to work on the patch
(with some guidance as to what would be acceptable).

One thought that comes to mind is to use -object, store the properties
there, and have the machine collect them from there when realizing its
devices. Or is that an abuse of -object ?

Re: [PATCH] docs: set CONFDIR when running sphinx

2020-12-02 Thread Eduardo Habkost

On Wed, Dec 02, 2020 at 10:05:50AM +0100, Paolo Bonzini wrote:
> On 01/12/20 19:37, marcandre.lur...@redhat.com wrote:
> > From: Marc-André Lureau 
> > 
> > The default configuration path /etc/qemu can be overriden with configure
> > options, and the generated documentation used to reflect it.
> > 
> > Fixes regression introduced in commit
> > f8aa24ea9a82da38370470c6bc0eaa393999edfe ("meson: sphinx-build").
> > 
> > Fixes: https://bugzilla.redhat.com/show_bug.cgi?id=1902537
> > Signed-off-by: Marc-André Lureau 
> > ---
> >   docs/meson.build | 2 +-
> >   1 file changed, 1 insertion(+), 1 deletion(-)
> > 
> > diff --git a/docs/meson.build b/docs/meson.build
> > index ebd85d59f9..bb8fe4c9e4 100644
> > --- a/docs/meson.build
> > +++ b/docs/meson.build
> > @@ -9,7 +9,7 @@ endif
> >   # Check if tools are available to build documentation.
> >   build_docs = false
> >   if sphinx_build.found()
> > -  SPHINX_ARGS = [sphinx_build]
> > +  SPHINX_ARGS = ['env', 'CONFDIR=' + qemu_confdir, sphinx_build]
> > # If we're making warnings fatal, apply this to Sphinx runs as well
> > if get_option('werror')
> >   SPHINX_ARGS += [ '-W' ]
> > 
> 
> I can queue the patch, but I also wouldn't mind removing support for
> /etc/qemu completely.  I'm not sure why one would use it.  Eduardo?

I agree, and I had a series for this 3 years ago.

I guess I need to keep my word and finally submit v5 of the series:
https://lore.kernel.org/qemu-devel/20171005123414.GE4015@localhost.localdomain/

-- 
Eduardo

Re: [PATCH] hw/block: m25p80: Fix fast read for SST flashes

2020-12-02 Thread Alistair Francis

On Sun, Nov 29, 2020 at 6:55 PM Bin Meng  wrote:
>
> From: Bin Meng 
>
> SST flashes require a dummy byte after the address bits.
>
> Signed-off-by: Bin Meng 

I couldn't find a datasheet that says this... But the actual code
change looks fine, so:

Acked-by: Alistair Francis 

Alistair

> ---
>
>  hw/block/m25p80.c | 3 +++
>  1 file changed, 3 insertions(+)
>
> diff --git a/hw/block/m25p80.c b/hw/block/m25p80.c
> index 483925f..9b36762 100644
> --- a/hw/block/m25p80.c
> +++ b/hw/block/m25p80.c
> @@ -825,6 +825,9 @@ static void decode_fast_read_cmd(Flash *s)
>  s->needed_bytes = get_addr_length(s);
>  switch (get_man(s)) {
>  /* Dummy cycles - modeled with bytes writes instead of bits */
> +case MAN_SST:
> +s->needed_bytes += 1;
> +break;
>  case MAN_WINBOND:
>  s->needed_bytes += 8;
>  break;
> --
> 2.7.4
>
>

Re: [PATCH 00/18] qapi/qom: QAPIfy object-add

2020-12-02 Thread Eduardo Habkost

On Wed, Dec 02, 2020 at 06:35:06PM +0100, Kevin Wolf wrote:
> Am 02.12.2020 um 17:05 hat Eduardo Habkost geschrieben:
> > > > Looks nice as end goal.  Then, these are a few questions I would
> > > > have about the transition plan:
> > > > 
> > > > Would it require changing both device implementation and device
> > > > users in lockstep?  Should we have a compatibility layer to allow
> > > > existing qdev_new()+qdev_prop_set_*() code to keep working after
> > > > the devices are converted to the new system?  If not, why not?
> > > 
> > > Technically, it doesn't strictly require a lockstep update. You can have
> > > two code paths leading to a fully initialised device, one being the
> > > traditional way of setting properties and finally calling dc->realize,
> > > the other being a new method that takes the configuration in its
> > > parameters and also sets dev->realized = true at the end.
> > > 
> > > If at all possible, I would however prefer a lockstep update because
> > > having two paths is a weird intermediate state and the code paths could
> > > easily diverge. Keeping the old way around for a device also means that
> > > property setters are still doing two different jobs (initial
> > > configuration and updates at runtime).
> > 
> > I'd like to understand better how that intermediate state would
> > look like and why there's risk of separate code paths diverging.
> >
> > Could we have an intermediate state that doesn't require any
> > duplication and thus have no separate code paths that could
> > diverge?
> 
> The one requirement we have for an intermediate state is that it
> supports both interfaces: The well-know create/set properties/realize
> dance, and a new DeviceClass method, say .create(), that takes the
> configuration in parameters instead of relying on previously set
> properties.

I agree completely.

> 
> I assumed two separate implementations of transferring the configuration
> into the internal state. On second thought, this assumption is maybe
> wrong.
> 
> You can implement the new method as wrapper around the old way: It could
> just set all the properties and call realize. Of course, you don't win
> much in terms of improving the class implementation this way, but just
> support the new interface, but I guess it can be a reasonable
> intermediate step to resolve complicated dependencies etc.
> 
> It would be much nicer to do the wrapper the other way round, i.e.
> setting properties before the device is realized would update a
> configuration struct and realize would then call .create() with that
> struct. To me, this sounds much harder, though also a more useful state.

Comment about this below (look for [1]).

> 
> As you have worked a lot with properties recently, maybe you have a good
> idea how we could get an intermediate state closer to this?

I'd have to re-read this whole thread and think about it.

> 
> > > > If we add a compatibility layer, is the end goal to convert all
> > > > existing qdev_new() users to the new system?  If yes, why?  If
> > > > not, why not?
> > > 
> > > My personal goal is covering -object and -device, i.e. the external
> > > interfaces. Converting purely internally created devices is not as
> > > interesting (especially as long as we focus only on object creation),
> > > but might be desirable for consistency.
> > 
> > I wonder how much consistency we will lose and how much confusion
> > we'll cause if we end up with two completely separate methods for
> > creating devices.
> 
> I do think we should follow through and convert everything. It's just
> not my main motivation, and if the people who work more with qdev think
> it's better to leave that part unchanged (or that it won't make much of
> a difference), I won't insist.

This worries me.  Converting thousands of lines of code that
don't involve user-visible interfaces seems complicated and maybe
pointless.  On the other hand, having two separate APIs for
creating objects internally would cause confusion.

Maybe we should accept the fact that the 2 APIs will exist, and
address the confusion part: we should guarantee the two APIs to
be 100% equivalent, except for the fact that the newer one gives
us type safety in the C code.

I'd like to avoid a case like qdev vs QOM APIs, where they have
similar but slightly different features, and nobody knows which
one to use.

> 
> > > > What about subclasses?  Would base classes need to be converted
> > > > in lockstep with all subclasses?  How would the transition
> > > > process of (e.g.) PCI devices look like?
> > > 
> > > I don't think so.
> > > 
> > > If you want to convert base classes first, you may need to take the
> > > path shown above where both initialisation paths coexist while the
> > > children are converted because instantiation of a child class involves
> > > setting properties of the base class. So you can only restrict these
> > > properties to runtime-only after all children have been converted.
> > > 
> > > The other way around

Re: [PATCH v3 00/10] hvf: Implement Apple Silicon Support

2020-12-02 Thread no-reply

Patchew URL: https://patchew.org/QEMU/20201202190408.2041-1-ag...@csgraf.de/



Hi,

This series seems to have some coding style problems. See output below for
more information:

Type: series
Message-id: 20201202190408.2041-1-ag...@csgraf.de
Subject: [PATCH v3 00/10] hvf: Implement Apple Silicon Support

=== TEST SCRIPT BEGIN ===
#!/bin/bash
git rev-parse base > /dev/null || exit 0
git config --local diff.renamelimit 0
git config --local diff.renames True
git config --local diff.algorithm histogram
./scripts/checkpatch.pl --mailback base..
=== TEST SCRIPT END ===

Updating 3c8cf5a9c21ff8782164d1def7f44bd888713384
From https://github.com/patchew-project/qemu
 * [new tag] patchew/20201202190408.2041-1-ag...@csgraf.de -> 
patchew/20201202190408.2041-1-ag...@csgraf.de
Switched to a new branch 'test'
4d827f3 hvf: arm: Implement -cpu host
bde0110 hvf: arm: Add support for GICv3
5c824d7 arm/hvf: Add a WFI handler
68f28c6 arm: Add Hypervisor.framework build target
ae48800 hvf: Add Apple Silicon support
ced03a5 hvf: arm: Mark CPU as dirty on reset
9830bf6 arm: Set PSCI to 0.2 for HVF
b2218df hvf: Introduce hvf vcpu struct
0d5f075 hvf: Move common code out
6ae373a hvf: Add hypervisor entitlement to output binaries

=== OUTPUT BEGIN ===
1/10 Checking commit 6ae373af35d9 (hvf: Add hypervisor entitlement to output 
binaries)
WARNING: added, moved or deleted file(s), does MAINTAINERS need updating?
#16: 
new file mode 100644

total: 0 errors, 1 warnings, 63 lines checked

Patch 1/10 has style problems, please review.  If any of these errors
are false positives report them to the maintainer, see
CHECKPATCH in MAINTAINERS.
2/10 Checking commit 0d5f07559d56 (hvf: Move common code out)
WARNING: added, moved or deleted file(s), does MAINTAINERS need updating?
#38: 
new file mode 100644

total: 0 errors, 1 warnings, 1088 lines checked

Patch 2/10 has style problems, please review.  If any of these errors
are false positives report them to the maintainer, see
CHECKPATCH in MAINTAINERS.
3/10 Checking commit b2218df9dabc (hvf: Introduce hvf vcpu struct)
WARNING: line over 80 characters
#138: FILE: target/i386/hvf/hvf.c:213:
+wvmcs(cpu->hvf->fd, VMCS_ENTRY_CTLS, 
cap2ctrl(hvf_state->hvf_caps->vmx_cap_entry,

ERROR: "(foo*)" should be "(foo *)"
#746: FILE: target/i386/hvf/x86hvf.c:85:
+if (hv_vcpu_write_fpstate(cpu_state->hvf->fd, (void*)xsave, 4096)) {

ERROR: "(foo*)" should be "(foo *)"
#827: FILE: target/i386/hvf/x86hvf.c:167:
+if (hv_vcpu_read_fpstate(cpu_state->hvf->fd, (void*)xsave, 4096)) {

total: 2 errors, 1 warnings, 996 lines checked

Patch 3/10 has style problems, please review.  If any of these errors
are false positives report them to the maintainer, see
CHECKPATCH in MAINTAINERS.

4/10 Checking commit 9830bf633313 (arm: Set PSCI to 0.2 for HVF)
5/10 Checking commit ced03a5fa1eb (hvf: arm: Mark CPU as dirty on reset)
6/10 Checking commit ae4880007b12 (hvf: Add Apple Silicon support)
WARNING: architecture specific defines should be avoided
#47: FILE: accel/hvf/hvf-cpus.c:63:
+#ifdef __aarch64__

WARNING: architecture specific defines should be avoided
#58: FILE: accel/hvf/hvf-cpus.c:350:
+#ifdef __aarch64__

WARNING: added, moved or deleted file(s), does MAINTAINERS need updating?
#111: 
new file mode 100644

WARNING: line over 80 characters
#575: FILE: target/arm/hvf/hvf.c:460:
+hv_vcpu_set_pending_interrupt(cpu->hvf->fd, HV_INTERRUPT_TYPE_FIQ, 
true);

WARNING: line over 80 characters
#580: FILE: target/arm/hvf/hvf.c:465:
+hv_vcpu_set_pending_interrupt(cpu->hvf->fd, HV_INTERRUPT_TYPE_IRQ, 
true);

total: 0 errors, 5 warnings, 688 lines checked

Patch 6/10 has style problems, please review.  If any of these errors
are false positives report them to the maintainer, see
CHECKPATCH in MAINTAINERS.
7/10 Checking commit 68f28c62f682 (arm: Add Hypervisor.framework build target)
WARNING: added, moved or deleted file(s), does MAINTAINERS need updating?
#47: 
new file mode 100644

total: 0 errors, 1 warnings, 36 lines checked

Patch 7/10 has style problems, please review.  If any of these errors
are false positives report them to the maintainer, see
CHECKPATCH in MAINTAINERS.
8/10 Checking commit 5c824d7a4a5e (arm/hvf: Add a WFI handler)
9/10 Checking commit bde0110d9163 (hvf: arm: Add support for GICv3)
10/10 Checking commit 4d827f39b205 (hvf: arm: Implement -cpu host)
=== OUTPUT END ===

Test command exited with code: 1


The full log is available at
http://patchew.org/logs/20201202190408.2041-1-ag...@csgraf.de/testing.checkpatch/?type=message.
---
Email generated automatically by Patchew [https://patchew.org/].
Please send your feedback to patchew-de...@redhat.com

Re: [PATCH v2 1/1] Fix to show vfio migration stat in migration status

2020-12-02 Thread Alex Williamson

On Wed, 2 Dec 2020 00:43:14 +0530
Kirti Wankhede  wrote:

> Header file where CONFIG_VFIO is defined is not included in migration.c
> file.
> 
> Moved populate_vfio_info() to hw/vfio/common.c file. Added its stub in
> stubs/vfio.c file. Updated header files and meson file accordingly.
> 
> Fixes: 3710586caa5d ("qapi: Add VFIO devices migration stats in Migration
> stats")
> 
> Signed-off-by: Kirti Wankhede 
> ---
>  hw/vfio/common.c  | 12 +++-
>  include/hw/vfio/vfio-common.h |  1 -
>  include/hw/vfio/vfio.h|  2 ++
>  migration/migration.c | 16 +---
>  stubs/meson.build |  1 +
>  stubs/vfio.c  |  7 +++
>  6 files changed, 22 insertions(+), 17 deletions(-)
>  create mode 100644 stubs/vfio.c
> 
> diff --git a/hw/vfio/common.c b/hw/vfio/common.c
> index 6ff1daa763f8..4868c0fef504 100644
> --- a/hw/vfio/common.c
> +++ b/hw/vfio/common.c
> @@ -25,6 +25,7 @@
>  #endif
>  #include 
>  
> +#include "qapi/qapi-types-migration.h"
>  #include "hw/vfio/vfio-common.h"
>  #include "hw/vfio/vfio.h"
>  #include "exec/address-spaces.h"
> @@ -292,7 +293,7 @@ const MemoryRegionOps vfio_region_ops = {
>   * Device state interfaces
>   */
>  
> -bool vfio_mig_active(void)
> +static bool vfio_mig_active(void)
>  {
>  VFIOGroup *group;
>  VFIODevice *vbasedev;
> @@ -311,6 +312,15 @@ bool vfio_mig_active(void)
>  return true;
>  }
>  
> +void populate_vfio_info(MigrationInfo *info)
> +{
> +if (vfio_mig_active()) {
> +info->has_vfio = true;
> +info->vfio = g_malloc0(sizeof(*info->vfio));
> +info->vfio->transferred = vfio_mig_bytes_transferred();
> +}
> +}
> +
>  static bool vfio_devices_all_saving(VFIOContainer *container)
>  {
>  VFIOGroup *group;
> diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
> index 6141162d7aea..cc47bd7d4456 100644
> --- a/include/hw/vfio/vfio-common.h
> +++ b/include/hw/vfio/vfio-common.h
> @@ -205,7 +205,6 @@ extern const MemoryRegionOps vfio_region_ops;
>  typedef QLIST_HEAD(VFIOGroupList, VFIOGroup) VFIOGroupList;
>  extern VFIOGroupList vfio_group_list;
>  
> -bool vfio_mig_active(void);
>  int64_t vfio_mig_bytes_transferred(void);
>  
>  #ifdef CONFIG_LINUX
> diff --git a/include/hw/vfio/vfio.h b/include/hw/vfio/vfio.h
> index 86248f54360a..d1e6f4b26f35 100644
> --- a/include/hw/vfio/vfio.h
> +++ b/include/hw/vfio/vfio.h
> @@ -4,4 +4,6 @@
>  bool vfio_eeh_as_ok(AddressSpace *as);
>  int vfio_eeh_as_op(AddressSpace *as, uint32_t op);
>  
> +void populate_vfio_info(MigrationInfo *info);
> +
>  #endif
> diff --git a/migration/migration.c b/migration/migration.c
> index 87a9b59f83f4..c164594c1d8d 100644
> --- a/migration/migration.c
> +++ b/migration/migration.c
> @@ -56,10 +56,7 @@
>  #include "net/announce.h"
>  #include "qemu/queue.h"
>  #include "multifd.h"
> -
> -#ifdef CONFIG_VFIO
> -#include "hw/vfio/vfio-common.h"
> -#endif
> +#include "hw/vfio/vfio.h"
>  
>  #define MAX_THROTTLE  (128 << 20)  /* Migration transfer speed 
> throttling */
>  
> @@ -1041,17 +1038,6 @@ static void populate_disk_info(MigrationInfo *info)
>  }
>  }
>  
> -static void populate_vfio_info(MigrationInfo *info)
> -{
> -#ifdef CONFIG_VFIO
> -if (vfio_mig_active()) {
> -info->has_vfio = true;
> -info->vfio = g_malloc0(sizeof(*info->vfio));
> -info->vfio->transferred = vfio_mig_bytes_transferred();
> -}
> -#endif
> -}
> -
>  static void fill_source_migration_info(MigrationInfo *info)
>  {
>  MigrationState *s = migrate_get_current();
> diff --git a/stubs/meson.build b/stubs/meson.build
> index 82b7ba60abe5..909956674847 100644
> --- a/stubs/meson.build
> +++ b/stubs/meson.build
> @@ -53,3 +53,4 @@ if have_system
>stub_ss.add(files('semihost.c'))
>stub_ss.add(files('xen-hw-stub.c'))
>  endif
> +stub_ss.add(files('vfio.c'))
> diff --git a/stubs/vfio.c b/stubs/vfio.c
> new file mode 100644
> index ..9cc8753cd102
> --- /dev/null
> +++ b/stubs/vfio.c
> @@ -0,0 +1,7 @@
> +#include "qemu/osdep.h"
> +#include "qapi/qapi-types-migration.h"
> +#include "hw/vfio/vfio.h"
> +
> +void populate_vfio_info(MigrationInfo *info)
> +{
> +}

[989/8466] Compiling C object libqemu-s390x-softmmu.fa.p/hw_vfio_ap.c.o
FAILED: libqemu-s390x-softmmu.fa.p/hw_vfio_ap.c.o 
cc -Ilibqemu-s390x-softmmu.fa.p -I. -I.. -Itarget/s390x -I../target/s390x 
-Iqapi -Itrace -Iui -Iui/shader -I/usr/include/spice-1 
-I/usr/include/spice-server -I/usr/include/cacard -I/usr/include/glib-2.0 
-I/usr/lib64/glib-2.0/include -I/usr/include/nss3 -I/usr/include/nspr4 
-I/usr/include/libmount -I/usr/include/blkid -I/usr/include/pixman-1 
-I/usr/include/capstone -fdiagnostics-color=auto -pipe -Wall -Winvalid-pch 
-std=gnu99 -O2 -g -U_FORTIFY_SOURCE -D_FORTIFY_SOURCE=2 -m64 -mcx16 
-D_GNU_SOURCE -D_FILE_OFFSET_BITS=64 -D_LARGEFILE_SOURCE -Wstrict-prototypes 
-Wredundant-decls -Wundef -Wwrite-strings -Wmissing-prototypes 
-fno-strict-aliasing -fno-common -fwrapv

[PATCH v3 06/10] hvf: Add Apple Silicon support

2020-12-02 Thread Alexander Graf

With Apple Silicon available to the masses, it's a good time to add support
for driving its virtualization extensions from QEMU.

This patch adds all necessary architecture specific code to get basic VMs
working. It's still pretty raw, but definitely functional.

Known limitations:

  - Vtimer acknowledgement is hacky
  - Should implement more sysregs and fault on invalid ones then
  - WFI handling is missing, need to marry it with vtimer

Signed-off-by: Alexander Graf 

---

v1 -> v2:

  - Merge vcpu kick function patch
  - Implement WFI handling (allows vCPUs to sleep)
  - Synchronize system registers (fixes OVMF crashes and reboot)
  - Don't always call cpu_synchronize_state()
  - Use more fine grained iothread locking
  - Populate aa64mmfr0 from hardware

v2 -> v3:

  - Advance PC on SMC
  - Use cp list interface for sysreg syncs
  - Do not set current_cpu
  - Fix sysreg isread mask
  - Move sysreg handling to functions
  - Remove WFI logic again
  - Revert to global iothread locking
---
 MAINTAINERS  |   5 +
 accel/hvf/hvf-cpus.c |  14 +
 include/sysemu/hvf_int.h |   5 +-
 target/arm/hvf/hvf.c | 620 +++
 4 files changed, 643 insertions(+), 1 deletion(-)
 create mode 100644 target/arm/hvf/hvf.c

diff --git a/MAINTAINERS b/MAINTAINERS
index ca4b6d9279..9cd1d9d448 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -439,6 +439,11 @@ F: accel/accel.c
 F: accel/Makefile.objs
 F: accel/stubs/Makefile.objs
 
+Apple Silicon HVF CPUs
+M: Alexander Graf 
+S: Maintained
+F: target/arm/hvf/
+
 X86 HVF CPUs
 M: Cameron Esfahani 
 M: Roman Bolshakov 
diff --git a/accel/hvf/hvf-cpus.c b/accel/hvf/hvf-cpus.c
index a423f629d5..e613c22ad0 100644
--- a/accel/hvf/hvf-cpus.c
+++ b/accel/hvf/hvf-cpus.c
@@ -60,6 +60,10 @@
 
 #include 
 
+#ifdef __aarch64__
+#define HV_VM_DEFAULT NULL
+#endif
+
 /* Memory slots */
 
 struct mac_slot {
@@ -343,7 +347,11 @@ static int hvf_init_vcpu(CPUState *cpu)
 pthread_sigmask(SIG_BLOCK, NULL, );
 sigdelset(, SIG_IPI);
 
+#ifdef __aarch64__
+r = hv_vcpu_create(>hvf->fd, (hv_vcpu_exit_t **)>hvf->exit, 
NULL);
+#else
 r = hv_vcpu_create((hv_vcpuid_t *)>hvf->fd, HV_VCPU_DEFAULT);
+#endif
 cpu->vcpu_dirty = 1;
 assert_hvf_ok(r);
 
@@ -414,8 +422,14 @@ static void hvf_start_vcpu_thread(CPUState *cpu)
cpu, QEMU_THREAD_JOINABLE);
 }
 
+__attribute__((weak)) void hvf_kick_vcpu_thread(CPUState *cpu)
+{
+cpus_kick_thread(cpu);
+}
+
 static const CpusAccel hvf_cpus = {
 .create_vcpu_thread = hvf_start_vcpu_thread,
+.kick_vcpu_thread = hvf_kick_vcpu_thread,
 
 .synchronize_post_reset = hvf_cpu_synchronize_post_reset,
 .synchronize_post_init = hvf_cpu_synchronize_post_init,
diff --git a/include/sysemu/hvf_int.h b/include/sysemu/hvf_int.h
index 7967e33727..5f15119184 100644
--- a/include/sysemu/hvf_int.h
+++ b/include/sysemu/hvf_int.h
@@ -11,6 +11,7 @@
 #ifndef HVF_INT_H
 #define HVF_INT_H
 
+#include "qemu/osdep.h"
 #include 
 
 #define HVF_MAX_VCPU 0x10
@@ -59,7 +60,8 @@ struct HVFState {
 extern HVFState *hvf_state;
 
 struct hvf_vcpu_state {
-int fd;
+uint64_t fd;
+void *exit;
 };
 
 void assert_hvf_ok(hv_return_t ret);
@@ -69,5 +71,6 @@ int hvf_arch_init_vcpu(CPUState *cpu);
 void hvf_arch_vcpu_destroy(CPUState *cpu);
 int hvf_vcpu_exec(CPUState *cpu);
 hvf_slot *hvf_find_overlap_slot(uint64_t, uint64_t);
+void hvf_kick_vcpu_thread(CPUState *cpu);
 
 #endif
diff --git a/target/arm/hvf/hvf.c b/target/arm/hvf/hvf.c
new file mode 100644
index 00..5ecce36d4a
--- /dev/null
+++ b/target/arm/hvf/hvf.c
@@ -0,0 +1,620 @@
+/*
+ * QEMU Hypervisor.framework support for Apple Silicon
+
+ * Copyright 2020 Alexander Graf 
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+
+#include "qemu/osdep.h"
+#include "qemu-common.h"
+#include "qemu/error-report.h"
+
+#include "sysemu/runstate.h"
+#include "sysemu/hvf.h"
+#include "sysemu/hvf_int.h"
+#include "sysemu/hw_accel.h"
+
+#include 
+
+#include "exec/address-spaces.h"
+#include "hw/irq.h"
+#include "qemu/main-loop.h"
+#include "sysemu/accel.h"
+#include "sysemu/cpus.h"
+#include "target/arm/cpu.h"
+#include "target/arm/internals.h"
+
+#define HVF_DEBUG 0
+#define DPRINTF(...)\
+if (HVF_DEBUG) {\
+fprintf(stderr, "HVF %s:%d ", __func__, __LINE__);  \
+fprintf(stderr, __VA_ARGS__);   \
+fprintf(stderr, "\n");  \
+}
+
+#define HVF_SYSREG(crn, crm, op0, op1, op2) \
+ENCODE_AA64_CP_REG(CP_REG_ARM64_SYSREG_CP, crn, crm, op0, op1, op2)
+#define PL1_WRITE_MASK 0x4
+
+#define SYSREG(op0, op1, op2, crn, crm) \
+((op0 << 20) | (op2 << 17) | (op1 << 14) | (crn << 10) | (crm << 1))
+#define SYSREG_MASK   SYSREG(0x3, 0x7, 0x7, 0xf, 0xf)
+#define SYSREG_CNTPCT_EL0

[PATCH v3 10/10] hvf: arm: Implement -cpu host

2020-12-02 Thread Alexander Graf

Now that we have working system register sync, we push more target CPU
properties into the virtual machine. That might be useful in some
situations, but is not the typical case that users want.

So let's add a -cpu host option that allows them to explicitly pass all
CPU capabilities of their host CPU into the guest.

Signed-off-by: Alexander Graf 
---
 include/sysemu/hvf.h |  2 ++
 target/arm/cpu.c |  9 ++---
 target/arm/cpu.h |  2 ++
 target/arm/hvf/hvf.c | 41 +
 target/arm/kvm_arm.h |  2 --
 5 files changed, 51 insertions(+), 5 deletions(-)

diff --git a/include/sysemu/hvf.h b/include/sysemu/hvf.h
index f893768df9..7eb61cf094 100644
--- a/include/sysemu/hvf.h
+++ b/include/sysemu/hvf.h
@@ -19,6 +19,8 @@
 #ifdef CONFIG_HVF
 uint32_t hvf_get_supported_cpuid(uint32_t func, uint32_t idx,
  int reg);
+struct ARMCPU;
+void hvf_arm_set_cpu_features_from_host(struct ARMCPU *cpu);
 extern bool hvf_allowed;
 #define hvf_enabled() (hvf_allowed)
 #else /* !CONFIG_HVF */
diff --git a/target/arm/cpu.c b/target/arm/cpu.c
index 9a501ea4bd..087c6292b6 100644
--- a/target/arm/cpu.c
+++ b/target/arm/cpu.c
@@ -2279,12 +2279,16 @@ static void arm_cpu_class_init(ObjectClass *oc, void 
*data)
 #endif
 }
 
-#ifdef CONFIG_KVM
+#if defined(CONFIG_KVM) || defined(CONFIG_HVF)
 static void arm_host_initfn(Object *obj)
 {
 ARMCPU *cpu = ARM_CPU(obj);
 
+#ifdef CONFIG_KVM
 kvm_arm_set_cpu_features_from_host(cpu);
+#else
+hvf_arm_set_cpu_features_from_host(cpu);
+#endif
 if (arm_feature(>env, ARM_FEATURE_AARCH64)) {
 aarch64_add_sve_properties(obj);
 }
@@ -2296,7 +2300,6 @@ static const TypeInfo host_arm_cpu_type_info = {
 .parent = TYPE_AARCH64_CPU,
 .instance_init = arm_host_initfn,
 };
-
 #endif
 
 static void arm_cpu_instance_init(Object *obj)
@@ -2355,7 +2358,7 @@ static void arm_cpu_register_types(void)
 
 type_register_static(_cpu_type_info);
 
-#ifdef CONFIG_KVM
+#if defined(CONFIG_KVM) || defined(CONFIG_HVF)
 type_register_static(_arm_cpu_type_info);
 #endif
 
diff --git a/target/arm/cpu.h b/target/arm/cpu.h
index e5514c8286..e54963aa8b 100644
--- a/target/arm/cpu.h
+++ b/target/arm/cpu.h
@@ -2823,6 +2823,8 @@ bool write_cpustate_to_list(ARMCPU *cpu, bool kvm_sync);
 #define ARM_CPU_TYPE_NAME(name) (name ARM_CPU_TYPE_SUFFIX)
 #define CPU_RESOLVING_TYPE TYPE_ARM_CPU
 
+#define TYPE_ARM_HOST_CPU "host-" TYPE_ARM_CPU
+
 #define cpu_signal_handler cpu_arm_signal_handler
 #define cpu_list arm_cpu_list
 
diff --git a/target/arm/hvf/hvf.c b/target/arm/hvf/hvf.c
index dfdf0827e4..9442e2f232 100644
--- a/target/arm/hvf/hvf.c
+++ b/target/arm/hvf/hvf.c
@@ -373,6 +373,47 @@ static uint64_t hvf_get_reg(CPUState *cpu, int rt)
 return val;
 }
 
+void hvf_arm_set_cpu_features_from_host(ARMCPU *cpu)
+{
+ARMISARegisters host_isar;
+const struct isar_regs {
+int reg;
+uint64_t *val;
+} regs[] = {
+{ HV_SYS_REG_ID_AA64PFR0_EL1, _isar.id_aa64pfr0 },
+{ HV_SYS_REG_ID_AA64PFR1_EL1, _isar.id_aa64pfr1 },
+{ HV_SYS_REG_ID_AA64DFR0_EL1, _isar.id_aa64dfr0 },
+{ HV_SYS_REG_ID_AA64DFR1_EL1, _isar.id_aa64dfr1 },
+{ HV_SYS_REG_ID_AA64ISAR0_EL1, _isar.id_aa64isar0 },
+{ HV_SYS_REG_ID_AA64ISAR1_EL1, _isar.id_aa64isar1 },
+{ HV_SYS_REG_ID_AA64MMFR0_EL1, _isar.id_aa64mmfr0 },
+{ HV_SYS_REG_ID_AA64MMFR1_EL1, _isar.id_aa64mmfr1 },
+{ HV_SYS_REG_ID_AA64MMFR2_EL1, _isar.id_aa64mmfr2 },
+};
+hv_vcpu_t fd;
+hv_vcpu_exit_t *exit;
+int i;
+
+cpu->dtb_compatible = "arm,arm-v8";
+cpu->env.features = (1ULL << ARM_FEATURE_V8) |
+(1ULL << ARM_FEATURE_NEON) |
+(1ULL << ARM_FEATURE_AARCH64) |
+(1ULL << ARM_FEATURE_PMU) |
+(1ULL << ARM_FEATURE_GENERIC_TIMER);
+
+/* We set up a small vcpu to extract host registers */
+
+assert_hvf_ok(hv_vcpu_create(, , NULL));
+for (i = 0; i < ARRAY_SIZE(regs); i++) {
+assert_hvf_ok(hv_vcpu_get_sys_reg(fd, regs[i].reg, regs[i].val));
+}
+assert_hvf_ok(hv_vcpu_get_sys_reg(fd, HV_SYS_REG_MIDR_EL1, >midr));
+assert_hvf_ok(hv_vcpu_destroy(fd));
+
+cpu->isar = host_isar;
+cpu->reset_sctlr = 0x00c50078;
+}
+
 void hvf_arch_vcpu_destroy(CPUState *cpu)
 {
 }
diff --git a/target/arm/kvm_arm.h b/target/arm/kvm_arm.h
index eb81b7059e..081727a37e 100644
--- a/target/arm/kvm_arm.h
+++ b/target/arm/kvm_arm.h
@@ -214,8 +214,6 @@ bool kvm_arm_create_scratch_host_vcpu(const uint32_t 
*cpus_to_try,
  */
 void kvm_arm_destroy_scratch_host_vcpu(int *fdarray);
 
-#define TYPE_ARM_HOST_CPU "host-" TYPE_ARM_CPU
-
 /**
  * ARMHostCPUFeatures: information about the host CPU (identified
  * by asking the host kernel)
-- 
2.24.3 (Apple Git-128)

[PATCH v3 07/10] arm: Add Hypervisor.framework build target

2020-12-02 Thread Alexander Graf

Now that we have all logic in place that we need to handle Hypervisor.framework
on Apple Silicon systems, let's add CONFIG_HVF for aarch64 as well so that we
can build it.

Signed-off-by: Alexander Graf 

---

v1 -> v2:

  - Fix build on 32bit arm
---
 meson.build| 11 ++-
 target/arm/hvf/meson.build |  3 +++
 target/arm/meson.build |  2 ++
 3 files changed, 15 insertions(+), 1 deletion(-)
 create mode 100644 target/arm/hvf/meson.build

diff --git a/meson.build b/meson.build
index 2a7ff5560c..bff3fe7089 100644
--- a/meson.build
+++ b/meson.build
@@ -74,16 +74,25 @@ else
 endif
 
 accelerator_targets = { 'CONFIG_KVM': kvm_targets }
+
+if cpu in ['x86', 'x86_64']
+  hvf_targets = ['i386-softmmu', 'x86_64-softmmu']
+elif cpu in ['aarch64']
+  hvf_targets = ['aarch64-softmmu']
+else
+  hvf_targets = []
+endif
+
 if cpu in ['x86', 'x86_64', 'arm', 'aarch64']
   # i368 emulator provides xenpv machine type for multiple architectures
   accelerator_targets += {
 'CONFIG_XEN': ['i386-softmmu', 'x86_64-softmmu'],
+'CONFIG_HVF': hvf_targets,
   }
 endif
 if cpu in ['x86', 'x86_64']
   accelerator_targets += {
 'CONFIG_HAX': ['i386-softmmu', 'x86_64-softmmu'],
-'CONFIG_HVF': ['x86_64-softmmu'],
 'CONFIG_WHPX': ['i386-softmmu', 'x86_64-softmmu'],
   }
 endif
diff --git a/target/arm/hvf/meson.build b/target/arm/hvf/meson.build
new file mode 100644
index 00..855e6cce5a
--- /dev/null
+++ b/target/arm/hvf/meson.build
@@ -0,0 +1,3 @@
+arm_softmmu_ss.add(when: [hvf, 'CONFIG_HVF'], if_true: files(
+  'hvf.c',
+))
diff --git a/target/arm/meson.build b/target/arm/meson.build
index f5de2a77b8..95bebae216 100644
--- a/target/arm/meson.build
+++ b/target/arm/meson.build
@@ -56,5 +56,7 @@ arm_softmmu_ss.add(files(
   'psci.c',
 ))
 
+subdir('hvf')
+
 target_arch += {'arm': arm_ss}
 target_softmmu_arch += {'arm': arm_softmmu_ss}
-- 
2.24.3 (Apple Git-128)

[PATCH v3 08/10] arm/hvf: Add a WFI handler

2020-12-02 Thread Alexander Graf

From: Peter Collingbourne 

Sleep on WFI until the VTIMER is due but allow ourselves to be woken
up on IPI.

Signed-off-by: Peter Collingbourne 
[agraf: Remove unused 'set' variable, always advance PC on WFX trap]
Signed-off-by: Alexander Graf 
---
 accel/hvf/hvf-cpus.c |  5 ++--
 include/sysemu/hvf_int.h |  1 +
 target/arm/hvf/hvf.c | 55 
 3 files changed, 58 insertions(+), 3 deletions(-)

diff --git a/accel/hvf/hvf-cpus.c b/accel/hvf/hvf-cpus.c
index e613c22ad0..a981ccde70 100644
--- a/accel/hvf/hvf-cpus.c
+++ b/accel/hvf/hvf-cpus.c
@@ -337,15 +337,14 @@ static int hvf_init_vcpu(CPUState *cpu)
 cpu->hvf = g_malloc0(sizeof(*cpu->hvf));
 
 /* init cpu signals */
-sigset_t set;
 struct sigaction sigact;
 
 memset(, 0, sizeof(sigact));
 sigact.sa_handler = dummy_signal;
 sigaction(SIG_IPI, , NULL);
 
-pthread_sigmask(SIG_BLOCK, NULL, );
-sigdelset(, SIG_IPI);
+pthread_sigmask(SIG_BLOCK, NULL, >hvf->unblock_ipi_mask);
+sigdelset(>hvf->unblock_ipi_mask, SIG_IPI);
 
 #ifdef __aarch64__
 r = hv_vcpu_create(>hvf->fd, (hv_vcpu_exit_t **)>hvf->exit, 
NULL);
diff --git a/include/sysemu/hvf_int.h b/include/sysemu/hvf_int.h
index 5f15119184..13adf6ea77 100644
--- a/include/sysemu/hvf_int.h
+++ b/include/sysemu/hvf_int.h
@@ -62,6 +62,7 @@ extern HVFState *hvf_state;
 struct hvf_vcpu_state {
 uint64_t fd;
 void *exit;
+sigset_t unblock_ipi_mask;
 };
 
 void assert_hvf_ok(hv_return_t ret);
diff --git a/target/arm/hvf/hvf.c b/target/arm/hvf/hvf.c
index 5ecce36d4a..79aeeb237b 100644
--- a/target/arm/hvf/hvf.c
+++ b/target/arm/hvf/hvf.c
@@ -2,6 +2,7 @@
  * QEMU Hypervisor.framework support for Apple Silicon
 
  * Copyright 2020 Alexander Graf 
+ * Copyright 2020 Google LLC
  *
  * This work is licensed under the terms of the GNU GPL, version 2 or later.
  * See the COPYING file in the top-level directory.
@@ -18,6 +19,7 @@
 #include "sysemu/hw_accel.h"
 
 #include 
+#include 
 
 #include "exec/address-spaces.h"
 #include "hw/irq.h"
@@ -413,6 +415,7 @@ int hvf_arch_init_vcpu(CPUState *cpu)
 
 void hvf_kick_vcpu_thread(CPUState *cpu)
 {
+cpus_kick_thread(cpu);
 hv_vcpus_exit(>hvf->fd, 1);
 }
 
@@ -468,6 +471,18 @@ static int hvf_inject_interrupts(CPUState *cpu)
 return 0;
 }
 
+static void hvf_wait_for_ipi(CPUState *cpu, struct timespec *ts)
+{
+/*
+ * Use pselect to sleep so that other threads can IPI us while we're
+ * sleeping.
+ */
+qatomic_mb_set(>thread_kicked, false);
+qemu_mutex_unlock_iothread();
+pselect(0, 0, 0, 0, ts, >hvf->unblock_ipi_mask);
+qemu_mutex_lock_iothread();
+}
+
 int hvf_vcpu_exec(CPUState *cpu)
 {
 ARMCPU *arm_cpu = ARM_CPU(cpu);
@@ -579,6 +594,46 @@ int hvf_vcpu_exec(CPUState *cpu)
 }
 case EC_WFX_TRAP:
 advance_pc = true;
+if (!(syndrome & WFX_IS_WFE) && !(cpu->interrupt_request &
+(CPU_INTERRUPT_HARD | CPU_INTERRUPT_FIQ))) {
+
+uint64_t ctl;
+r = hv_vcpu_get_sys_reg(cpu->hvf->fd, HV_SYS_REG_CNTV_CTL_EL0,
+);
+assert_hvf_ok(r);
+
+if (!(ctl & 1) || (ctl & 2)) {
+/* Timer disabled or masked, just wait for an IPI. */
+hvf_wait_for_ipi(cpu, NULL);
+break;
+}
+
+uint64_t cval;
+r = hv_vcpu_get_sys_reg(cpu->hvf->fd, HV_SYS_REG_CNTV_CVAL_EL0,
+);
+assert_hvf_ok(r);
+
+int64_t ticks_to_sleep = cval - mach_absolute_time();
+if (ticks_to_sleep < 0) {
+break;
+}
+
+uint64_t seconds = ticks_to_sleep / arm_cpu->gt_cntfrq_hz;
+uint64_t nanos =
+(ticks_to_sleep - arm_cpu->gt_cntfrq_hz * seconds) *
+10 / arm_cpu->gt_cntfrq_hz;
+
+/*
+ * Don't sleep for less than 2ms. This is believed to improve
+ * latency of message passing workloads.
+ */
+if (!seconds && nanos < 200) {
+break;
+}
+
+struct timespec ts = { seconds, nanos };
+hvf_wait_for_ipi(cpu, );
+}
 break;
 case EC_AA64_HVC:
 cpu_synchronize_state(cpu);
-- 
2.24.3 (Apple Git-128)

[PATCH v3 04/10] arm: Set PSCI to 0.2 for HVF

2020-12-02 Thread Alexander Graf

In Hypervisor.framework, we just pass PSCI calls straight on to the QEMU
emulation of it. That means, if TCG is compatible with PSCI 0.2, so are we.
Let's reflect that fact in the code too.

Signed-off-by: Alexander Graf 
---
 target/arm/cpu.c | 4 
 1 file changed, 4 insertions(+)

diff --git a/target/arm/cpu.c b/target/arm/cpu.c
index 07492e9f9a..db6f7c34ed 100644
--- a/target/arm/cpu.c
+++ b/target/arm/cpu.c
@@ -1062,6 +1062,10 @@ static void arm_cpu_initfn(Object *obj)
 if (tcg_enabled()) {
 cpu->psci_version = 2; /* TCG implements PSCI 0.2 */
 }
+
+if (hvf_enabled()) {
+cpu->psci_version = 2; /* HVF uses TCG's PSCI */
+}
 }
 
 static Property arm_cpu_gt_cntfrq_property =
-- 
2.24.3 (Apple Git-128)

[PATCH v3 05/10] hvf: arm: Mark CPU as dirty on reset

2020-12-02 Thread Alexander Graf

When clearing internal state of a CPU, we should also make sure that HVF
knows about it and can push the new values down to vcpu state.

Make sure that with HVF enabled, we tell it that it should synchronize
CPU state on next entry after a reset.

This fixes PSCI handling, because now newly pushed state such as X0 and
PC on remote CPU enablement also get pushed into HVF.

Signed-off-by: Alexander Graf 
---
 target/arm/arm-powerctl.c | 1 +
 target/arm/cpu.c  | 2 ++
 2 files changed, 3 insertions(+)

diff --git a/target/arm/arm-powerctl.c b/target/arm/arm-powerctl.c
index b75f813b40..a49a5b32e6 100644
--- a/target/arm/arm-powerctl.c
+++ b/target/arm/arm-powerctl.c
@@ -15,6 +15,7 @@
 #include "arm-powerctl.h"
 #include "qemu/log.h"
 #include "qemu/main-loop.h"
+#include "sysemu/hw_accel.h"
 
 #ifndef DEBUG_ARM_POWERCTL
 #define DEBUG_ARM_POWERCTL 0
diff --git a/target/arm/cpu.c b/target/arm/cpu.c
index db6f7c34ed..9a501ea4bd 100644
--- a/target/arm/cpu.c
+++ b/target/arm/cpu.c
@@ -411,6 +411,8 @@ static void arm_cpu_reset(DeviceState *dev)
 #ifndef CONFIG_USER_ONLY
 if (kvm_enabled()) {
 kvm_arm_reset_vcpu(cpu);
+} else if (hvf_enabled()) {
+s->vcpu_dirty = true;
 }
 #endif
 
-- 
2.24.3 (Apple Git-128)

Re: [RFC PATCH] configure: add --without-default-features

2020-12-02 Thread Alex Bennée



Alex Bennée  writes:

> By default QEMU enables a lot of features if it can probe and find the
> support libraries. It also enables a bunch of features by default.
> This patch adds the ability to build --without-default-features which
> can be paired with a --without-default-devices for a barely functional
> build.
>
> The main use case for this is testing our build assumptions and for
> minimising the amount of stuff you build if you just want to test a
> particular feature on your relatively slow emulated test system.
>
> Signed-off-by: Alex Bennée 
> ---
>  configure | 161 ++
>  1 file changed, 89 insertions(+), 72 deletions(-)
>
> diff --git a/configure b/configure
> index 18c26e0389..23fa6f9421 100755
> --- a/configure
> +++ b/configure

> -oss_lib=""
> +oss_lib="$default_feature"


As oss_lib gets passed bare as the library to use, this bit needs to be dropped.

-- 
Alex Bennée

[PATCH v3 02/10] hvf: Move common code out

2020-12-02 Thread Alexander Graf

Until now, Hypervisor.framework has only been available on x86_64 systems.
With Apple Silicon shipping now, it extends its reach to aarch64. To
prepare for support for multiple architectures, let's move common code out
into its own accel directory.

Signed-off-by: Alexander Graf 
---
 MAINTAINERS |   9 +-
 accel/hvf/hvf-all.c |  56 +
 accel/hvf/hvf-cpus.c| 464 
 accel/hvf/meson.build   |   7 +
 accel/meson.build   |   1 +
 include/sysemu/hvf_int.h|  69 ++
 target/i386/hvf/hvf-cpus.c  | 131 --
 target/i386/hvf/hvf-cpus.h  |  25 --
 target/i386/hvf/hvf-i386.h  |  48 +---
 target/i386/hvf/hvf.c   | 360 +---
 target/i386/hvf/meson.build |   1 -
 target/i386/hvf/x86hvf.c|  11 +-
 target/i386/hvf/x86hvf.h|   2 -
 13 files changed, 615 insertions(+), 569 deletions(-)
 create mode 100644 accel/hvf/hvf-all.c
 create mode 100644 accel/hvf/hvf-cpus.c
 create mode 100644 accel/hvf/meson.build
 create mode 100644 include/sysemu/hvf_int.h
 delete mode 100644 target/i386/hvf/hvf-cpus.c
 delete mode 100644 target/i386/hvf/hvf-cpus.h

diff --git a/MAINTAINERS b/MAINTAINERS
index 68bc160f41..ca4b6d9279 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -444,9 +444,16 @@ M: Cameron Esfahani 
 M: Roman Bolshakov 
 W: https://wiki.qemu.org/Features/HVF
 S: Maintained
-F: accel/stubs/hvf-stub.c
 F: target/i386/hvf/
+
+HVF
+M: Cameron Esfahani 
+M: Roman Bolshakov 
+W: https://wiki.qemu.org/Features/HVF
+S: Maintained
+F: accel/hvf/
 F: include/sysemu/hvf.h
+F: include/sysemu/hvf_int.h
 
 WHPX CPUs
 M: Sunil Muthuswamy 
diff --git a/accel/hvf/hvf-all.c b/accel/hvf/hvf-all.c
new file mode 100644
index 00..47d77a472a
--- /dev/null
+++ b/accel/hvf/hvf-all.c
@@ -0,0 +1,56 @@
+/*
+ * QEMU Hypervisor.framework support
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.  See
+ * the COPYING file in the top-level directory.
+ *
+ * Contributions after 2012-01-13 are licensed under the terms of the
+ * GNU GPL, version 2 or (at your option) any later version.
+ */
+
+#include "qemu/osdep.h"
+#include "qemu-common.h"
+#include "qemu/error-report.h"
+#include "sysemu/hvf.h"
+#include "sysemu/hvf_int.h"
+#include "sysemu/runstate.h"
+
+#include "qemu/main-loop.h"
+#include "sysemu/accel.h"
+
+#include 
+
+bool hvf_allowed;
+HVFState *hvf_state;
+
+void assert_hvf_ok(hv_return_t ret)
+{
+if (ret == HV_SUCCESS) {
+return;
+}
+
+switch (ret) {
+case HV_ERROR:
+error_report("Error: HV_ERROR");
+break;
+case HV_BUSY:
+error_report("Error: HV_BUSY");
+break;
+case HV_BAD_ARGUMENT:
+error_report("Error: HV_BAD_ARGUMENT");
+break;
+case HV_NO_RESOURCES:
+error_report("Error: HV_NO_RESOURCES");
+break;
+case HV_NO_DEVICE:
+error_report("Error: HV_NO_DEVICE");
+break;
+case HV_UNSUPPORTED:
+error_report("Error: HV_UNSUPPORTED");
+break;
+default:
+error_report("Unknown Error");
+}
+
+abort();
+}
diff --git a/accel/hvf/hvf-cpus.c b/accel/hvf/hvf-cpus.c
new file mode 100644
index 00..4d1cca9d6e
--- /dev/null
+++ b/accel/hvf/hvf-cpus.c
@@ -0,0 +1,464 @@
+/*
+ * Copyright 2008 IBM Corporation
+ *   2008 Red Hat, Inc.
+ * Copyright 2011 Intel Corporation
+ * Copyright 2016 Veertu, Inc.
+ * Copyright 2017 The Android Open Source Project
+ *
+ * QEMU Hypervisor.framework support
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ *
+ * This file contain code under public domain from the hvdos project:
+ * https://github.com/mist64/hvdos
+ *
+ * Parts Copyright (c) 2011 NetApp, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *notice, this list of conditions and the following disclaimer in the
+ *documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF

[PATCH v3 03/10] hvf: Introduce hvf vcpu struct

2020-12-02 Thread Alexander Graf

We will need more than a single field for hvf going forward. To keep
the global vcpu struct uncluttered, let's allocate a special hvf vcpu
struct, similar to how hax does it.

Signed-off-by: Alexander Graf 
---
 accel/hvf/hvf-cpus.c|   8 +-
 include/hw/core/cpu.h   |   3 +-
 include/sysemu/hvf_int.h|   4 +
 target/i386/hvf/hvf.c   | 102 +-
 target/i386/hvf/vmx.h   |  24 +++--
 target/i386/hvf/x86.c   |  28 ++---
 target/i386/hvf/x86_descr.c |  26 ++---
 target/i386/hvf/x86_emu.c   |  62 +--
 target/i386/hvf/x86_mmu.c   |   4 +-
 target/i386/hvf/x86_task.c  |  12 +--
 target/i386/hvf/x86hvf.c| 210 ++--
 11 files changed, 247 insertions(+), 236 deletions(-)

diff --git a/accel/hvf/hvf-cpus.c b/accel/hvf/hvf-cpus.c
index 4d1cca9d6e..a423f629d5 100644
--- a/accel/hvf/hvf-cpus.c
+++ b/accel/hvf/hvf-cpus.c
@@ -314,10 +314,12 @@ static void hvf_cpu_synchronize_pre_loadvm(CPUState *cpu)
 
 static void hvf_vcpu_destroy(CPUState *cpu)
 {
-hv_return_t ret = hv_vcpu_destroy(cpu->hvf_fd);
+hv_return_t ret = hv_vcpu_destroy(cpu->hvf->fd);
 assert_hvf_ok(ret);
 
 hvf_arch_vcpu_destroy(cpu);
+free(cpu->hvf);
+cpu->hvf = NULL;
 }
 
 static void dummy_signal(int sig)
@@ -328,6 +330,8 @@ static int hvf_init_vcpu(CPUState *cpu)
 {
 int r;
 
+cpu->hvf = g_malloc0(sizeof(*cpu->hvf));
+
 /* init cpu signals */
 sigset_t set;
 struct sigaction sigact;
@@ -339,7 +343,7 @@ static int hvf_init_vcpu(CPUState *cpu)
 pthread_sigmask(SIG_BLOCK, NULL, );
 sigdelset(, SIG_IPI);
 
-r = hv_vcpu_create((hv_vcpuid_t *)>hvf_fd, HV_VCPU_DEFAULT);
+r = hv_vcpu_create((hv_vcpuid_t *)>hvf->fd, HV_VCPU_DEFAULT);
 cpu->vcpu_dirty = 1;
 assert_hvf_ok(r);
 
diff --git a/include/hw/core/cpu.h b/include/hw/core/cpu.h
index 3d92c967ff..6032d8a52c 100644
--- a/include/hw/core/cpu.h
+++ b/include/hw/core/cpu.h
@@ -280,6 +280,7 @@ struct KVMState;
 struct kvm_run;
 
 struct hax_vcpu_state;
+struct hvf_vcpu_state;
 
 #define TB_JMP_CACHE_BITS 12
 #define TB_JMP_CACHE_SIZE (1 << TB_JMP_CACHE_BITS)
@@ -463,7 +464,7 @@ struct CPUState {
 
 struct hax_vcpu_state *hax_vcpu;
 
-int hvf_fd;
+struct hvf_vcpu_state *hvf;
 
 /* track IOMMUs whose translations we've cached in the TCG TLB */
 GArray *iommu_notifiers;
diff --git a/include/sysemu/hvf_int.h b/include/sysemu/hvf_int.h
index de9bad23a8..7967e33727 100644
--- a/include/sysemu/hvf_int.h
+++ b/include/sysemu/hvf_int.h
@@ -58,6 +58,10 @@ struct HVFState {
 };
 extern HVFState *hvf_state;
 
+struct hvf_vcpu_state {
+int fd;
+};
+
 void assert_hvf_ok(hv_return_t ret);
 int hvf_get_registers(CPUState *cpu);
 int hvf_put_registers(CPUState *cpu);
diff --git a/target/i386/hvf/hvf.c b/target/i386/hvf/hvf.c
index 8b96ecd619..08b4adecd9 100644
--- a/target/i386/hvf/hvf.c
+++ b/target/i386/hvf/hvf.c
@@ -80,11 +80,11 @@ void vmx_update_tpr(CPUState *cpu)
 int tpr = cpu_get_apic_tpr(x86_cpu->apic_state) << 4;
 int irr = apic_get_highest_priority_irr(x86_cpu->apic_state);
 
-wreg(cpu->hvf_fd, HV_X86_TPR, tpr);
+wreg(cpu->hvf->fd, HV_X86_TPR, tpr);
 if (irr == -1) {
-wvmcs(cpu->hvf_fd, VMCS_TPR_THRESHOLD, 0);
+wvmcs(cpu->hvf->fd, VMCS_TPR_THRESHOLD, 0);
 } else {
-wvmcs(cpu->hvf_fd, VMCS_TPR_THRESHOLD, (irr > tpr) ? tpr >> 4 :
+wvmcs(cpu->hvf->fd, VMCS_TPR_THRESHOLD, (irr > tpr) ? tpr >> 4 :
   irr >> 4);
 }
 }
@@ -92,7 +92,7 @@ void vmx_update_tpr(CPUState *cpu)
 static void update_apic_tpr(CPUState *cpu)
 {
 X86CPU *x86_cpu = X86_CPU(cpu);
-int tpr = rreg(cpu->hvf_fd, HV_X86_TPR) >> 4;
+int tpr = rreg(cpu->hvf->fd, HV_X86_TPR) >> 4;
 cpu_set_apic_tpr(x86_cpu->apic_state, tpr);
 }
 
@@ -194,43 +194,43 @@ int hvf_arch_init_vcpu(CPUState *cpu)
 }
 
 /* set VMCS control fields */
-wvmcs(cpu->hvf_fd, VMCS_PIN_BASED_CTLS,
+wvmcs(cpu->hvf->fd, VMCS_PIN_BASED_CTLS,
   cap2ctrl(hvf_state->hvf_caps->vmx_cap_pinbased,
   VMCS_PIN_BASED_CTLS_EXTINT |
   VMCS_PIN_BASED_CTLS_NMI |
   VMCS_PIN_BASED_CTLS_VNMI));
-wvmcs(cpu->hvf_fd, VMCS_PRI_PROC_BASED_CTLS,
+wvmcs(cpu->hvf->fd, VMCS_PRI_PROC_BASED_CTLS,
   cap2ctrl(hvf_state->hvf_caps->vmx_cap_procbased,
   VMCS_PRI_PROC_BASED_CTLS_HLT |
   VMCS_PRI_PROC_BASED_CTLS_MWAIT |
   VMCS_PRI_PROC_BASED_CTLS_TSC_OFFSET |
   VMCS_PRI_PROC_BASED_CTLS_TPR_SHADOW) |
   VMCS_PRI_PROC_BASED_CTLS_SEC_CONTROL);
-wvmcs(cpu->hvf_fd, VMCS_SEC_PROC_BASED_CTLS,
+wvmcs(cpu->hvf->fd, VMCS_SEC_PROC_BASED_CTLS,
   cap2ctrl(hvf_state->hvf_caps->vmx_cap_procbased2,
VMCS_PRI_PROC_BASED2_CTLS_APIC_ACCESSES));
 
-wvmcs(cpu->hvf_fd, VMCS_ENTRY_CTLS, 
cap2ctrl(hvf_state->hvf_caps->vmx_cap_entry,
+wvmcs(cpu->hvf->fd, VMCS_ENTRY_CTLS, 
cap2ctrl(hvf_state->hvf_caps->vmx_cap_entry,
   0));
-

[PATCH v3 09/10] hvf: arm: Add support for GICv3

2020-12-02 Thread Alexander Graf

We currently only support GICv2 emulation. To also support GICv3, we will
need to pass a few system registers into their respective handler functions.

This patch adds handling for all of the required system registers, so that
we can run with more than 8 vCPUs.

Signed-off-by: Alexander Graf 
---
 target/arm/hvf/hvf.c | 141 +++
 1 file changed, 141 insertions(+)

diff --git a/target/arm/hvf/hvf.c b/target/arm/hvf/hvf.c
index 79aeeb237b..dfdf0827e4 100644
--- a/target/arm/hvf/hvf.c
+++ b/target/arm/hvf/hvf.c
@@ -23,6 +23,7 @@
 
 #include "exec/address-spaces.h"
 #include "hw/irq.h"
+#include "hw/intc/gicv3_internal.h"
 #include "qemu/main-loop.h"
 #include "sysemu/accel.h"
 #include "sysemu/cpus.h"
@@ -47,6 +48,33 @@
 #define SYSREG_CNTPCT_EL0 SYSREG(3, 3, 1, 14, 0)
 #define SYSREG_PMCCNTR_EL0SYSREG(3, 3, 0, 9, 13)
 
+#define SYSREG_ICC_AP0R0_EL1 SYSREG(3, 0, 4, 12, 8)
+#define SYSREG_ICC_AP0R1_EL1 SYSREG(3, 0, 5, 12, 8)
+#define SYSREG_ICC_AP0R2_EL1 SYSREG(3, 0, 6, 12, 8)
+#define SYSREG_ICC_AP0R3_EL1 SYSREG(3, 0, 7, 12, 8)
+#define SYSREG_ICC_AP1R0_EL1 SYSREG(3, 0, 0, 12, 9)
+#define SYSREG_ICC_AP1R1_EL1 SYSREG(3, 0, 1, 12, 9)
+#define SYSREG_ICC_AP1R2_EL1 SYSREG(3, 0, 2, 12, 9)
+#define SYSREG_ICC_AP1R3_EL1 SYSREG(3, 0, 3, 12, 9)
+#define SYSREG_ICC_ASGI1R_EL1SYSREG(3, 0, 6, 12, 11)
+#define SYSREG_ICC_BPR0_EL1  SYSREG(3, 0, 3, 12, 8)
+#define SYSREG_ICC_BPR1_EL1  SYSREG(3, 0, 3, 12, 12)
+#define SYSREG_ICC_CTLR_EL1  SYSREG(3, 0, 4, 12, 12)
+#define SYSREG_ICC_DIR_EL1   SYSREG(3, 0, 1, 12, 11)
+#define SYSREG_ICC_EOIR0_EL1 SYSREG(3, 0, 1, 12, 8)
+#define SYSREG_ICC_EOIR1_EL1 SYSREG(3, 0, 1, 12, 12)
+#define SYSREG_ICC_HPPIR0_EL1SYSREG(3, 0, 2, 12, 8)
+#define SYSREG_ICC_HPPIR1_EL1SYSREG(3, 0, 2, 12, 12)
+#define SYSREG_ICC_IAR0_EL1  SYSREG(3, 0, 0, 12, 8)
+#define SYSREG_ICC_IAR1_EL1  SYSREG(3, 0, 0, 12, 12)
+#define SYSREG_ICC_IGRPEN0_EL1   SYSREG(3, 0, 6, 12, 12)
+#define SYSREG_ICC_IGRPEN1_EL1   SYSREG(3, 0, 7, 12, 12)
+#define SYSREG_ICC_PMR_EL1   SYSREG(3, 0, 0, 4, 6)
+#define SYSREG_ICC_RPR_EL1   SYSREG(3, 0, 3, 12, 11)
+#define SYSREG_ICC_SGI0R_EL1 SYSREG(3, 0, 7, 12, 11)
+#define SYSREG_ICC_SGI1R_EL1 SYSREG(3, 0, 5, 12, 11)
+#define SYSREG_ICC_SRE_EL1   SYSREG(3, 0, 5, 12, 12)
+
 #define WFX_IS_WFE (1 << 0)
 
 struct hvf_reg_match {
@@ -419,6 +447,38 @@ void hvf_kick_vcpu_thread(CPUState *cpu)
 hv_vcpus_exit(>hvf->fd, 1);
 }
 
+static uint32_t hvf_reg2cp_reg(uint32_t reg)
+{
+return ENCODE_AA64_CP_REG(CP_REG_ARM64_SYSREG_CP,
+  (reg >> 10) & 0xf,
+  (reg >> 1) & 0xf,
+  (reg >> 20) & 0x3,
+  (reg >> 14) & 0x7,
+  (reg >> 17) & 0x7);
+}
+
+static uint64_t hvf_sysreg_read_cp(CPUState *cpu, uint32_t reg)
+{
+ARMCPU *arm_cpu = ARM_CPU(cpu);
+CPUARMState *env = _cpu->env;
+const ARMCPRegInfo *ri;
+uint64_t val = 0;
+
+ri = get_arm_cp_reginfo(arm_cpu->cp_regs, hvf_reg2cp_reg(reg));
+if (ri) {
+if (ri->type & ARM_CP_CONST) {
+val = ri->resetvalue;
+} else if (ri->readfn) {
+val = ri->readfn(env, ri);
+} else {
+val = CPREG_FIELD64(env, ri);
+}
+DPRINTF("vgic read from %s [val=%016llx]", ri->name, val);
+}
+
+return val;
+}
+
 static uint64_t hvf_sysreg_read(CPUState *cpu, uint32_t reg)
 {
 ARMCPU *arm_cpu = ARM_CPU(cpu);
@@ -432,6 +492,39 @@ static uint64_t hvf_sysreg_read(CPUState *cpu, uint32_t 
reg)
 case SYSREG_PMCCNTR_EL0:
 val = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
 break;
+case SYSREG_ICC_AP0R0_EL1:
+case SYSREG_ICC_AP0R1_EL1:
+case SYSREG_ICC_AP0R2_EL1:
+case SYSREG_ICC_AP0R3_EL1:
+case SYSREG_ICC_AP1R0_EL1:
+case SYSREG_ICC_AP1R1_EL1:
+case SYSREG_ICC_AP1R2_EL1:
+case SYSREG_ICC_AP1R3_EL1:
+case SYSREG_ICC_ASGI1R_EL1:
+case SYSREG_ICC_BPR0_EL1:
+case SYSREG_ICC_BPR1_EL1:
+case SYSREG_ICC_DIR_EL1:
+case SYSREG_ICC_EOIR0_EL1:
+case SYSREG_ICC_EOIR1_EL1:
+case SYSREG_ICC_HPPIR0_EL1:
+case SYSREG_ICC_HPPIR1_EL1:
+case SYSREG_ICC_IAR0_EL1:
+case SYSREG_ICC_IAR1_EL1:
+case SYSREG_ICC_IGRPEN0_EL1:
+case SYSREG_ICC_IGRPEN1_EL1:
+case SYSREG_ICC_PMR_EL1:
+case SYSREG_ICC_SGI0R_EL1:
+case SYSREG_ICC_SGI1R_EL1:
+case SYSREG_ICC_SRE_EL1:
+val = hvf_sysreg_read_cp(cpu, reg);
+break;
+case SYSREG_ICC_CTLR_EL1:
+val = hvf_sysreg_read_cp(cpu, reg);
+
+/* AP0R registers above 0 don't trap, expose less PRIs to fit */
+val &= ~ICC_CTLR_EL1_PRIBITS_MASK;
+val |= 4 << ICC_CTLR_EL1_PRIBITS_SHIFT;
+break;
 default:
 DPRINTF("unhandled sysreg read %08x (op0=%d op1=%d op2=%d "
 "crn=%d crm=%d)", reg, (reg >> 20) & 0x3,

[PATCH v3 00/10] hvf: Implement Apple Silicon Support

2020-12-02 Thread Alexander Graf

Now that Apple Silicon is widely available, people are obviously excited
to try and run virtualized workloads on them, such as Linux and Windows.

This patch set implements a fully functional version to get the ball
going on that. With this applied, I can successfully run both Linux and
Windows as guests. I am not aware of any limitations specific to
Hypervisor.framework apart from:

  - Live migration / savevm
  - gdbstub debugging (SP register)


Enjoy!

Alex

v1 -> v2:

  - New patch: hvf: Actually set SIG_IPI mask
  - New patch: hvf: Introduce hvf vcpu struct
  - New patch: hvf: arm: Mark CPU as dirty on reset
  - Removed patch: hw/arm/virt: Disable highmem when on hypervisor.framework
  - Removed patch: arm: Synchronize CPU on PSCI on
  - Fix build on 32bit arm
  - Merge vcpu kick function patch into ARM enablement
  - Implement WFI handling (allows vCPUs to sleep)
  - Synchronize system registers (fixes OVMF crashes and reboot)
  - Don't always call cpu_synchronize_state()
  - Use more fine grained iothread locking
  - Populate aa64mmfr0 from hardware
  - Make safe to ctrl-C entitlement application

v2 -> v3:

  - Removed patch: hvf: Actually set SIG_IPI mask
  - New patch: hvf: arm: Add support for GICv3
  - New patch: hvf: arm: Implement -cpu host
  - Advance PC on SMC
  - Use cp list interface for sysreg syncs
  - Do not set current_cpu
  - Fix sysreg isread mask
  - Move sysreg handling to functions
  - Remove WFI logic again
  - Revert to global iothread locking

Alexander Graf (9):
  hvf: Add hypervisor entitlement to output binaries
  hvf: Move common code out
  hvf: Introduce hvf vcpu struct
  arm: Set PSCI to 0.2 for HVF
  hvf: arm: Mark CPU as dirty on reset
  hvf: Add Apple Silicon support
  arm: Add Hypervisor.framework build target
  hvf: arm: Add support for GICv3
  hvf: arm: Implement -cpu host

Peter Collingbourne (1):
  arm/hvf: Add a WFI handler

 MAINTAINERS  |  14 +-
 accel/hvf/entitlements.plist |   8 +
 accel/hvf/hvf-all.c  |  56 +++
 accel/hvf/hvf-cpus.c | 481 
 accel/hvf/meson.build|   7 +
 accel/meson.build|   1 +
 include/hw/core/cpu.h|   3 +-
 include/sysemu/hvf.h |   2 +
 include/sysemu/hvf_int.h |  77 
 meson.build  |  41 +-
 scripts/entitlement.sh   |  13 +
 target/arm/arm-powerctl.c|   1 +
 target/arm/cpu.c |  15 +-
 target/arm/cpu.h |   2 +
 target/arm/hvf/hvf.c | 857 +++
 target/arm/hvf/meson.build   |   3 +
 target/arm/kvm_arm.h |   2 -
 target/arm/meson.build   |   2 +
 target/i386/hvf/hvf-cpus.c   | 131 --
 target/i386/hvf/hvf-cpus.h   |  25 -
 target/i386/hvf/hvf-i386.h   |  48 +-
 target/i386/hvf/hvf.c| 462 +++
 target/i386/hvf/meson.build  |   1 -
 target/i386/hvf/vmx.h|  24 +-
 target/i386/hvf/x86.c|  28 +-
 target/i386/hvf/x86_descr.c  |  26 +-
 target/i386/hvf/x86_emu.c|  62 +--
 target/i386/hvf/x86_mmu.c|   4 +-
 target/i386/hvf/x86_task.c   |  12 +-
 target/i386/hvf/x86hvf.c | 221 -
 target/i386/hvf/x86hvf.h |   2 -
 31 files changed, 1818 insertions(+), 813 deletions(-)
 create mode 100644 accel/hvf/entitlements.plist
 create mode 100644 accel/hvf/hvf-all.c
 create mode 100644 accel/hvf/hvf-cpus.c
 create mode 100644 accel/hvf/meson.build
 create mode 100644 include/sysemu/hvf_int.h
 create mode 100755 scripts/entitlement.sh
 create mode 100644 target/arm/hvf/hvf.c
 create mode 100644 target/arm/hvf/meson.build
 delete mode 100644 target/i386/hvf/hvf-cpus.c
 delete mode 100644 target/i386/hvf/hvf-cpus.h

-- 
2.24.3 (Apple Git-128)

[PATCH v3 01/10] hvf: Add hypervisor entitlement to output binaries

2020-12-02 Thread Alexander Graf

In macOS 11, QEMU only gets access to Hypervisor.framework if it has the
respective entitlement. Add an entitlement template and automatically
self-sign and apply the entitlement in the build.

Signed-off-by: Alexander Graf 

---

v1 -> v2:

  - Make safe to ctrl-C
---
 accel/hvf/entitlements.plist |  8 
 meson.build  | 30 ++
 scripts/entitlement.sh   | 13 +
 3 files changed, 47 insertions(+), 4 deletions(-)
 create mode 100644 accel/hvf/entitlements.plist
 create mode 100755 scripts/entitlement.sh

diff --git a/accel/hvf/entitlements.plist b/accel/hvf/entitlements.plist
new file mode 100644
index 00..154f3308ef
--- /dev/null
+++ b/accel/hvf/entitlements.plist
@@ -0,0 +1,8 @@
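+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
+<plist version="1.0">
+<dict>
+    <key>com.apple.security.hypervisor</key>
+    <true/>
+</dict>
+</plist>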
diff --git a/meson.build b/meson.build
index 5062407c70..2a7ff5560c 100644
--- a/meson.build
+++ b/meson.build
@@ -1844,9 +1844,14 @@ foreach target : target_dirs
 }]
   endif
   foreach exe: execs
-emulators += {exe['name']:
- executable(exe['name'], exe['sources'],
-   install: true,
+exe_name = exe['name']
+exe_sign = 'CONFIG_HVF' in config_target
+if exe_sign
+  exe_name += '-unsigned'
+endif
+
+emulator = executable(exe_name, exe['sources'],
+   install: not exe_sign,
c_args: c_args,
dependencies: arch_deps + deps + exe['dependencies'],
objects: lib.extract_all_objects(recursive: true),
@@ -1854,7 +1859,24 @@ foreach target : target_dirs
link_depends: [block_syms, qemu_syms] + exe.get('link_depends', 
[]),
link_args: link_args,
gui_app: exe['gui'])
-}
+
+if exe_sign
+  exe_full = meson.current_build_dir() / exe['name']
+  emulators += {exe['name'] : custom_target(exe['name'],
+   install: true,
+   install_dir: get_option('bindir'),
+   depends: emulator,
+   output: exe['name'],
+   command: [
+ meson.current_source_dir() / 'scripts/entitlement.sh',
+ meson.current_build_dir() / exe['name'] + '-unsigned',
+ meson.current_build_dir() / exe['name'],
+ meson.current_source_dir() / 
'accel/hvf/entitlements.plist'
+   ])
+  }
+else
+  emulators += {exe['name']: emulator}
+endif
 
 if 'CONFIG_TRACE_SYSTEMTAP' in config_host
   foreach stp: [
diff --git a/scripts/entitlement.sh b/scripts/entitlement.sh
new file mode 100755
index 00..c540fa6435
--- /dev/null
+++ b/scripts/entitlement.sh
@@ -0,0 +1,13 @@
+#!/bin/sh -e
+#
+# Helper script for the build process to apply entitlements
+
+SRC="$1"
+DST="$2"
+ENTITLEMENT="$3"
+
+trap 'rm "$DST.tmp"' exit
+cp -af "$SRC" "$DST.tmp"
+codesign --entitlements "$ENTITLEMENT" --force -s - "$DST.tmp"
+mv "$DST.tmp" "$DST"
+trap '' exit
-- 
2.24.3 (Apple Git-128)

[DISCUSSION] How to set properties of non-pluggable devices?

2020-12-02 Thread Doug Evans

Hi.

Suppose I want to set a property of a non-pluggable device that cannot be
set after the device has been realized (e.g., I can't use qmp to set the
property after QEMU has started).
Being non-pluggable means I can't use "-device foo,bar=baz" on the command
line.
[But I can use "-device foo,help" to list its properties :-)  (if I also
specify -M bar) ]

How do people do this?

The device is part of a "machine" (board really), so I could add the
property to the machine to be passed on to the device when it's realized
(at least I think I can), but that doesn't feel right: The machine has lots
of devices -> it feels cleaner to associate the property with the device
and not the machine (lest the machine over time collect a myriad of random
properties to pass on to its devices). Things get a little complicated
because the machine can have multiple copies of a device: specifying the
device's name is insufficient.

The device has an object path: /machine/foo/bar/device[0]. There's also
/.../device[1].
IWBN to be able to do something along the lines of:
-device-property /device/path[,PROP1=VALUE1,...]
copying the syntax used for "-object".

It's perhaps even nicer if this could be accomplished with -device:
avoiding further confusion on what -device can and can't be used for (e.g.,
can I use -device-property to set a property that could also be set with
-device?).

If what I'm asking for is reasonable and isn't doable today (I'm certainly
willing to believe I'm missing something), I'm happy to work on the patch
(with some guidance as to what would be acceptable).

One thought that comes to mind is to use -object, store the properties
there, and have the machine collect them from there when realizing its
devices. Or is that an abuse of -object ?

Re: [PATCH v3 3/3] arm/hvf: Add a WFI handler

2020-12-02 Thread Alexander Graf




On 02.12.20 05:44, Peter Collingbourne wrote:

Sleep on WFI until the VTIMER is due but allow ourselves to be woken
up on IPI.

Signed-off-by: Peter Collingbourne 
---
v3:
- move the simplified locking to a separate patch
- spin on sleep <2ms

v2:
- simplify locking further
- wait indefinitely on disabled or masked timers

  accel/hvf/hvf-cpus.c |  4 +--
  include/sysemu/hvf_int.h |  1 +
  target/arm/hvf/hvf.c | 56 
  3 files changed, 59 insertions(+), 2 deletions(-)

diff --git a/accel/hvf/hvf-cpus.c b/accel/hvf/hvf-cpus.c
index e613c22ad0..b2c8fb57f6 100644
--- a/accel/hvf/hvf-cpus.c
+++ b/accel/hvf/hvf-cpus.c
@@ -344,8 +344,8 @@ static int hvf_init_vcpu(CPUState *cpu)
  sigact.sa_handler = dummy_signal;
  sigaction(SIG_IPI, &sigact, NULL);
  
-pthread_sigmask(SIG_BLOCK, NULL, &set);

-sigdelset(&set, SIG_IPI);
+pthread_sigmask(SIG_BLOCK, NULL, &cpu->hvf->unblock_ipi_mask);
+sigdelset(&cpu->hvf->unblock_ipi_mask, SIG_IPI);



That turns set into an unused variable, no? I'll fix it up while 
applying though. The rest looks great, I'll push it as part of my next 
patch set.



Alex

[PATCH 8/9] target/mips: Remove CPUMIPSState* argument from gen_msa*() methods

2020-12-02 Thread Philippe Mathieu-Daudé

The gen_msa*() methods don't use the "CPUMIPSState *env"
argument. Remove it to simplify.

Signed-off-by: Philippe Mathieu-Daudé 
---
 target/mips/translate.c | 57 -
 1 file changed, 28 insertions(+), 29 deletions(-)

diff --git a/target/mips/translate.c b/target/mips/translate.c
index a5112acc351..5311e6ced62 100644
--- a/target/mips/translate.c
+++ b/target/mips/translate.c
@@ -28744,7 +28744,7 @@ static void gen_check_zero_element(TCGv tresult, 
uint8_t df, uint8_t wt)
 tcg_temp_free_i64(t1);
 }
 
-static void gen_msa_branch(CPUMIPSState *env, DisasContext *ctx, uint32_t op1)
+static void gen_msa_branch(DisasContext *ctx, uint32_t op1)
 {
 uint8_t df = (ctx->opcode >> 21) & 0x3;
 uint8_t wt = (ctx->opcode >> 16) & 0x1f;
@@ -28789,7 +28789,7 @@ static void gen_msa_branch(CPUMIPSState *env, 
DisasContext *ctx, uint32_t op1)
 ctx->hflags |= MIPS_HFLAG_BDS32;
 }
 
-static void gen_msa_i8(CPUMIPSState *env, DisasContext *ctx)
+static void gen_msa_i8(DisasContext *ctx)
 {
 #define MASK_MSA_I8(op)(MASK_MSA_MINOR(op) | (op & (0x03 << 24)))
 uint8_t i8 = (ctx->opcode >> 16) & 0xff;
@@ -28847,7 +28847,7 @@ static void gen_msa_i8(CPUMIPSState *env, DisasContext 
*ctx)
 tcg_temp_free_i32(ti8);
 }
 
-static void gen_msa_i5(CPUMIPSState *env, DisasContext *ctx)
+static void gen_msa_i5(DisasContext *ctx)
 {
 #define MASK_MSA_I5(op)(MASK_MSA_MINOR(op) | (op & (0x7 << 23)))
 uint8_t df = (ctx->opcode >> 21) & 0x3;
@@ -28920,7 +28920,7 @@ static void gen_msa_i5(CPUMIPSState *env, DisasContext 
*ctx)
 tcg_temp_free_i32(timm);
 }
 
-static void gen_msa_bit(CPUMIPSState *env, DisasContext *ctx)
+static void gen_msa_bit(DisasContext *ctx)
 {
 #define MASK_MSA_BIT(op)(MASK_MSA_MINOR(op) | (op & (0x7 << 23)))
 uint8_t dfm = (ctx->opcode >> 16) & 0x7f;
@@ -29004,7 +29004,7 @@ static void gen_msa_bit(CPUMIPSState *env, DisasContext 
*ctx)
 tcg_temp_free_i32(tws);
 }
 
-static void gen_msa_3r(CPUMIPSState *env, DisasContext *ctx)
+static void gen_msa_3r(DisasContext *ctx)
 {
 #define MASK_MSA_3R(op)(MASK_MSA_MINOR(op) | (op & (0x7 << 23)))
 uint8_t df = (ctx->opcode >> 21) & 0x3;
@@ -29986,7 +29986,7 @@ static void gen_msa_3r(CPUMIPSState *env, DisasContext 
*ctx)
 tcg_temp_free_i32(tdf);
 }
 
-static void gen_msa_elm_3e(CPUMIPSState *env, DisasContext *ctx)
+static void gen_msa_elm_3e(DisasContext *ctx)
 {
 #define MASK_MSA_ELM_DF3E(op)   (MASK_MSA_MINOR(op) | (op & (0x3FF << 16)))
 uint8_t source = (ctx->opcode >> 11) & 0x1f;
@@ -30018,8 +30018,7 @@ static void gen_msa_elm_3e(CPUMIPSState *env, 
DisasContext *ctx)
 tcg_temp_free_i32(tsr);
 }
 
-static void gen_msa_elm_df(CPUMIPSState *env, DisasContext *ctx, uint32_t df,
-uint32_t n)
+static void gen_msa_elm_df(DisasContext *ctx, uint32_t df, uint32_t n)
 {
 #define MASK_MSA_ELM(op)(MASK_MSA_MINOR(op) | (op & (0xf << 22)))
 uint8_t ws = (ctx->opcode >> 11) & 0x1f;
@@ -30129,7 +30128,7 @@ static void gen_msa_elm_df(CPUMIPSState *env, 
DisasContext *ctx, uint32_t df,
 tcg_temp_free_i32(tdf);
 }
 
-static void gen_msa_elm(CPUMIPSState *env, DisasContext *ctx)
+static void gen_msa_elm(DisasContext *ctx)
 {
 uint8_t dfn = (ctx->opcode >> 16) & 0x3f;
 uint32_t df = 0, n = 0;
@@ -30148,17 +30147,17 @@ static void gen_msa_elm(CPUMIPSState *env, 
DisasContext *ctx)
 df = DF_DOUBLE;
 } else if (dfn == 0x3E) {
 /* CTCMSA, CFCMSA, MOVE.V */
-gen_msa_elm_3e(env, ctx);
+gen_msa_elm_3e(ctx);
 return;
 } else {
 generate_exception_end(ctx, EXCP_RI);
 return;
 }
 
-gen_msa_elm_df(env, ctx, df, n);
+gen_msa_elm_df(ctx, df, n);
 }
 
-static void gen_msa_3rf(CPUMIPSState *env, DisasContext *ctx)
+static void gen_msa_3rf(DisasContext *ctx)
 {
 #define MASK_MSA_3RF(op)(MASK_MSA_MINOR(op) | (op & (0xf << 22)))
 uint8_t df = (ctx->opcode >> 21) & 0x1;
@@ -30316,7 +30315,7 @@ static void gen_msa_3rf(CPUMIPSState *env, DisasContext 
*ctx)
 tcg_temp_free_i32(tdf);
 }
 
-static void gen_msa_2r(CPUMIPSState *env, DisasContext *ctx)
+static void gen_msa_2r(DisasContext *ctx)
 {
 #define MASK_MSA_2R(op) (MASK_MSA_MINOR(op) | (op & (0x1f << 21)) | \
 (op & (0x7 << 18)))
@@ -30400,7 +30399,7 @@ static void gen_msa_2r(CPUMIPSState *env, DisasContext 
*ctx)
 tcg_temp_free_i32(tdf);
 }
 
-static void gen_msa_2rf(CPUMIPSState *env, DisasContext *ctx)
+static void gen_msa_2rf(DisasContext *ctx)
 {
 #define MASK_MSA_2RF(op)(MASK_MSA_MINOR(op) | (op & (0x1f << 21)) | \
 (op & (0xf << 17)))
@@ -30471,7 +30470,7 @@ static void gen_msa_2rf(CPUMIPSState *env, DisasContext 
*ctx)
 tcg_temp_free_i32(tdf);
 }
 
-static void gen_msa_vec_v(CPUMIPSState *env, DisasContext *ctx)
+static void gen_msa_vec_v(DisasContext *ctx)
 {
 #define MASK_MSA_VEC(op)(MASK_MSA_MINOR(op) | (op & (0x1f << 21)))
 uint8_t wt = (ctx->opcode >>

[PATCH 7/9] target/mips: Extract msa_translate_init() from mips_tcg_init()

2020-12-02 Thread Philippe Mathieu-Daudé

Extract the initialization logic of the MSA registers from
the generic initialization.

Signed-off-by: Philippe Mathieu-Daudé 
---
 target/mips/translate.c | 35 ---
 1 file changed, 20 insertions(+), 15 deletions(-)

diff --git a/target/mips/translate.c b/target/mips/translate.c
index 41880f21abd..a5112acc351 100644
--- a/target/mips/translate.c
+++ b/target/mips/translate.c
@@ -31672,6 +31672,24 @@ void mips_cpu_dump_state(CPUState *cs, FILE *f, int 
flags)
 }
 }
 
+static void msa_translate_init(void)
+{
+int i;
+
+for (i = 0; i < 32; i++) {
+int off = offsetof(CPUMIPSState, active_fpu.fpr[i].wr.d[0]);
+
+/*
+ * The MSA vector registers are mapped on the
+ * scalar floating-point unit (FPU) registers.
+ */
+msa_wr_d[i * 2] = fpu_f64[i];
+off = offsetof(CPUMIPSState, active_fpu.fpr[i].wr.d[1]);
+msa_wr_d[i * 2 + 1] =
+tcg_global_mem_new_i64(cpu_env, off, msaregnames[i * 2 + 1]);
+}
+}
+
 void mips_tcg_init(void)
 {
 int i;
@@ -31685,22 +31703,9 @@ void mips_tcg_init(void)
 for (i = 0; i < 32; i++) {
 int off = offsetof(CPUMIPSState, active_fpu.fpr[i].wr.d[0]);
 
-fpu_f64[i] = tcg_global_mem_new_i64(cpu_env, off, msaregnames[i * 2]);
+fpu_f64[i] = tcg_global_mem_new_i64(cpu_env, off, fregnames[i]);
 }
-/* MSA */
-for (i = 0; i < 32; i++) {
-int off = offsetof(CPUMIPSState, active_fpu.fpr[i].wr.d[0]);
-
-/*
- * The MSA vector registers are mapped on the
- * scalar floating-point unit (FPU) registers.
- */
-msa_wr_d[i * 2] = fpu_f64[i];
-off = offsetof(CPUMIPSState, active_fpu.fpr[i].wr.d[1]);
-msa_wr_d[i * 2 + 1] =
-tcg_global_mem_new_i64(cpu_env, off, msaregnames[i * 2 + 1]);
-}
-
+msa_translate_init();
 cpu_PC = tcg_global_mem_new(cpu_env,
 offsetof(CPUMIPSState, active_tc.PC), "PC");
 for (i = 0; i < MIPS_DSP_ACC; i++) {
-- 
2.26.2

[PATCH 6/9] target/mips: Alias MSA vector registers on FPU scalar registers

2020-12-02 Thread Philippe Mathieu-Daudé

Commits 863f264d10f ("add msa_reset(), global msa register") and
cb269f273fd ("fix multiple TCG registers covering same data")
removed the FPU scalar registers and replaced them by aliases to
the MSA vector registers.
While this might be the case for CPUs implementing MSA, this makes
QEMU code incoherent for CPUs not implementing it. It is simpler
to invert the logic and alias the MSA vector registers on the
FPU scalar ones.

Signed-off-by: Philippe Mathieu-Daudé 
---
 target/mips/translate.c | 16 ++--
 1 file changed, 10 insertions(+), 6 deletions(-)

diff --git a/target/mips/translate.c b/target/mips/translate.c
index a05c25e50b8..41880f21abd 100644
--- a/target/mips/translate.c
+++ b/target/mips/translate.c
@@ -31682,16 +31682,20 @@ void mips_tcg_init(void)
 offsetof(CPUMIPSState,
  active_tc.gpr[i]),
 regnames[i]);
-
 for (i = 0; i < 32; i++) {
 int off = offsetof(CPUMIPSState, active_fpu.fpr[i].wr.d[0]);
-msa_wr_d[i * 2] =
-tcg_global_mem_new_i64(cpu_env, off, msaregnames[i * 2]);
+
+fpu_f64[i] = tcg_global_mem_new_i64(cpu_env, off, msaregnames[i * 2]);
+}
+/* MSA */
+for (i = 0; i < 32; i++) {
+int off = offsetof(CPUMIPSState, active_fpu.fpr[i].wr.d[0]);
+
 /*
- * The scalar floating-point unit (FPU) registers are mapped on
- * the MSA vector registers.
+ * The MSA vector registers are mapped on the
+ * scalar floating-point unit (FPU) registers.
  */
-fpu_f64[i] = msa_wr_d[i * 2];
+msa_wr_d[i * 2] = fpu_f64[i];
 off = offsetof(CPUMIPSState, active_fpu.fpr[i].wr.d[1]);
 msa_wr_d[i * 2 + 1] =
 tcg_global_mem_new_i64(cpu_env, off, msaregnames[i * 2 + 1]);
-- 
2.26.2

[PATCH 4/9] target/mips: Simplify MSA TCG logic

2020-12-02 Thread Philippe Mathieu-Daudé

Only decode MSA opcodes if MSA is present (implemented).

Now that check_msa_access() will only be called if MSA is
present, the only way to have MIPS_HFLAG_MSA unset is if
MSA is disabled (bit CP0C5_MSAEn cleared, see previous
commit). Therefore we can remove the 'reserved instruction'
exception.

Signed-off-by: Philippe Mathieu-Daudé 
---
 target/mips/translate.c | 22 ++
 1 file changed, 10 insertions(+), 12 deletions(-)

diff --git a/target/mips/translate.c b/target/mips/translate.c
index 803ffefba2c..a05c25e50b8 100644
--- a/target/mips/translate.c
+++ b/target/mips/translate.c
@@ -28697,13 +28697,8 @@ static inline int check_msa_access(DisasContext *ctx)
 }
 
 if (unlikely(!(ctx->hflags & MIPS_HFLAG_MSA))) {
-if (ctx->insn_flags & ASE_MSA) {
-generate_exception_end(ctx, EXCP_MSADIS);
-return 0;
-} else {
-generate_exception_end(ctx, EXCP_RI);
-return 0;
-}
+generate_exception_end(ctx, EXCP_MSADIS);
+return 0;
 }
 return 1;
 }
@@ -30547,7 +30542,7 @@ static void gen_msa_vec(CPUMIPSState *env, DisasContext 
*ctx)
 static void gen_msa(CPUMIPSState *env, DisasContext *ctx)
 {
 uint32_t opcode = ctx->opcode;
-check_insn(ctx, ASE_MSA);
+
 check_msa_access(ctx);
 
 switch (MASK_MSA_MINOR(opcode)) {
@@ -31194,9 +31189,10 @@ static void decode_opc(CPUMIPSState *env, DisasContext 
*ctx)
 case OPC_BNZ_H:
 case OPC_BNZ_W:
 case OPC_BNZ_D:
-check_insn(ctx, ASE_MSA);
-gen_msa_branch(env, ctx, op1);
-break;
+if (ase_msa_available(env)) {
+gen_msa_branch(env, ctx, op1);
+break;
+}
 default:
 MIPS_INVAL("cp1");
 generate_exception_end(ctx, EXCP_RI);
@@ -31385,7 +31381,9 @@ static void decode_opc(CPUMIPSState *env, DisasContext 
*ctx)
 #endif
 } else {
 /* MDMX: Not implemented. */
-gen_msa(env, ctx);
+if (ase_msa_available(env)) {
+gen_msa(env, ctx);
+}
 }
 break;
 case OPC_PCREL:
-- 
2.26.2

[PATCH 5/9] target/mips: Remove now unused ASE_MSA definition

2020-12-02 Thread Philippe Mathieu-Daudé

We don't use ASE_MSA anymore (replaced by ase_msa_available()
checking MSAP bit from CP0_Config3). Remove it.

Signed-off-by: Philippe Mathieu-Daudé 
---
 target/mips/mips-defs.h  | 1 -
 target/mips/translate_init.c.inc | 8 
 2 files changed, 4 insertions(+), 5 deletions(-)

diff --git a/target/mips/mips-defs.h b/target/mips/mips-defs.h
index ed6a7a9e545..805034b8956 100644
--- a/target/mips/mips-defs.h
+++ b/target/mips/mips-defs.h
@@ -45,7 +45,6 @@
 #define ASE_MT0x4000ULL
 #define ASE_SMARTMIPS 0x8000ULL
 #define ASE_MICROMIPS 0x0001ULL
-#define ASE_MSA   0x0002ULL
 /*
  *   bits 40-51: vendor-specific base instruction sets
  */
diff --git a/target/mips/translate_init.c.inc b/target/mips/translate_init.c.inc
index 3b069190ed8..2170f8ace6f 100644
--- a/target/mips/translate_init.c.inc
+++ b/target/mips/translate_init.c.inc
@@ -408,7 +408,7 @@ const mips_def_t mips_defs[] =
 .CP1_fcr31_rw_bitmask = 0xFF83,
 .SEGBITS = 32,
 .PABITS = 40,
-.insn_flags = CPU_MIPS32R5 | ASE_MSA,
+.insn_flags = CPU_MIPS32R5,
 .mmu_type = MMU_TYPE_R4000,
 },
 {
@@ -719,7 +719,7 @@ const mips_def_t mips_defs[] =
 .MSAIR = 0x03 << MSAIR_ProcID,
 .SEGBITS = 48,
 .PABITS = 48,
-.insn_flags = CPU_MIPS64R6 | ASE_MSA,
+.insn_flags = CPU_MIPS64R6,
 .mmu_type = MMU_TYPE_R4000,
 },
 {
@@ -759,7 +759,7 @@ const mips_def_t mips_defs[] =
 .MSAIR = 0x03 << MSAIR_ProcID,
 .SEGBITS = 48,
 .PABITS = 48,
-.insn_flags = CPU_MIPS64R6 | ASE_MSA,
+.insn_flags = CPU_MIPS64R6,
 .mmu_type = MMU_TYPE_R4000,
 },
 {
@@ -885,7 +885,7 @@ const mips_def_t mips_defs[] =
 .CP1_fcr31_rw_bitmask = 0xFF83,
 .SEGBITS = 48,
 .PABITS = 48,
-.insn_flags = CPU_LOONGSON3A | ASE_MSA,
+.insn_flags = CPU_LOONGSON3A,
 .mmu_type = MMU_TYPE_R4000,
 },
 {
-- 
2.26.2

[PATCH 3/9] target/mips: Use CP0_Config3 to set MIPS_HFLAG_MSA

2020-12-02 Thread Philippe Mathieu-Daudé

MSA presence is expressed by the MSAP bit of CP0_Config3.
We don't need to check anything else.

Signed-off-by: Philippe Mathieu-Daudé 
---
 target/mips/internal.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/target/mips/internal.h b/target/mips/internal.h
index f882ac1580c..95cbd314018 100644
--- a/target/mips/internal.h
+++ b/target/mips/internal.h
@@ -433,7 +433,7 @@ static inline void compute_hflags(CPUMIPSState *env)
 env->hflags |= MIPS_HFLAG_COP1X;
 }
 }
-if (env->insn_flags & ASE_MSA) {
+if (ase_msa_available(env)) {
 if (env->CP0_Config5 & (1 << CP0C5_MSAEn)) {
 env->hflags |= MIPS_HFLAG_MSA;
 }
-- 
2.26.2

[PATCH 9/9] target/mips: Explode gen_msa_branch() as gen_msa_BxZ_V/BxZ()

2020-12-02 Thread Philippe Mathieu-Daudé

In preparation for using the decodetree script, explode
gen_msa_branch() as follows:

- OPC_BZ_V  -> BxZ_V(EQ)
- OPC_BNZ_V -> BxZ_V(NE)
- OPC_BZ_[BHWD] -> BxZ(false)
- OPC_BNZ_[BHWD]-> BxZ(true)

Signed-off-by: Philippe Mathieu-Daudé 
---
 target/mips/translate.c | 71 -
 1 file changed, 49 insertions(+), 22 deletions(-)

diff --git a/target/mips/translate.c b/target/mips/translate.c
index 5311e6ced62..8a35d4d0d03 100644
--- a/target/mips/translate.c
+++ b/target/mips/translate.c
@@ -28744,49 +28744,76 @@ static void gen_check_zero_element(TCGv tresult, 
uint8_t df, uint8_t wt)
 tcg_temp_free_i64(t1);
 }
 
+static bool gen_msa_BxZ_V(DisasContext *ctx, int wt, int s16, TCGCond cond)
+{
+TCGv_i64 t0;
+
+check_msa_access(ctx);
+
+if (ctx->hflags & MIPS_HFLAG_BMASK) {
+generate_exception_end(ctx, EXCP_RI);
+return true;
+}
+t0 = tcg_temp_new_i64();
+tcg_gen_or_i64(t0, msa_wr_d[wt << 1], msa_wr_d[(wt << 1) + 1]);
+tcg_gen_setcondi_i64(cond, t0, t0, 0);
+tcg_gen_trunc_i64_tl(bcond, t0);
+tcg_temp_free_i64(t0);
+
+ctx->btarget = ctx->base.pc_next + (s16 << 2) + 4;
+
+ctx->hflags |= MIPS_HFLAG_BC;
+ctx->hflags |= MIPS_HFLAG_BDS32;
+
+return true;
+}
+
+static bool gen_msa_BxZ(DisasContext *ctx, int df, int wt, int s16, bool 
if_not)
+{
+check_msa_access(ctx);
+
+if (ctx->hflags & MIPS_HFLAG_BMASK) {
+generate_exception_end(ctx, EXCP_RI);
+return true;
+}
+
+gen_check_zero_element(bcond, df, wt);
+if (if_not) {
+tcg_gen_setcondi_tl(TCG_COND_EQ, bcond, bcond, 0);
+}
+
+ctx->btarget = ctx->base.pc_next + (s16 << 2) + 4;
+ctx->hflags |= MIPS_HFLAG_BC;
+ctx->hflags |= MIPS_HFLAG_BDS32;
+
+return true;
+}
+
 static void gen_msa_branch(DisasContext *ctx, uint32_t op1)
 {
 uint8_t df = (ctx->opcode >> 21) & 0x3;
 uint8_t wt = (ctx->opcode >> 16) & 0x1f;
 int64_t s16 = (int16_t)ctx->opcode;
 
-check_msa_access(ctx);
-
-if (ctx->hflags & MIPS_HFLAG_BMASK) {
-generate_exception_end(ctx, EXCP_RI);
-return;
-}
 switch (op1) {
 case OPC_BZ_V:
 case OPC_BNZ_V:
-{
-TCGv_i64 t0 = tcg_temp_new_i64();
-tcg_gen_or_i64(t0, msa_wr_d[wt << 1], msa_wr_d[(wt << 1) + 1]);
-tcg_gen_setcondi_i64((op1 == OPC_BZ_V) ?
-TCG_COND_EQ : TCG_COND_NE, t0, t0, 0);
-tcg_gen_trunc_i64_tl(bcond, t0);
-tcg_temp_free_i64(t0);
-}
+gen_msa_BxZ_V(ctx, wt, s16, (op1 == OPC_BZ_V) ?
+TCG_COND_EQ : TCG_COND_NE);
 break;
 case OPC_BZ_B:
 case OPC_BZ_H:
 case OPC_BZ_W:
 case OPC_BZ_D:
-gen_check_zero_element(bcond, df, wt);
+gen_msa_BxZ(ctx, df, wt, s16, false);
 break;
 case OPC_BNZ_B:
 case OPC_BNZ_H:
 case OPC_BNZ_W:
 case OPC_BNZ_D:
-gen_check_zero_element(bcond, df, wt);
-tcg_gen_setcondi_tl(TCG_COND_EQ, bcond, bcond, 0);
+gen_msa_BxZ(ctx, df, wt, s16, true);
 break;
 }
-
-ctx->btarget = ctx->base.pc_next + (s16 << 2) + 4;
-
-ctx->hflags |= MIPS_HFLAG_BC;
-ctx->hflags |= MIPS_HFLAG_BDS32;
 }
 
 static void gen_msa_i8(DisasContext *ctx)
-- 
2.26.2

[PATCH 2/9] target/mips: Simplify msa_reset()

2020-12-02 Thread Philippe Mathieu-Daudé

Call msa_reset() unconditionally, but only reset
the MSA registers if MSA is implemented.

Signed-off-by: Philippe Mathieu-Daudé 
---
Maybe not very useful.
---
 target/mips/translate.c  | 5 +
 target/mips/translate_init.c.inc | 4 
 2 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/target/mips/translate.c b/target/mips/translate.c
index a7c01c2ea5b..803ffefba2c 100644
--- a/target/mips/translate.c
+++ b/target/mips/translate.c
@@ -31997,10 +31997,7 @@ void cpu_state_reset(CPUMIPSState *env)
 env->hflags |= MIPS_HFLAG_M16;
 }
 
-/* MSA */
-if (ase_msa_available(env)) {
-msa_reset(env);
-}
+msa_reset(env);
 
 compute_hflags(env);
 restore_fp_status(env);
diff --git a/target/mips/translate_init.c.inc b/target/mips/translate_init.c.inc
index 79f75ed863c..3b069190ed8 100644
--- a/target/mips/translate_init.c.inc
+++ b/target/mips/translate_init.c.inc
@@ -1018,6 +1018,10 @@ static void mvp_init (CPUMIPSState *env, const 
mips_def_t *def)
 
 static void msa_reset(CPUMIPSState *env)
 {
+if (!ase_msa_available(env)) {
+return;
+}
+
 #ifdef CONFIG_USER_ONLY
 /* MSA access enabled */
 env->CP0_Config5 |= 1 << CP0C5_MSAEn;
-- 
2.26.2

[PATCH 1/9] target/mips: Introduce ase_msa_available() helper

2020-12-02 Thread Philippe Mathieu-Daudé

Instead of accessing CP0_Config3 directly and checking
the 'MSA Present' bit, introduce an explicit helper,
making the code easier to read.

Signed-off-by: Philippe Mathieu-Daudé 
---
 target/mips/internal.h  |  6 ++
 target/mips/kvm.c   | 12 ++--
 target/mips/translate.c |  8 +++-
 3 files changed, 15 insertions(+), 11 deletions(-)

diff --git a/target/mips/internal.h b/target/mips/internal.h
index dd8a7809b64..f882ac1580c 100644
--- a/target/mips/internal.h
+++ b/target/mips/internal.h
@@ -80,6 +80,12 @@ enum CPUMIPSMSADataFormat {
 DF_DOUBLE
 };
 
+/* Check presence of MSA implementation */
+static inline bool ase_msa_available(CPUMIPSState *env)
+{
+return env->CP0_Config3 & (1 << CP0C3_MSAP);
+}
+
 void mips_cpu_do_interrupt(CPUState *cpu);
 bool mips_cpu_exec_interrupt(CPUState *cpu, int int_req);
 void mips_cpu_dump_state(CPUState *cpu, FILE *f, int flags);
diff --git a/target/mips/kvm.c b/target/mips/kvm.c
index 72637a1e021..9bfd67ede39 100644
--- a/target/mips/kvm.c
+++ b/target/mips/kvm.c
@@ -81,7 +81,7 @@ int kvm_arch_init_vcpu(CPUState *cs)
 }
 }
 
-if (kvm_mips_msa_cap && env->CP0_Config3 & (1 << CP0C3_MSAP)) {
+if (kvm_mips_msa_cap && ase_msa_available(env)) {
 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_MIPS_MSA, 0, 0);
 if (ret < 0) {
 /* mark unsupported so it gets disabled on reset */
@@ -107,7 +107,7 @@ void kvm_mips_reset_vcpu(MIPSCPU *cpu)
 warn_report("KVM does not support FPU, disabling");
 env->CP0_Config1 &= ~(1 << CP0C1_FP);
 }
-if (!kvm_mips_msa_cap && env->CP0_Config3 & (1 << CP0C3_MSAP)) {
+if (!kvm_mips_msa_cap && ase_msa_available(env)) {
 warn_report("KVM does not support MSA, disabling");
 env->CP0_Config3 &= ~(1 << CP0C3_MSAP);
 }
@@ -624,7 +624,7 @@ static int kvm_mips_put_fpu_registers(CPUState *cs, int 
level)
  * FPU register state is a subset of MSA vector state, so don't put FPU
  * registers if we're emulating a CPU with MSA.
  */
-if (!(env->CP0_Config3 & (1 << CP0C3_MSAP))) {
+if (!ase_msa_available(env)) {
 /* Floating point registers */
 for (i = 0; i < 32; ++i) {
 if (env->CP0_Status & (1 << CP0St_FR)) {
@@ -643,7 +643,7 @@ static int kvm_mips_put_fpu_registers(CPUState *cs, int 
level)
 }
 
 /* Only put MSA state if we're emulating a CPU with MSA */
-if (env->CP0_Config3 & (1 << CP0C3_MSAP)) {
+if (ase_msa_available(env)) {
 /* MSA Control Registers */
 if (level == KVM_PUT_FULL_STATE) {
 err = kvm_mips_put_one_reg(cs, KVM_REG_MIPS_MSA_IR,
@@ -704,7 +704,7 @@ static int kvm_mips_get_fpu_registers(CPUState *cs)
  * FPU register state is a subset of MSA vector state, so don't save 
FPU
  * registers if we're emulating a CPU with MSA.
  */
-if (!(env->CP0_Config3 & (1 << CP0C3_MSAP))) {
+if (!ase_msa_available(env)) {
 /* Floating point registers */
 for (i = 0; i < 32; ++i) {
 if (env->CP0_Status & (1 << CP0St_FR)) {
@@ -723,7 +723,7 @@ static int kvm_mips_get_fpu_registers(CPUState *cs)
 }
 
 /* Only get MSA state if we're emulating a CPU with MSA */
-if (env->CP0_Config3 & (1 << CP0C3_MSAP)) {
+if (ase_msa_available(env)) {
 /* MSA Control Registers */
 err = kvm_mips_get_one_reg(cs, KVM_REG_MIPS_MSA_IR,
 &env->msair);
diff --git a/target/mips/translate.c b/target/mips/translate.c
index c64a1bc42e1..a7c01c2ea5b 100644
--- a/target/mips/translate.c
+++ b/target/mips/translate.c
@@ -25049,8 +25049,7 @@ static void decode_opc_special(CPUMIPSState *env, 
DisasContext *ctx)
 gen_trap(ctx, op1, rs, rt, -1);
 break;
 case OPC_LSA: /* OPC_PMON */
-if ((ctx->insn_flags & ISA_MIPS32R6) ||
-(env->CP0_Config3 & (1 << CP0C3_MSAP))) {
+if ((ctx->insn_flags & ISA_MIPS32R6) || ase_msa_available(env)) {
 decode_opc_special_r6(env, ctx);
 } else {
 /* Pmon entry point, also R4010 selsl */
@@ -25152,8 +25151,7 @@ static void decode_opc_special(CPUMIPSState *env, 
DisasContext *ctx)
 }
 break;
 case OPC_DLSA:
-if ((ctx->insn_flags & ISA_MIPS32R6) ||
-(env->CP0_Config3 & (1 << CP0C3_MSAP))) {
+if ((ctx->insn_flags & ISA_MIPS32R6) || ase_msa_available(env)) {
 decode_opc_special_r6(env, ctx);
 }
 break;
@@ -32000,7 +31998,7 @@ void cpu_state_reset(CPUMIPSState *env)
 }
 
 /* MSA */
-if (env->CP0_Config3 & (1 << CP0C3_MSAP)) {
+if (ase_msa_available(env)) {
 msa_reset(env);
 }
 
-- 
2.26.2

[PATCH 0/9] target/mips: Simplify MSA TCG logic

2020-12-02 Thread Philippe Mathieu-Daudé

I converted the MSA opcodes to decodetree. To keep the series
small I split it in two; these are the non-decodetree-specific
patches (so non-decodetree experts can review them ;) ).

First we stop using env->insn_flags to check for MSA
presence, then we restrict TCG functions to DisasContext*.

Based-on: <20201130102228.2395100-1-f4...@amsat.org>
"target/mips: Allow executing MSA instructions on Loongson-3A4000"

Philippe Mathieu-Daudé (9):
  target/mips: Introduce ase_msa_available() helper
  target/mips: Simplify msa_reset()
  target/mips: Use CP0_Config3 to set MIPS_HFLAG_MSA
  target/mips: Simplify MSA TCG logic
  target/mips: Remove now unused ASE_MSA definition
  target/mips: Alias MSA vector registers on FPU scalar registers
  target/mips: Extract msa_translate_init() from mips_tcg_init()
  target/mips: Remove CPUMIPSState* argument from gen_msa*() methods
  target/mips: Explode gen_msa_branch() as gen_msa_BxZ_V/BxZ()

 target/mips/internal.h   |   8 +-
 target/mips/mips-defs.h  |   1 -
 target/mips/kvm.c|  12 +-
 target/mips/translate.c  | 206 ++-
 target/mips/translate_init.c.inc |  12 +-
 5 files changed, 138 insertions(+), 101 deletions(-)

-- 
2.26.2

[PATCH v13 09/10] stream: skip filters when writing backing file name to QCOW2 header

2020-12-02 Thread Andrey Shinkevich via

Avoid writing a filter's JSON file name and a filter's format name to the
QCOW2 image when the backing file is changed after the block-stream
job. This can occur due to a concurrent commit job on the same backing
chain.
A user is still able to specify the 'backing-file' parameter for a
block-stream job, keeping in mind the possible issue mentioned above.
If the user does not specify the 'backing-file' parameter, QEMU will
assign it automatically.

Signed-off-by: Andrey Shinkevich 
---
 block/stream.c | 21 +++--
 blockdev.c |  8 +---
 2 files changed, 20 insertions(+), 9 deletions(-)

diff --git a/block/stream.c b/block/stream.c
index 6e281c7..061268b 100644
--- a/block/stream.c
+++ b/block/stream.c
@@ -17,6 +17,7 @@
 #include "block/blockjob_int.h"
 #include "qapi/error.h"
 #include "qapi/qmp/qerror.h"
+#include "qemu/error-report.h"
 #include "qemu/ratelimit.h"
 #include "sysemu/block-backend.h"
 
@@ -65,6 +66,8 @@ static int stream_prepare(Job *job)
 BlockDriverState *bs = blk_bs(bjob->blk);
 BlockDriverState *unfiltered_bs = bdrv_skip_filters(bs);
 BlockDriverState *base = bdrv_filter_or_cow_bs(s->above_base);
+BlockDriverState *base_unfiltered;
+BlockDriverState *backing_bs;
 Error *local_err = NULL;
 int ret = 0;
 
@@ -75,8 +78,22 @@ static int stream_prepare(Job *job)
 const char *base_id = NULL, *base_fmt = NULL;
 if (base) {
 base_id = s->backing_file_str;
-if (base->drv) {
-base_fmt = base->drv->format_name;
+if (base_id) {
+backing_bs = bdrv_find_backing_image(bs, base_id);
+if (backing_bs && backing_bs->drv) {
+base_fmt = backing_bs->drv->format_name;
+} else {
+error_report("Format not found for backing file %s",
+ s->backing_file_str);
+}
+} else {
+base_unfiltered = bdrv_skip_filters(base);
+if (base_unfiltered) {
+base_id = base_unfiltered->filename;
+if (base_unfiltered->drv) {
+base_fmt = base_unfiltered->drv->format_name;
+}
+}
 }
 }
 bdrv_set_backing_hd(unfiltered_bs, base, _err);
diff --git a/blockdev.c b/blockdev.c
index c917625..70900f4 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -2508,7 +2508,6 @@ void qmp_block_stream(bool has_job_id, const char 
*job_id, const char *device,
 BlockDriverState *base_bs = NULL;
 AioContext *aio_context;
 Error *local_err = NULL;
-const char *base_name = NULL;
 int job_flags = JOB_DEFAULT;
 
 if (!has_on_error) {
@@ -2536,7 +2535,6 @@ void qmp_block_stream(bool has_job_id, const char 
*job_id, const char *device,
 goto out;
 }
 assert(bdrv_get_aio_context(base_bs) == aio_context);
-base_name = base;
 }
 
 if (has_base_node) {
@@ -2551,7 +2549,6 @@ void qmp_block_stream(bool has_job_id, const char 
*job_id, const char *device,
 }
 assert(bdrv_get_aio_context(base_bs) == aio_context);
 bdrv_refresh_filename(base_bs);
-base_name = base_bs->filename;
 }
 
 /* Check for op blockers in the whole chain between bs and base */
@@ -2571,9 +2568,6 @@ void qmp_block_stream(bool has_job_id, const char 
*job_id, const char *device,
 goto out;
 }
 
-/* backing_file string overrides base bs filename */
-base_name = has_backing_file ? backing_file : base_name;
-
 if (has_auto_finalize && !auto_finalize) {
 job_flags |= JOB_MANUAL_FINALIZE;
 }
@@ -2581,7 +2575,7 @@ void qmp_block_stream(bool has_job_id, const char 
*job_id, const char *device,
 job_flags |= JOB_MANUAL_DISMISS;
 }
 
-stream_start(has_job_id ? job_id : NULL, bs, base_bs, base_name,
+stream_start(has_job_id ? job_id : NULL, bs, base_bs, backing_file,
  job_flags, has_speed ? speed : 0, on_error,
  filter_node_name, _err);
 if (local_err) {
-- 
1.8.3.1

[PATCH v13 10/10] block: apply COR-filter to block-stream jobs

2020-12-02 Thread Andrey Shinkevich via

This patch completes the series by applying the COR-filter to
block-stream operations. Adding the filter makes it possible for copied
regions to be discarded in backing files during the block-stream job,
which will reduce disk overuse.
The COR-filter insertion requires changes in the test case
245:test_block_stream_4, which reopens the backing chain during a
block-stream job. There are changes in test #030 as well.
The test case 030:test_stream_parallel was deleted due to multiple
conflicts between the concurrent job operations over the same backing
chain. All the nodes involved in one job are frozen, including
the filter node. Operations on those nodes, including the
filter one, are blocked for other jobs. So the filter node would be
involved in two concurrent jobs together with the adjacent data node,
which is not allowed. That is what the test cases with overlapping jobs
are about. The concept of parallel jobs with common nodes is no longer
considered vital.

Signed-off-by: Andrey Shinkevich 
---
 block/stream.c | 97 ++
 tests/qemu-iotests/030 | 51 +++-
 tests/qemu-iotests/030.out |  4 +-
 tests/qemu-iotests/141.out |  2 +-
 tests/qemu-iotests/245 | 22 +++
 5 files changed, 86 insertions(+), 90 deletions(-)

diff --git a/block/stream.c b/block/stream.c
index 061268b..2f80fae 100644
--- a/block/stream.c
+++ b/block/stream.c
@@ -18,8 +18,10 @@
 #include "qapi/error.h"
 #include "qapi/qmp/qerror.h"
 #include "qemu/error-report.h"
+#include "qapi/qmp/qdict.h"
 #include "qemu/ratelimit.h"
 #include "sysemu/block-backend.h"
+#include "block/copy-on-read.h"
 
 enum {
 /*
@@ -34,6 +36,8 @@ typedef struct StreamBlockJob {
 BlockJob common;
 BlockDriverState *base_overlay; /* COW overlay (stream from this) */
 BlockDriverState *above_base;   /* Node directly above the base */
+BlockDriverState *cor_filter_bs;
+BlockDriverState *target_bs;
 BlockdevOnError on_error;
 char *backing_file_str;
 bool bs_read_only;
@@ -45,8 +49,7 @@ static int coroutine_fn stream_populate(BlockBackend *blk,
 {
 assert(bytes < SIZE_MAX);
 
-return blk_co_preadv(blk, offset, bytes, NULL,
- BDRV_REQ_COPY_ON_READ | BDRV_REQ_PREFETCH);
+return blk_co_preadv(blk, offset, bytes, NULL, BDRV_REQ_PREFETCH);
 }
 
 static void stream_abort(Job *job)
@@ -54,24 +57,21 @@ static void stream_abort(Job *job)
 StreamBlockJob *s = container_of(job, StreamBlockJob, common.job);
 
 if (s->chain_frozen) {
-BlockJob *bjob = >common;
-bdrv_unfreeze_backing_chain(blk_bs(bjob->blk), s->above_base);
+bdrv_unfreeze_backing_chain(s->cor_filter_bs, s->above_base);
 }
 }
 
 static int stream_prepare(Job *job)
 {
 StreamBlockJob *s = container_of(job, StreamBlockJob, common.job);
-BlockJob *bjob = >common;
-BlockDriverState *bs = blk_bs(bjob->blk);
-BlockDriverState *unfiltered_bs = bdrv_skip_filters(bs);
+BlockDriverState *unfiltered_bs = bdrv_skip_filters(s->target_bs);
 BlockDriverState *base = bdrv_filter_or_cow_bs(s->above_base);
 BlockDriverState *base_unfiltered;
 BlockDriverState *backing_bs;
 Error *local_err = NULL;
 int ret = 0;
 
-bdrv_unfreeze_backing_chain(bs, s->above_base);
+bdrv_unfreeze_backing_chain(s->cor_filter_bs, s->above_base);
 s->chain_frozen = false;
 
 if (bdrv_cow_child(unfiltered_bs)) {
@@ -79,7 +79,7 @@ static int stream_prepare(Job *job)
 if (base) {
 base_id = s->backing_file_str;
 if (base_id) {
-backing_bs = bdrv_find_backing_image(bs, base_id);
+backing_bs = bdrv_find_backing_image(unfiltered_bs, base_id);
 if (backing_bs && backing_bs->drv) {
 base_fmt = backing_bs->drv->format_name;
 } else {
@@ -111,15 +111,16 @@ static void stream_clean(Job *job)
 {
 StreamBlockJob *s = container_of(job, StreamBlockJob, common.job);
 BlockJob *bjob = >common;
-BlockDriverState *bs = blk_bs(bjob->blk);
 
 /* Reopen the image back in read-only mode if necessary */
 if (s->bs_read_only) {
 /* Give up write permissions before making it read-only */
 blk_set_perm(bjob->blk, 0, BLK_PERM_ALL, _abort);
-bdrv_reopen_set_read_only(bs, true, NULL);
+bdrv_reopen_set_read_only(s->target_bs, true, NULL);
 }
 
+bdrv_cor_filter_drop(s->cor_filter_bs);
+
 g_free(s->backing_file_str);
 }
 
@@ -127,9 +128,7 @@ static int coroutine_fn stream_run(Job *job, Error **errp)
 {
 StreamBlockJob *s = container_of(job, StreamBlockJob, common.job);
 BlockBackend *blk = s->common.blk;
-BlockDriverState *bs = blk_bs(blk);
-BlockDriverState *unfiltered_bs = bdrv_skip_filters(bs);
-bool enable_cor = !bdrv_cow_child(s->base_overlay);
+BlockDriverState *unfiltered_bs = bdrv_skip_filters(s->target_bs);

[PATCH v13 03/10] copy-on-read: add filter drop function

2020-12-02 Thread Andrey Shinkevich via

Provide an API for the COR-filter removal. Also, drop the filter child
permissions for an inactive state when the filter node is being
removed.
To insert the filter, the block generic layer function
bdrv_insert_node() can be used.
The new function bdrv_cor_filter_drop() may be considered an
intermediate solution until the QEMU permission update system has
been overhauled. Then we will be able to implement the API function
bdrv_remove_node() in the block generic layer.

Signed-off-by: Andrey Shinkevich 
Reviewed-by: Vladimir Sementsov-Ogievskiy 
---
 block/copy-on-read.c | 56 
 block/copy-on-read.h | 32 ++
 2 files changed, 88 insertions(+)
 create mode 100644 block/copy-on-read.h

diff --git a/block/copy-on-read.c b/block/copy-on-read.c
index cb03e0f..618c4c4 100644
--- a/block/copy-on-read.c
+++ b/block/copy-on-read.c
@@ -23,11 +23,20 @@
 #include "qemu/osdep.h"
 #include "block/block_int.h"
 #include "qemu/module.h"
+#include "qapi/error.h"
+#include "block/copy-on-read.h"
+
+
+typedef struct BDRVStateCOR {
+bool active;
+} BDRVStateCOR;
 
 
 static int cor_open(BlockDriverState *bs, QDict *options, int flags,
 Error **errp)
 {
+BDRVStateCOR *state = bs->opaque;
+
 bs->file = bdrv_open_child(NULL, options, "file", bs, _of_bds,
BDRV_CHILD_FILTERED | BDRV_CHILD_PRIMARY,
false, errp);
@@ -42,6 +51,13 @@ static int cor_open(BlockDriverState *bs, QDict *options, 
int flags,
 ((BDRV_REQ_FUA | BDRV_REQ_MAY_UNMAP | BDRV_REQ_NO_FALLBACK) &
 bs->file->bs->supported_zero_flags);
 
+state->active = true;
+
+/*
+ * We don't need to call bdrv_child_refresh_perms() now as the permissions
+ * will be updated later when the filter node gets its parent.
+ */
+
 return 0;
 }
 
@@ -57,6 +73,17 @@ static void cor_child_perm(BlockDriverState *bs, BdrvChild 
*c,
uint64_t perm, uint64_t shared,
uint64_t *nperm, uint64_t *nshared)
 {
+BDRVStateCOR *s = bs->opaque;
+
+if (!s->active) {
+/*
+ * While the filter is being removed
+ */
+*nperm = 0;
+*nshared = BLK_PERM_ALL;
+return;
+}
+
 *nperm = perm & PERM_PASSTHROUGH;
 *nshared = (shared & PERM_PASSTHROUGH) | PERM_UNCHANGED;
 
@@ -135,6 +162,7 @@ static void cor_lock_medium(BlockDriverState *bs, bool 
locked)
 
 static BlockDriver bdrv_copy_on_read = {
 .format_name= "copy-on-read",
+.instance_size  = sizeof(BDRVStateCOR),
 
 .bdrv_open  = cor_open,
 .bdrv_child_perm= cor_child_perm,
@@ -154,6 +182,34 @@ static BlockDriver bdrv_copy_on_read = {
 .is_filter  = true,
 };
 
+
+void bdrv_cor_filter_drop(BlockDriverState *cor_filter_bs)
+{
+BdrvChild *child;
+BlockDriverState *bs;
+BDRVStateCOR *s = cor_filter_bs->opaque;
+
+child = bdrv_filter_child(cor_filter_bs);
+if (!child) {
+return;
+}
+bs = child->bs;
+
+/* Retain the BDS until we complete the graph change. */
+bdrv_ref(bs);
+/* Hold a guest back from writing while permissions are being reset. */
+bdrv_drained_begin(bs);
+/* Drop permissions before the graph change. */
+s->active = false;
+bdrv_child_refresh_perms(cor_filter_bs, child, _abort);
+bdrv_replace_node(cor_filter_bs, bs, _abort);
+
+bdrv_drained_end(bs);
+bdrv_unref(bs);
+bdrv_unref(cor_filter_bs);
+}
+
+
 static void bdrv_copy_on_read_init(void)
 {
 bdrv_register(_copy_on_read);
diff --git a/block/copy-on-read.h b/block/copy-on-read.h
new file mode 100644
index 000..7bf405d
--- /dev/null
+++ b/block/copy-on-read.h
@@ -0,0 +1,32 @@
+/*
+ * Copy-on-read filter block driver
+ *
+ * The filter driver performs Copy-On-Read (COR) operations
+ *
+ * Copyright (c) 2018-2020 Virtuozzo International GmbH.
+ *
+ * Author:
+ *   Andrey Shinkevich 
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef BLOCK_COPY_ON_READ
+#define BLOCK_COPY_ON_READ
+
+#include "block/block_int.h"
+
+void bdrv_cor_filter_drop(BlockDriverState *cor_filter_bs);
+
+#endif /* BLOCK_COPY_ON_READ */
-- 
1.8.3.1

[PATCH v13 01/10] copy-on-read: support preadv/pwritev_part functions

2020-12-02 Thread Andrey Shinkevich via

Add support for the recently introduced functions
bdrv_co_preadv_part()
and
bdrv_co_pwritev_part()
to the COR-filter driver.

Signed-off-by: Andrey Shinkevich 
Reviewed-by: Vladimir Sementsov-Ogievskiy 
---
 block/copy-on-read.c | 28 
 1 file changed, 16 insertions(+), 12 deletions(-)

diff --git a/block/copy-on-read.c b/block/copy-on-read.c
index 2816e61..cb03e0f 100644
--- a/block/copy-on-read.c
+++ b/block/copy-on-read.c
@@ -74,21 +74,25 @@ static int64_t cor_getlength(BlockDriverState *bs)
 }
 
 
-static int coroutine_fn cor_co_preadv(BlockDriverState *bs,
-  uint64_t offset, uint64_t bytes,
-  QEMUIOVector *qiov, int flags)
+static int coroutine_fn cor_co_preadv_part(BlockDriverState *bs,
+   uint64_t offset, uint64_t bytes,
+   QEMUIOVector *qiov,
+   size_t qiov_offset,
+   int flags)
 {
-return bdrv_co_preadv(bs->file, offset, bytes, qiov,
-  flags | BDRV_REQ_COPY_ON_READ);
+return bdrv_co_preadv_part(bs->file, offset, bytes, qiov, qiov_offset,
+   flags | BDRV_REQ_COPY_ON_READ);
 }
 
 
-static int coroutine_fn cor_co_pwritev(BlockDriverState *bs,
-   uint64_t offset, uint64_t bytes,
-   QEMUIOVector *qiov, int flags)
+static int coroutine_fn cor_co_pwritev_part(BlockDriverState *bs,
+uint64_t offset,
+uint64_t bytes,
+QEMUIOVector *qiov,
+size_t qiov_offset, int flags)
 {
-
-return bdrv_co_pwritev(bs->file, offset, bytes, qiov, flags);
+return bdrv_co_pwritev_part(bs->file, offset, bytes, qiov, qiov_offset,
+flags);
 }
 
 
@@ -137,8 +141,8 @@ static BlockDriver bdrv_copy_on_read = {
 
 .bdrv_getlength = cor_getlength,
 
-.bdrv_co_preadv = cor_co_preadv,
-.bdrv_co_pwritev= cor_co_pwritev,
+.bdrv_co_preadv_part= cor_co_preadv_part,
+.bdrv_co_pwritev_part   = cor_co_pwritev_part,
 .bdrv_co_pwrite_zeroes  = cor_co_pwrite_zeroes,
 .bdrv_co_pdiscard   = cor_co_pdiscard,
 .bdrv_co_pwritev_compressed = cor_co_pwritev_compressed,
-- 
1.8.3.1

[PATCH v13 05/10] qapi: create BlockdevOptionsCor structure for COR driver

2020-12-02 Thread Andrey Shinkevich via

Create the BlockdevOptionsCor structure for COR driver specific options,
splitting it off from BlockdevOptionsGenericFormat. Its only option,
the 'bottom' node, denotes an image file that limits the
COR operations in the backing chain.
We are going to use the COR-filter for a block-stream job and will pass
a bottom node name to the COR driver. The bottom node is the first
non-filter overlay of the base. It was introduced because the base node
itself may change due to possible concurrent jobs.

Suggested-by: Max Reitz 
Suggested-by: Vladimir Sementsov-Ogievskiy 
Signed-off-by: Andrey Shinkevich 
Reviewed-by: Vladimir Sementsov-Ogievskiy 
---
 block/copy-on-read.c | 57 ++--
 qapi/block-core.json | 21 ++-
 2 files changed, 75 insertions(+), 3 deletions(-)

diff --git a/block/copy-on-read.c b/block/copy-on-read.c
index 618c4c4..2cddc96 100644
--- a/block/copy-on-read.c
+++ b/block/copy-on-read.c
@@ -24,18 +24,23 @@
 #include "block/block_int.h"
 #include "qemu/module.h"
 #include "qapi/error.h"
+#include "qapi/qmp/qdict.h"
 #include "block/copy-on-read.h"
 
 
 typedef struct BDRVStateCOR {
 bool active;
+BlockDriverState *bottom_bs;
 } BDRVStateCOR;
 
 
 static int cor_open(BlockDriverState *bs, QDict *options, int flags,
 Error **errp)
 {
+BlockDriverState *bottom_bs = NULL;
 BDRVStateCOR *state = bs->opaque;
+/* Find a bottom node name, if any */
+const char *bottom_node = qdict_get_try_str(options, "bottom");
 
 bs->file = bdrv_open_child(NULL, options, "file", bs, _of_bds,
BDRV_CHILD_FILTERED | BDRV_CHILD_PRIMARY,
@@ -51,7 +56,17 @@ static int cor_open(BlockDriverState *bs, QDict *options, 
int flags,
 ((BDRV_REQ_FUA | BDRV_REQ_MAY_UNMAP | BDRV_REQ_NO_FALLBACK) &
 bs->file->bs->supported_zero_flags);
 
+if (bottom_node) {
+bottom_bs = bdrv_lookup_bs(NULL, bottom_node, errp);
+if (!bottom_bs) {
+error_setg(errp, "Bottom node '%s' not found", bottom_node);
+qdict_del(options, "bottom");
+return -EINVAL;
+}
+qdict_del(options, "bottom");
+}
 state->active = true;
+state->bottom_bs = bottom_bs;
 
 /*
  * We don't need to call bdrv_child_refresh_perms() now as the permissions
@@ -107,8 +122,46 @@ static int coroutine_fn 
cor_co_preadv_part(BlockDriverState *bs,
size_t qiov_offset,
int flags)
 {
-return bdrv_co_preadv_part(bs->file, offset, bytes, qiov, qiov_offset,
-   flags | BDRV_REQ_COPY_ON_READ);
+int64_t n;
+int local_flags;
+int ret;
+BDRVStateCOR *state = bs->opaque;
+
+if (!state->bottom_bs) {
+return bdrv_co_preadv_part(bs->file, offset, bytes, qiov, qiov_offset,
+   flags | BDRV_REQ_COPY_ON_READ);
+}
+
+while (bytes) {
+local_flags = flags;
+
+/* In case of failure, try to copy-on-read anyway */
+ret = bdrv_is_allocated(bs->file->bs, offset, bytes, );
+if (ret <= 0) {
+ret = 
bdrv_is_allocated_above(bdrv_backing_chain_next(bs->file->bs),
+  state->bottom_bs, true, offset,
+  n, );
+if (ret == 1 || ret < 0) {
+local_flags |= BDRV_REQ_COPY_ON_READ;
+}
+/* Finish earlier if the end of a backing file has been reached */
+if (n == 0) {
+break;
+}
+}
+
+ret = bdrv_co_preadv_part(bs->file, offset, n, qiov, qiov_offset,
+  local_flags);
+if (ret < 0) {
+return ret;
+}
+
+offset += n;
+qiov_offset += n;
+bytes -= n;
+}
+
+return 0;
 }
 
 
diff --git a/qapi/block-core.json b/qapi/block-core.json
index 8ef3df6..04055ef 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -3942,6 +3942,25 @@
   'data': { 'throttle-group': 'str',
 'file' : 'BlockdevRef'
  } }
+
+##
+# @BlockdevOptionsCor:
+#
+# Driver specific block device options for the copy-on-read driver.
+#
+# @bottom: the name of a non-filter node (allocation-bearing layer) that limits
+#  the COR operations in the backing chain (inclusive).
+#  For the block-stream job, it will be the first non-filter overlay of
+#  the base node. We do not involve the base node into the COR
+#  operations because the base may change due to a concurrent
+#  block-commit job on the same backing chain.
+#
+# Since: 5.2
+##
+{ 'struct': 'BlockdevOptionsCor',
+  'base': 'BlockdevOptionsGenericFormat',
+  'data': { '*bottom': 'str' } }
+
 ##
 # @BlockdevOptions:
 #
@@ -3994,7 +4013,7 @@
   'bochs':

[PATCH v13 00/10] Apply COR-filter to the block-stream permanently

2020-12-02 Thread Andrey Shinkevich via

The previous version 12 was discussed in the email thread:
Message-Id: <1603390423-980205-1-git-send-email-andrey.shinkev...@virtuozzo.com>

v13:
  02: The bdrv_remove_node() was dropped.
  05: Three patches with fixes were merged into one.
  06: Minor changes based on Vladimir's suggestions.
  08: Three patches with fixes were merged into one.
  09: The search for format_name of backing file was added.
  10: The flag BLK_PERM_GRAPH_MOD was removed.

Andrey Shinkevich (10):
  copy-on-read: support preadv/pwritev_part functions
  block: add API function to insert a node
  copy-on-read: add filter drop function
  qapi: add filter-node-name to block-stream
  qapi: create BlockdevOptionsCor structure for COR driver
  iotests: add #310 to test bottom node in COR driver
  block: include supported_read_flags into BDS structure
  copy-on-read: skip non-guest reads if no copy needed
  stream: skip filters when writing backing file name to QCOW2 header
  block: apply COR-filter to block-stream jobs

 block.c|  25 +++
 block/copy-on-read.c   | 143 +
 block/copy-on-read.h   |  32 +
 block/io.c |  12 +++-
 block/monitor/block-hmp-cmds.c |   4 +-
 block/stream.c | 120 +++---
 blockdev.c |  12 ++--
 include/block/block.h  |  10 ++-
 include/block/block_int.h  |  11 +++-
 qapi/block-core.json   |  27 +++-
 tests/qemu-iotests/030 |  51 ++-
 tests/qemu-iotests/030.out |   4 +-
 tests/qemu-iotests/141.out |   2 +-
 tests/qemu-iotests/245 |  22 +--
 tests/qemu-iotests/310 | 114 
 tests/qemu-iotests/310.out |  15 +
 tests/qemu-iotests/group   |   1 +
 17 files changed, 484 insertions(+), 121 deletions(-)
 create mode 100644 block/copy-on-read.h
 create mode 100755 tests/qemu-iotests/310
 create mode 100644 tests/qemu-iotests/310.out

-- 
1.8.3.1

[PATCH v13 07/10] block: include supported_read_flags into BDS structure

2020-12-02 Thread Andrey Shinkevich via

Add the new member supported_read_flags to the BlockDriverState
structure. It will control the flags set for copy-on-read operations.
Make the block generic layer evaluate supported read flags before they
go to a block driver.

Suggested-by: Vladimir Sementsov-Ogievskiy 
Signed-off-by: Andrey Shinkevich 
Reviewed-by: Vladimir Sementsov-Ogievskiy 
---
 block/io.c| 12 ++--
 include/block/block_int.h |  4 
 2 files changed, 14 insertions(+), 2 deletions(-)

diff --git a/block/io.c b/block/io.c
index ec5e152..e28b11c 100644
--- a/block/io.c
+++ b/block/io.c
@@ -1405,6 +1405,9 @@ static int coroutine_fn bdrv_aligned_preadv(BdrvChild 
*child,
 if (flags & BDRV_REQ_COPY_ON_READ) {
 int64_t pnum;
 
+/* The flag BDRV_REQ_COPY_ON_READ has reached its addressee */
+flags &= ~BDRV_REQ_COPY_ON_READ;
+
 ret = bdrv_is_allocated(bs, offset, bytes, );
 if (ret < 0) {
 goto out;
@@ -1426,9 +1429,13 @@ static int coroutine_fn bdrv_aligned_preadv(BdrvChild 
*child,
 goto out;
 }
 
+if (flags & ~bs->supported_read_flags) {
+abort();
+}
+
 max_bytes = ROUND_UP(MAX(0, total_bytes - offset), align);
 if (bytes <= max_bytes && bytes <= max_transfer) {
-ret = bdrv_driver_preadv(bs, offset, bytes, qiov, qiov_offset, 0);
+ret = bdrv_driver_preadv(bs, offset, bytes, qiov, qiov_offset, flags);
 goto out;
 }
 
@@ -1441,7 +1448,8 @@ static int coroutine_fn bdrv_aligned_preadv(BdrvChild 
*child,
 
 ret = bdrv_driver_preadv(bs, offset + bytes - bytes_remaining,
  num, qiov,
- qiov_offset + bytes - bytes_remaining, 0);
+ qiov_offset + bytes - bytes_remaining,
+ flags);
 max_bytes -= num;
 } else {
 num = bytes_remaining;
diff --git a/include/block/block_int.h b/include/block/block_int.h
index c05fa1e..247e166 100644
--- a/include/block/block_int.h
+++ b/include/block/block_int.h
@@ -873,6 +873,10 @@ struct BlockDriverState {
 /* I/O Limits */
 BlockLimits bl;
 
+/*
+ * Flags honored during pread
+ */
+unsigned int supported_read_flags;
 /* Flags honored during pwrite (so far: BDRV_REQ_FUA,
  * BDRV_REQ_WRITE_UNCHANGED).
  * If a driver does not support BDRV_REQ_WRITE_UNCHANGED, those
-- 
1.8.3.1

[PATCH v13 02/10] block: add API function to insert a node

2020-12-02 Thread Andrey Shinkevich via

Provide an API for inserting a node into a backing chain.

Suggested-by: Max Reitz 
Signed-off-by: Andrey Shinkevich 
Reviewed-by: Vladimir Sementsov-Ogievskiy 
---
 block.c   | 25 +
 include/block/block.h |  2 ++
 2 files changed, 27 insertions(+)

diff --git a/block.c b/block.c
index f1cedac..b71c39f 100644
--- a/block.c
+++ b/block.c
@@ -4698,6 +4698,31 @@ static void bdrv_delete(BlockDriverState *bs)
 g_free(bs);
 }
 
+BlockDriverState *bdrv_insert_node(BlockDriverState *bs, QDict *node_options,
+   int flags, Error **errp)
+{
+BlockDriverState *new_node_bs;
+Error *local_err = NULL;
+
+new_node_bs =  bdrv_open(NULL, NULL, node_options, flags, errp);
+if (new_node_bs == NULL) {
+error_prepend(errp, "Could not create node: ");
+return NULL;
+}
+
+bdrv_drained_begin(bs);
+bdrv_replace_node(bs, new_node_bs, _err);
+bdrv_drained_end(bs);
+
+if (local_err) {
+bdrv_unref(new_node_bs);
+error_propagate(errp, local_err);
+return NULL;
+}
+
+return new_node_bs;
+}
+
 /*
  * Run consistency checks on an image
  *
diff --git a/include/block/block.h b/include/block/block.h
index c9d7c58..81a3894 100644
--- a/include/block/block.h
+++ b/include/block/block.h
@@ -350,6 +350,8 @@ void bdrv_append(BlockDriverState *bs_new, BlockDriverState 
*bs_top,
  Error **errp);
 void bdrv_replace_node(BlockDriverState *from, BlockDriverState *to,
Error **errp);
+BlockDriverState *bdrv_insert_node(BlockDriverState *bs, QDict *node_options,
+   int flags, Error **errp);
 
 int bdrv_parse_aio(const char *mode, int *flags);
 int bdrv_parse_cache_mode(const char *mode, int *flags, bool *writethrough);
-- 
1.8.3.1

[PATCH v13 06/10] iotests: add #310 to test bottom node in COR driver

2020-12-02 Thread Andrey Shinkevich via

The test case #310 is similar to #216 by Max Reitz. The difference is
that the test #310 passes a bottom node to the COR filter driver.

Signed-off-by: Andrey Shinkevich 
Reviewed-by: Vladimir Sementsov-Ogievskiy 
---
 tests/qemu-iotests/310 | 114 +
 tests/qemu-iotests/310.out |  15 ++
 tests/qemu-iotests/group   |   1 +
 3 files changed, 130 insertions(+)
 create mode 100755 tests/qemu-iotests/310
 create mode 100644 tests/qemu-iotests/310.out

diff --git a/tests/qemu-iotests/310 b/tests/qemu-iotests/310
new file mode 100755
index 000..c8b34cd
--- /dev/null
+++ b/tests/qemu-iotests/310
@@ -0,0 +1,114 @@
+#!/usr/bin/env python3
+#
+# Copy-on-read tests using a COR filter with a bottom node
+#
+# Copyright (C) 2018 Red Hat, Inc.
+# Copyright (c) 2020 Virtuozzo International GmbH
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+
+import iotests
+from iotests import log, qemu_img, qemu_io_silent
+
+# Need backing file support
+iotests.script_initialize(supported_fmts=['qcow2', 'qcow', 'qed', 'vmdk'],
+  supported_platforms=['linux'])
+
+log('')
+log('=== Copy-on-read across nodes ===')
+log('')
+
+# This test is similar to the 216 one by Max Reitz 
+# The difference is that this test case involves a bottom node to the
+# COR filter driver.
+
+with iotests.FilePath('base.img') as base_img_path, \
+ iotests.FilePath('mid.img') as mid_img_path, \
+ iotests.FilePath('top.img') as top_img_path, \
+ iotests.VM() as vm:
+
+log('--- Setting up images ---')
+log('')
+
+assert qemu_img('create', '-f', iotests.imgfmt, base_img_path, '64M') == 0
+assert qemu_io_silent(base_img_path, '-c', 'write -P 1 0M 1M') == 0
+assert qemu_io_silent(base_img_path, '-c', 'write -P 1 3M 1M') == 0
+assert qemu_img('create', '-f', iotests.imgfmt, '-b', base_img_path,
+'-F', iotests.imgfmt, mid_img_path) == 0
+assert qemu_io_silent(mid_img_path,  '-c', 'write -P 3 2M 1M') == 0
+assert qemu_io_silent(mid_img_path,  '-c', 'write -P 3 4M 1M') == 0
+assert qemu_img('create', '-f', iotests.imgfmt, '-b', mid_img_path,
+'-F', iotests.imgfmt, top_img_path) == 0
+assert qemu_io_silent(top_img_path,  '-c', 'write -P 2 1M 1M') == 0
+
+#  0 1 2 3 4
+# top2
+# mid  3   3
+# base 1 1
+
+log('Done')
+
+log('')
+log('--- Doing COR ---')
+log('')
+
+vm.launch()
+
+log(vm.qmp('blockdev-add',
+   node_name='node0',
+   driver='copy-on-read',
+   bottom='node2',
+   file={
+   'driver': iotests.imgfmt,
+   'file': {
+   'driver': 'file',
+   'filename': top_img_path
+   },
+   'backing': {
+   'node-name': 'node2',
+   'driver': iotests.imgfmt,
+   'file': {
+   'driver': 'file',
+   'filename': mid_img_path
+   },
+   'backing': {
+   'driver': iotests.imgfmt,
+   'file': {
+   'driver': 'file',
+   'filename': base_img_path
+   }
+   },
+   }
+   }))
+
+# Trigger COR
+log(vm.qmp('human-monitor-command',
+   command_line='qemu-io node0 "read 0 5M"'))
+
+vm.shutdown()
+
+log('')
+log('--- Checking COR result ---')
+log('')
+
+assert qemu_io_silent(base_img_path, '-c', 'discard 0 4M') == 0
+assert qemu_io_silent(mid_img_path, '-c', 'discard 0M 5M') == 0
+assert qemu_io_silent(top_img_path,  '-c', 'read -P 0 0 1M') == 0
+assert qemu_io_silent(top_img_path,  '-c', 'read -P 2 1M 1M') == 0
+assert qemu_io_silent(top_img_path,  '-c', 'read -P 3 2M 1M') == 0
+assert qemu_io_silent(top_img_path,  '-c', 'read -P 0 3M 1M') == 0
+assert qemu_io_silent(top_img_path,  '-c', 'read -P 3 4M 1M') == 0
+
+log('Done')
diff --git a/tests/qemu-iotests/310.out b/tests/qemu-iotests/310.out
new file mode 100644
index 000..a70aa5c
--- /dev/null
+++ b/tests/qemu-iotests/310.out
@@ -0,0 +1,15 @@
+
+=== Copy-on-read across nodes ===
+
+---

[PATCH v13 04/10] qapi: add filter-node-name to block-stream

2020-12-02 Thread Andrey Shinkevich via

Provide the possibility to pass the 'filter-node-name' parameter to the
block-stream job as it is done for the commit block job.

Signed-off-by: Andrey Shinkevich 
Reviewed-by: Vladimir Sementsov-Ogievskiy 
---
 block/monitor/block-hmp-cmds.c | 4 ++--
 block/stream.c | 4 +++-
 blockdev.c | 4 +++-
 include/block/block_int.h  | 7 ++-
 qapi/block-core.json   | 6 ++
 5 files changed, 20 insertions(+), 5 deletions(-)

diff --git a/block/monitor/block-hmp-cmds.c b/block/monitor/block-hmp-cmds.c
index d15a2be..e8a58f3 100644
--- a/block/monitor/block-hmp-cmds.c
+++ b/block/monitor/block-hmp-cmds.c
@@ -508,8 +508,8 @@ void hmp_block_stream(Monitor *mon, const QDict *qdict)
 
 qmp_block_stream(true, device, device, base != NULL, base, false, NULL,
  false, NULL, qdict_haskey(qdict, "speed"), speed, true,
- BLOCKDEV_ON_ERROR_REPORT, false, false, false, false,
- );
+ BLOCKDEV_ON_ERROR_REPORT, false, NULL, false, false, 
false,
+ false, );
 
 hmp_handle_error(mon, error);
 }
diff --git a/block/stream.c b/block/stream.c
index 236384f..6e281c7 100644
--- a/block/stream.c
+++ b/block/stream.c
@@ -221,7 +221,9 @@ static const BlockJobDriver stream_job_driver = {
 void stream_start(const char *job_id, BlockDriverState *bs,
   BlockDriverState *base, const char *backing_file_str,
   int creation_flags, int64_t speed,
-  BlockdevOnError on_error, Error **errp)
+  BlockdevOnError on_error,
+  const char *filter_node_name,
+  Error **errp)
 {
 StreamBlockJob *s;
 BlockDriverState *iter;
diff --git a/blockdev.c b/blockdev.c
index fe6fb5d..c917625 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -2499,6 +2499,7 @@ void qmp_block_stream(bool has_job_id, const char 
*job_id, const char *device,
   bool has_backing_file, const char *backing_file,
   bool has_speed, int64_t speed,
   bool has_on_error, BlockdevOnError on_error,
+  bool has_filter_node_name, const char *filter_node_name,
   bool has_auto_finalize, bool auto_finalize,
   bool has_auto_dismiss, bool auto_dismiss,
   Error **errp)
@@ -2581,7 +2582,8 @@ void qmp_block_stream(bool has_job_id, const char 
*job_id, const char *device,
 }
 
 stream_start(has_job_id ? job_id : NULL, bs, base_bs, base_name,
- job_flags, has_speed ? speed : 0, on_error, &local_err);
+ job_flags, has_speed ? speed : 0, on_error,
+ filter_node_name, &local_err);
 if (local_err) {
 error_propagate(errp, local_err);
 goto out;
diff --git a/include/block/block_int.h b/include/block/block_int.h
index 95d9333..c05fa1e 100644
--- a/include/block/block_int.h
+++ b/include/block/block_int.h
@@ -1134,6 +1134,9 @@ int is_windows_drive(const char *filename);
  *  See @BlockJobCreateFlags
  * @speed: The maximum speed, in bytes per second, or 0 for unlimited.
  * @on_error: The action to take upon error.
+ * @filter_node_name: The node name that should be assigned to the filter
+ * driver that the stream job inserts into the graph above @bs. NULL means
+ * that a node name should be autogenerated.
  * @errp: Error object.
  *
  * Start a streaming operation on @bs.  Clusters that are unallocated
@@ -1146,7 +1149,9 @@ int is_windows_drive(const char *filename);
 void stream_start(const char *job_id, BlockDriverState *bs,
   BlockDriverState *base, const char *backing_file_str,
   int creation_flags, int64_t speed,
-  BlockdevOnError on_error, Error **errp);
+  BlockdevOnError on_error,
+  const char *filter_node_name,
+  Error **errp);
 
 /**
  * commit_start:
diff --git a/qapi/block-core.json b/qapi/block-core.json
index 04ad80b..8ef3df6 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -2543,6 +2543,11 @@
 #'stop' and 'enospc' can only be used if the block device
 #supports io-status (see BlockInfo).  Since 1.3.
 #
+# @filter-node-name: the node name that should be assigned to the
+#filter driver that the stream job inserts into the graph
+#above @device. If this option is not given, a node name is
+#autogenerated. (Since: 5.2)
+#
 # @auto-finalize: When false, this job will wait in a PENDING state after it 
has
 # finished its work, waiting for @block-job-finalize before
 # making any block graph changes.
@@ -2573,6 +2578,7 @@
   'data': { '*job-id': 'str', 'device': 'str', '*base': 'str',
 '*base-node': 'str', '*backing-file': 'str', '*speed': 'int',
 '*on-error': 'BlockdevOnError',
+

[PATCH v13 08/10] copy-on-read: skip non-guest reads if no copy needed

2020-12-02 Thread Andrey Shinkevich via

If the flag BDRV_REQ_PREFETCH is set, skip the idle read/write
operations in the COR driver. This can be taken into account to
optimize the COR algorithms. At the moment, that check is made during
the block-stream job.

Add the BDRV_REQ_PREFETCH flag to the supported_read_flags of the
COR-filter.

block: Modify the comment for the flag BDRV_REQ_PREFETCH as we are
going to use it alone and pass it to the COR-filter driver for further
processing.

Signed-off-by: Andrey Shinkevich 
---
 block/copy-on-read.c  | 14 ++
 include/block/block.h |  8 +---
 2 files changed, 15 insertions(+), 7 deletions(-)

diff --git a/block/copy-on-read.c b/block/copy-on-read.c
index 2cddc96..123d197 100644
--- a/block/copy-on-read.c
+++ b/block/copy-on-read.c
@@ -49,6 +49,8 @@ static int cor_open(BlockDriverState *bs, QDict *options, int 
flags,
 return -EINVAL;
 }
 
+bs->supported_read_flags = BDRV_REQ_PREFETCH;
+
 bs->supported_write_flags = BDRV_REQ_WRITE_UNCHANGED |
 (BDRV_REQ_FUA & bs->file->bs->supported_write_flags);
 
@@ -150,10 +152,14 @@ static int coroutine_fn 
cor_co_preadv_part(BlockDriverState *bs,
 }
 }
 
-ret = bdrv_co_preadv_part(bs->file, offset, n, qiov, qiov_offset,
-  local_flags);
-if (ret < 0) {
-return ret;
+/* Skip if neither read nor write are needed */
+if ((local_flags & (BDRV_REQ_PREFETCH | BDRV_REQ_COPY_ON_READ)) !=
+BDRV_REQ_PREFETCH) {
+ret = bdrv_co_preadv_part(bs->file, offset, n, qiov, qiov_offset,
+  local_flags);
+if (ret < 0) {
+return ret;
+}
 }
 
 offset += n;
diff --git a/include/block/block.h b/include/block/block.h
index 81a3894..3499554 100644
--- a/include/block/block.h
+++ b/include/block/block.h
@@ -81,9 +81,11 @@ typedef enum {
 BDRV_REQ_NO_FALLBACK= 0x100,
 
 /*
- * BDRV_REQ_PREFETCH may be used only together with BDRV_REQ_COPY_ON_READ
- * on read request and means that caller doesn't really need data to be
- * written to qiov parameter which may be NULL.
+ * BDRV_REQ_PREFETCH makes sense only in the context of copy-on-read
+ * (i.e., together with the BDRV_REQ_COPY_ON_READ flag or when a COR
+ * filter is involved), in which case it signals that the COR operation
+ * need not read the data into memory (qiov) but only ensure they are
+ * copied to the top layer (i.e., that COR operation is done).
  */
 BDRV_REQ_PREFETCH  = 0x200,
 /* Mask of valid flags */
-- 
1.8.3.1

Re: [PATCH v12 14/14] block: apply COR-filter to block-stream jobs

2020-12-02 Thread Andrey Shinkevich




On 27.10.2020 21:24, Andrey Shinkevich wrote:


On 27.10.2020 20:57, Vladimir Sementsov-Ogievskiy wrote:

27.10.2020 20:48, Andrey Shinkevich wrote:


On 27.10.2020 19:13, Vladimir Sementsov-Ogievskiy wrote:

22.10.2020 21:13, Andrey Shinkevich wrote:

This patch completes the series with the COR-filter insertion for
block-stream operations. Adding the filter makes it possible for 
copied

regions to be discarded in backing files during the block-stream job,
which will reduce the disk overuse.
The COR-filter insertion incurs changes in the iotests case
245:test_block_stream_4 that reopens the backing chain during a
block-stream job. There are changes in the iotests #030 as well.
The iotests case 030:test_stream_parallel was deleted due to multiple
conflicts between the concurrent job operations over the same backing
chain. The base backing node for one job is the top node for another
job. It may change due to the filter node inserted into the backing
chain while both jobs are running. Another issue is that the parts of
the backing chain are being frozen by the running job and may not be
changed by the concurrent job when needed. The concept of the parallel
jobs with common nodes is no longer considered vital.

Signed-off-by: Andrey Shinkevich 
---
  block/stream.c | 98 
++

  tests/qemu-iotests/030 | 51 +++-
  tests/qemu-iotests/030.out |  4 +-
  tests/qemu-iotests/141.out |  2 +-
  tests/qemu-iotests/245 | 22 +++
  5 files changed, 87 insertions(+), 90 deletions(-)

diff --git a/block/stream.c b/block/stream.c



[...]

+    s = block_job_create(job_id, _job_driver, NULL, 
cor_filter_bs,

+ BLK_PERM_CONSISTENT_READ,
+ basic_flags | BLK_PERM_WRITE | 
BLK_PERM_GRAPH_MOD,


I think that BLK_PERM_GRAPH_MOD is something outdated. We have 
chain-freeze; what does BLK_PERM_GRAPH_MOD add to it? I don't know, and 
I doubt that anybody knows.




That is true for the commit/mirror jobs also. If we agree to remove 
the flag BLK_PERM_GRAPH_MOD from all these jobs, it will be made in a 
separate series, won't it?


Hmm. At least, let's not implement new logic based on 
BLK_PERM_GRAPH_MOD. In original code it's only block_job_create's 
perm, not in shared_perm, not somewhere else.. So, if we keep it, 
let's keep it as is: only in perm in block_job_create, not 
implementing additional perm/shared_perm logic.




With @perm=0 in the block_job_add_bdrv(&s->common, "active node"...), it 
won't.





   speed, creation_flags, NULL, NULL, errp);
  if (!s) {
  goto fail;
  }
+    /*
+ * Prevent concurrent jobs trying to modify the graph 
structure here, we
+ * already have our own plans. Also don't allow resize as the 
image size is

+ * queried only at the job start and then cached.
+ */
+    if (block_job_add_bdrv(>common, "active node", bs,
+   basic_flags | BLK_PERM_GRAPH_MOD,


Why not 0, like for other nodes? We don't use this BdrvChild at all, 
so why require permissions?




Yes, '0' is right.

+   basic_flags | BLK_PERM_WRITE, 
&error_abort)) {

+    goto fail;
+    }
+
  /* Block all intermediate nodes between bs and base, because 



[...]


diff --git a/tests/qemu-iotests/030 b/tests/qemu-iotests/030
index dcb4b5d..0064590 100755
--- a/tests/qemu-iotests/030
+++ b/tests/qemu-iotests/030
@@ -227,61 +227,20 @@ class TestParallelOps(iotests.QMPTestCase):
  for img in self.imgs:
  os.remove(img)
-    # Test that it's possible to run several block-stream operations
-    # in parallel in the same snapshot chain
-    @unittest.skipIf(os.environ.get('QEMU_CHECK_BLOCK_AUTO'), 
'disabled in CI')

-    def test_stream_parallel(self):


Didn't we agree to add a "bottom" parameter to QMP? Then this test-case 
can be rewritten using

node-names and new "bottom" stream argument.



The new QMP "bottom" option is passed to the COR-driver. It is done 
within the stream-job code. So, it works.




I guess it will not help for the whole test. Particularly, there is 
an issue with freezing the child link to COR-filter of the concurrent 
job, then it fails to finish first.


We should not have such frozen link, as our bottom node should be 
above COR-filter of concurrent job.





The bdrv_freeze_backing_chain(bs, above_base, errp) does that job. Max 
insisted on keeping it.


Andrey


I have kept the test_stream_parallel() deleted in the coming v13 because 
it was agreed to make the above_base node frozen. With this, the test 
case can not pass. It is also true because the operations over the 
COR-filter node are blocked for the parallel jobs.


Andrey

Re: [Bug 1906463] [NEW] "-device help" does not report all devices

2020-12-02 Thread Doug Evans

On Wed, Dec 2, 2020 at 9:41 AM Peter Maydell 
wrote:

> On Wed, 2 Dec 2020 at 16:51, Doug Evans  wrote:
> >
> > Btw, if I may ask another dumb question, I get this:
> >
> > @ruffy:build-arm$ ./qemu-system-arm -M virt -monitor stdio
> > Unable to init server: Could not connect: Connection refused
> > QEMU 5.1.93 monitor - type 'help' for more information
> > (qemu) gtk initialization failed
> > 
> >
> > If I add "-display none" then it works, but it's odd that it's trying to
> initialize with gtk here ($DISPLAY isn't set, there is no X present).
>
> That's expected. By default we try to create a GUI window.
> If DISPLAY is not set, then that fails, which is why
> we print "gtk initialization failed" and exit.
> This is the same behaviour as other GUI apps:
>
> $ DISPLAY= xterm
> xterm: Xt error: Can't open display:
> xterm: DISPLAY is not set
>
> $ DISPLAY= firefox
> Unable to init server: Broadway display type not supported:
> Error: cannot open display:
>
> $ DISPLAY= evince
> Unable to init server: Could not connect: Connection refused
> Cannot parse arguments: Cannot open display:
>
> If you don't want graphics you should tell QEMU you
> don't want graphics (eg with '-display none').
>
> This seems to me more helpful to most users than the
> alternative (if you know you don't want the GUI then
> it's easy to disable it; but most non-sophisticated
> users do want it).
>


Thanks. That's not unreasonable.

OTOH, all those examples don't have a non-X mode.
As counterexamples there's emacs and gvim.

The present situation is fine, now that I understand it.
I can write a wrapper that DTRT.

Re: Plans to bring QMP 'x-blockdev-reopen' out of experimental?

2020-12-02 Thread Kevin Wolf

Am 02.12.2020 um 17:40 hat Alberto Garcia geschrieben:
> On Wed 02 Dec 2020 05:28:08 PM CET, Kevin Wolf wrote:
> 
> >> So x-blockdev-reopen sees that we want to replace the current
> >> bs->file ("hd0-file") with a new one ("throttle0"). The problem here
> >> is that throttle0 has hd0-file as its child, so when we check the
> >> permissions on throttle0 (and its children) we get that hd0-file
> >> refuses because it's already being used (although in the process
> >> of being replaced) by hd0:
> >> 
> >> "Conflicts with use by hd0 as 'file', which does not allow 'write, resize' 
> >> on hd0-file"
> >> 
> > This kind of situation isn't new, I believe some of the existing graph
> > changes (iirc in the context of block jobs) can cause the same problem.
> >
> > This is essentially why some functions in the permission system take a
> > GSList *ignore_children. So I believe the right thing to do here is
> > telling the permission system that it needs to check the situation
> > without the BdrvChild that links hd0 with hd0-file.
> 
> I had tried this already and it does work when inserting the filter (we
> know that 'hd0-file' is about to be detached from the parent so we can
> put it in the list) but I don't think it's so easy if we want to remove
> the filter, i.e.
> 
>hd0 -> throttle -> hd0-file ==> hd0 -> hd0-file
> 
> In this case we get a similar error, we want to make hd0-file a child of
> hd0 but it is being used by the throttle filter.
> 
> Telling bdrv_check_update_perm() to ignore hd0's current child
> (throttle) won't solve the problem.

Isn't this the very same case as removing e.g. a mirror filter from the
chain? I'm sure we have already solved this somewhere.

Hm, no, it might actually be different in that the throttle node
survives this, so we do have to check that the resulting graph is
valid. Do we need a combined operation to remove the throttle node from
the graph and immediately delete it?

> > I don't know the exact stack trace of your failure, so maybe this
> > parameter isn't available yet in the place where you need it, but in
> > the core functions it exists.
> 
> This is in bdrv_reopen_multiple(), in the same place where we are
> currently checking the permissions of the new backing file.

Oh, it's not happening while actually changing the links, but the check
before trying? I guess both would fail in this case anyway, but good to
know.

Kevin

[Bug 1906536] Re: Unable to set SVE VL to 1024 bits or above since 7b6a2198

2020-12-02 Thread Peter Maydell

Yes, we should by default do what the Linux kernel does, but we should
also provide a mechanism for allowing guest software to use a higher
vector length than that kernel default. On a real kernel you can do that
by either setting the /proc/sys/abi/sve_default_vector_length, or by
having process A make the prctl() to change vector length and then exec
process B that inherits that increased vector length. Neither of those
mechanisms work for QEMU linux-user, so we should provide some other
mechanism instead.

-- 
You received this bug notification because you are a member of qemu-
devel-ml, which is subscribed to QEMU.
https://bugs.launchpad.net/bugs/1906536

Title:
  Unable to set SVE VL to 1024 bits or above since 7b6a2198

Status in QEMU:
  New

Bug description:
  Prior to 7b6a2198e71794c851f39ac7a92d39692c786820, the QEMU option
  sve-max-vq could be used to set the vector length of the
  implementation. This is useful (among other reasons) for testing
  software compiled with a fixed SVE vector length. Since this commit,
  the vector length is capped at 512 bits.

  To reproduce the issue:

  $ cat rdvl.s
  .global _start
  _start:
rdvl x0, #1
asr x0, x0, #4
mov x8, #93 // exit
svc #0
  $ aarch64-linux-gnu-as -march=armv8.2-a+sve rdvl.s -o rdvl.o
  $ aarch64-linux-gnu-ld rdvl.o
  $ for vl in 1 2 4 8 16; do ../build-qemu/aarch64-linux-user/qemu-aarch64 -cpu 
max,sve-max-vq=$vl a.out; echo $?; done
  1
  2
  4
  4
  4

  For a QEMU built prior to the above revision, we get the output:
  1
  2
  4
  8
  16

  as expected. It seems that either the old behavior should be restored,
  or there should be an option to force a higher vector length?

To manage notifications about this bug go to:
https://bugs.launchpad.net/qemu/+bug/1906536/+subscriptions

Re: [PATCH] linux-user/elfload: Fix handling of pure BSS segments

2020-12-02 Thread Peter Maydell

On Wed, 25 Nov 2020 at 09:39, Alex Bennée  wrote:
> How hairy is the generation of these binaries? If it's all doable with
> standard gcc/ldd command lines it would be useful to add them as a
> tcg/multiarch test case.

Rather than using C it might be simpler just to create a failing
binary by-hand, as the StackOverflow example does:

https://stackoverflow.com/questions/64956322/alignment-requirements-for-arm64-elf-executables-run-in-qemu-assembled-by-gas

thanks
-- PMM

[Bug 1906536] Re: Unable to set SVE VL to 1024 bits or above since 7b6a2198

2020-12-02 Thread Alex Coplan

Hi Philippe,

I'm aware of the prctl workaround.

It seems to me that this is clearly a regression in functionality. Prior
to the change, I could test any executable with any vector length
without having to modify the executable. Now I have to insert a prctl to
test with 1024 or 2048-bit SVE vectors?

Moreover, with this change, it's no longer possible to have the wider VL
inherited across exec() to another QEMU instance.

-- 
You received this bug notification because you are a member of qemu-
devel-ml, which is subscribed to QEMU.
https://bugs.launchpad.net/bugs/1906536

Title:
  Unable to set SVE VL to 1024 bits or above since 7b6a2198

Status in QEMU:
  New

Bug description:
  Prior to 7b6a2198e71794c851f39ac7a92d39692c786820, the QEMU option
  sve-max-vq could be used to set the vector length of the
  implementation. This is useful (among other reasons) for testing
  software compiled with a fixed SVE vector length. Since this commit,
  the vector length is capped at 512 bits.

  To reproduce the issue:

  $ cat rdvl.s
  .global _start
  _start:
rdvl x0, #1
asr x0, x0, #4
mov x8, #93 // exit
svc #0
  $ aarch64-linux-gnu-as -march=armv8.2-a+sve rdvl.s -o rdvl.o
  $ aarch64-linux-gnu-ld rdvl.o
  $ for vl in 1 2 4 8 16; do ../build-qemu/aarch64-linux-user/qemu-aarch64 -cpu 
max,sve-max-vq=$vl a.out; echo $?; done
  1
  2
  4
  4
  4

  For a QEMU built prior to the above revision, we get the output:
  1
  2
  4
  8
  16

  as expected. It seems that either the old behavior should be restored,
  or there should be an option to force a higher vector length?

To manage notifications about this bug go to:
https://bugs.launchpad.net/qemu/+bug/1906536/+subscriptions

Re: [Bug 1906463] [NEW] "-device help" does not report all devices

2020-12-02 Thread Peter Maydell

On Wed, 2 Dec 2020 at 16:51, Doug Evans  wrote:
>
> Btw, if I may ask another dumb question, I get this:
>
> @ruffy:build-arm$ ./qemu-system-arm -M virt -monitor stdio
> Unable to init server: Could not connect: Connection refused
> QEMU 5.1.93 monitor - type 'help' for more information
> (qemu) gtk initialization failed
> 
>
> If I add "-display none" then it works, but it's odd that it's trying to 
> initialize with gtk here ($DISPLAY isn't set, there is no X present).

That's expected. By default we try to create a GUI window.
If DISPLAY is not set, then that fails, which is why
we print "gtk initialization failed" and exit.
This is the same behaviour as other GUI apps:

$ DISPLAY= xterm
xterm: Xt error: Can't open display:
xterm: DISPLAY is not set

$ DISPLAY= firefox
Unable to init server: Broadway display type not supported:
Error: cannot open display:

$ DISPLAY= evince
Unable to init server: Could not connect: Connection refused
Cannot parse arguments: Cannot open display:

If you don't want graphics you should tell QEMU you
don't want graphics (eg with '-display none').

This seems to me more helpful to most users than the
alternative (if you know you don't want the GUI then
it's easy to disable it; but most non-sophisticated
users do want it).

thanks
-- PMM

[Bug 1906536] Re: Unable to set SVE VL to 1024 bits or above since 7b6a2198

2020-12-02 Thread Peter Maydell

** Changed in: qemu
   Status: Invalid => New

-- 
You received this bug notification because you are a member of qemu-
devel-ml, which is subscribed to QEMU.
https://bugs.launchpad.net/bugs/1906536

Title:
  Unable to set SVE VL to 1024 bits or above since 7b6a2198

Status in QEMU:
  New

Bug description:
  Prior to 7b6a2198e71794c851f39ac7a92d39692c786820, the QEMU option
  sve-max-vq could be used to set the vector length of the
  implementation. This is useful (among other reasons) for testing
  software compiled with a fixed SVE vector length. Since this commit,
  the vector length is capped at 512 bits.

  To reproduce the issue:

  $ cat rdvl.s
  .global _start
  _start:
rdvl x0, #1
asr x0, x0, #4
mov x8, #93 // exit
svc #0
  $ aarch64-linux-gnu-as -march=armv8.2-a+sve rdvl.s -o rdvl.o
  $ aarch64-linux-gnu-ld rdvl.o
  $ for vl in 1 2 4 8 16; do ../build-qemu/aarch64-linux-user/qemu-aarch64 -cpu 
max,sve-max-vq=$vl a.out; echo $?; done
  1
  2
  4
  4
  4

  For a QEMU built prior to the above revision, we get the output:
  1
  2
  4
  8
  16

  as expected. It seems that either the old behavior should be restored,
  or there should be an option to force a higher vector length?

To manage notifications about this bug go to:
https://bugs.launchpad.net/qemu/+bug/1906536/+subscriptions

Re: [PATCH 1/6] migration: Add multi-thread compress method

2020-12-02 Thread Dr. David Alan Gilbert

* Zeyu Jin (jinz...@huawei.com) wrote:
> On 2020/11/30 16:35, Markus Armbruster wrote:
> > Zeyu Jin  writes:
> > 
> >> On 2020/11/27 17:48, Markus Armbruster wrote:
> >>> Kevin, Max, suggest to skip right to Qcow2CompressionType.
> >>>
> >>> Zeyu Jin  writes:
> >>>
>  A multi-thread compress method parameter is added to hold the method we
>  are going to use. By default the 'zlib' method is used to maintain the
>  compatibility as before.
> 
>  Signed-off-by: Zeyu Jin 
>  Signed-off-by: Ying Fang 
> >>> [...]
>  diff --git a/qapi/migration.json b/qapi/migration.json
>  index 3c75820527..2ed6a55b92 100644
>  --- a/qapi/migration.json
>  +++ b/qapi/migration.json
>  @@ -525,6 +525,19 @@
> 'data': [ 'none', 'zlib',
>   { 'name': 'zstd', 'if': 'defined(CONFIG_ZSTD)' } ] }
>   
>  +##
>  +# @CompressMethod:
>  +#
>  +# An enumeration of multi-thread compression methods.
>  +#
>  +# @zlib: use zlib compression method.
>  +#
>  +# Since: 6.0
>  +#
>  +##
>  +{ 'enum': 'CompressMethod',
>  +  'data': [ 'zlib' ] }
>  +
>   ##
>   # @BitmapMigrationBitmapAlias:
>   #
>  @@ -599,6 +612,9 @@
>   #  compression, so set the decompress-threads to 
>  the number about 1/4
>   #  of compress-threads is adequate.
>   #
>  +# @compress-method: Set compression method to use in multi-thread 
>  compression.
>  +#   Defaults to zlib. (Since 6.0)
> >>>
> >>> We already have @multifd-compression.  Why do we need to control the two
> >>> separately?  Can you give a use case for different settings?
> >>>
> >>
> >> Generally, multi-thread compression deals with the situation
> >> where network bandwidth is limited but cpu resource is adequate. Multifd
> >> instead aims at the situation where a single fd cannot take full advantage
> >> of network bandwidth. So compression based on multifd cannot fully cover the
> >> cases for multi-thread compression.
> >>
> >> For example, for migration with a bandwidth limitation of 10M
> >> bytes/second, single fd is enough for data delivery. This is the case
> >> for multi-thread compression.
> > 
> > Let me rephrase my question.
> > 
> > According to query-migrate-parameters, we default to
> > 
> > "compress-level": 1
> > "compress-threads": 8
> > "compress-wait-thread": true
> > "decompress-threads": 2
> > "multifd-channels": 2
> > "multifd-compression": "none"
> > "multifd-zlib-level": 1
> > "multifd-zstd-level": 1
> > 
> > Your patch adds
> > 
> > "compress-method": "zlib"
> > 
> > I have several basic questions I can't answer from the documentation:
> > 
> > 1. We appear to have two distinct sets of compression parameters:
> > 
> >* Traditional: compress-level, compress-threads,
> >  compress-wait-thread, decompress-threads.
> > 
> >  These parameters all apply to the same compression.  Correct?
> > 
> >  What data is being compressed by it?
> > 
> >* Multi-fd: multifd-channels, multifd-compression,
> > >  multifd-zlib-level, multifd-zstd-level
> > 
> >  These parameters all apply to the same compression.  Correct?
> > 
> >  What data is being compressed by it?
> > 
> >* Why do we want *two*?  I understand why multi-fd is optional, but
> >  why do we need the capability to compress differently there?  Use
> >  case?
> > 
> >All of these questions predate your patch.  David, Juan?
> >
> 
> I see. The problem is that the parameter sets seem to be redundant and
> maybe there is an overlap between these two compression capabilities.
> 
> As you said, the questions predate my patch, so maybe we can have a
> discussion here. What do you think, David, Juan?

Yes it's true, they're redundant - it's the same settings duplicated
for the two systems, traditional and multifd.

Can I ask - have you compared the behaviour of multifd-zstd with plain
zstd?  I ask, because it's a shame to have two separate systems; and if
multifd-zstd worked well, then it would be good someday to deprecate the
non-multifd version of compression completely, and simplify a lot of
code that way.

Dave


> > 2. Does compress-method belong to "traditional"?
> >
> 
> Yes.
> 
> >>> If we do want two parameters: the names @compress-method and
> >>> @multifd-compression are inconsistent.  According to your comment,
> >>> @compress-method applies only to multi-thread compression.  That leads
> >>> me to suggest renaming it to @multi-thread-compression.
> >>>
> >>
> >> For the names, my original idea is to make 'CompressMethod' consistent
> >> with other multi-thread compression parameters, like 'compress-threads'
> >> and 'compress-level'. There is truly some inconsistency here, between
> >> multifd-compression params and old multi-thread compression params.
> > 
> > I see.
> > 
> >> For now, I agree with you that 'multi-thread-compression' is

Re: [PATCH 00/18] qapi/qom: QAPIfy object-add

2020-12-02 Thread Kevin Wolf

Am 02.12.2020 um 17:05 hat Eduardo Habkost geschrieben:
> > > Looks nice as end goal.  Then, these are a few questions I would
> > > have about the transition plan:
> > > 
> > > Would it require changing both device implementation and device
> > > users in lockstep?  Should we have a compatibility layer to allow
> > > existing qdev_new()+qdev_prop_set_*() code to keep working after
> > > the devices are converted to the new system?  If not, why not?
> > 
> > Technically, it doesn't strictly require a lockstep update. You can have
> > two code paths leading to a fully initialised device, one being the
> > traditional way of setting properties and finally calling dc->realize,
> > the other being a new method that takes the configuration in its
> > parameters and also sets dev->realized = true at the end.
> > 
> > If at all possible, I would however prefer a lockstep update because
> > having two paths is a weird intermediate state and the code paths could
> > easily diverge. Keeping the old way around for a device also means that
> > property setters are still doing two different jobs (initial
> > configuration and updates at runtime).
> 
> I'd like to understand better how that intermediate state would
> look like and why there's risk of separate code paths diverging.
>
> Could we have an intermediate state that doesn't require any
> duplication and thus have no separate code paths that could
> diverge?

The one requirement we have for an intermediate state is that it
supports both interfaces: The well-known create/set properties/realize
dance, and a new DeviceClass method, say .create(), that takes the
configuration in parameters instead of relying on previously set
properties.

I assumed two separate implementations of transferring the configuration
into the internal state. On second thought, this assumption is maybe
wrong.

You can implement the new method as wrapper around the old way: It could
just set all the properties and call realize. Of course, you don't win
much in terms of improving the class implementation this way, but just
support the new interface, but I guess it can be a reasonable
intermediate step to resolve complicated dependencies etc.

It would be much nicer to do the wrapper the other way round, i.e.
setting properties before the device is realized would update a
configuration struct and realize would then call .create() with that
struct. To me, this sounds much harder, though also a more useful state.

As you have worked a lot with properties recently, maybe you have a good
idea how we could get an intermediate state closer to this?

> > > If we add a compatibility layer, is the end goal to convert all
> > > existing qdev_new() users to the new system?  If yes, why?  If
> > > not, why not?
> > 
> > My personal goal is covering -object and -device, i.e. the external
> > interfaces. Converting purely internally created devices is not as
> > interesting (especially as long as we focus only on object creation),
> > but might be desirable for consistency.
> 
> I wonder how much consistency we will lose and how much confusion
> we'll cause if we end up with two completely separate methods for
> creating devices.

I do think we should follow through and convert everything. It's just
not my main motivation, and if the people who work more with qdev think
it's better to leave that part unchanged (or that it won't make much of
a difference), I won't insist.

> > > What about subclasses?  Would base classes need to be converted
> > > in lockstep with all subclasses?  How would the transition
> > > process of (e.g.) PCI devices look like?
> > 
> > I don't think so.
> > 
> > If you want to convert base classes first, you may need to take the
> > path shown above where both initialisation paths coexist while the
> > children are converted because instantiation of a child class involves
> > setting properties of the base class. So you can only restrict these
> > properties to runtime-only after all children have been converted.
> > 
> > The other way around might be easier: You will need to describe the
> > properties of base classes in the QAPI schema from the beginning, but
> > that doesn't mean that their initialisation code has to change just yet.
> > The child classes will need to forward the part of their configuration
> > that belongs to the base class. The downside is that this code will have
> > to be changed again when the base class is finally converted.
> > 
> > So we have options there, and we can decide case by case which one is
> > most appropriate for the specific class to be converted (depending on
> > how many child classes exist, how many properties are inherited, etc.)
> 
> Right now it's hard for me to understand what those intermediate
> states would look like.  It sounds like it requires too many
> complicated manual changes to be done by humans, and lots of room
> for mistakes when maintaining two parallel code paths.  I'd
> prefer to delegate the translation job to a

[PATCH for-6.0] hw/ppc: Do not re-read the clock on pre_save if doing savevm

2020-12-02 Thread Greg Kurz

A guest with enough RAM, eg. 128G, is likely to detect savevm downtime
and to complain about stalled CPUs. This happens because we re-read
the timebase just before migrating it and we thus don't account for
all the time between VM stop and pre-save.

A very similar situation was already addressed for live migration of
paused guests (commit d14f33976282). Extend the logic to do the same
with savevm.

Fixes: https://bugzilla.redhat.com/show_bug.cgi?id=1893787
Signed-off-by: Greg Kurz 
---
 hw/ppc/ppc.c |5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/hw/ppc/ppc.c b/hw/ppc/ppc.c
index 1b9827207676..5cbbff1f8d0c 100644
--- a/hw/ppc/ppc.c
+++ b/hw/ppc/ppc.c
@@ -1027,7 +1027,8 @@ static void timebase_save(PPCTimebase *tb)
  */
 tb->guest_timebase = ticks + first_ppc_cpu->env.tb_env->tb_offset;
 
-tb->runstate_paused = runstate_check(RUN_STATE_PAUSED);
+tb->runstate_paused =
+runstate_check(RUN_STATE_PAUSED) || runstate_check(RUN_STATE_SAVE_VM);
 }
 
 static void timebase_load(PPCTimebase *tb)
@@ -1088,7 +1089,7 @@ static int timebase_pre_save(void *opaque)
 {
 PPCTimebase *tb = opaque;
 
-/* guest_timebase won't be overridden in case of paused guest */
+/* guest_timebase won't be overridden in case of paused guest or savevm */
 if (!tb->runstate_paused) {
 timebase_save(tb);
 }

1 2 3 4 5 >

1 - 100 of 406 matches

Mail list logo