[Qemu-devel] [PATCH v2] qemu-thread: always keep the posix wrapper layer

2018-04-11 Thread Peter Xu
We will conditionally have a wrapper layer depending on whether the host
has the PTHREAD_SETNAME capability.  It complicates stuff.  Let's keep
the wrapper there; we opt out the pthread_setname_np() call only.

Signed-off-by: Peter Xu 
---
v2:
- set thread name only conditionally [Eric]
---
 util/qemu-thread-posix.c | 33 +
 1 file changed, 13 insertions(+), 20 deletions(-)

diff --git a/util/qemu-thread-posix.c b/util/qemu-thread-posix.c
index b789cf32e9..a1c34ba6f2 100644
--- a/util/qemu-thread-posix.c
+++ b/util/qemu-thread-posix.c
@@ -482,7 +482,6 @@ static void __attribute__((constructor)) 
qemu_thread_atexit_init(void)
 }
 
 
-#ifdef CONFIG_PTHREAD_SETNAME_NP
 typedef struct {
 void *(*start_routine)(void *);
 void *arg;
@@ -495,16 +494,18 @@ static void *qemu_thread_start(void *args)
 void *(*start_routine)(void *) = qemu_thread_args->start_routine;
 void *arg = qemu_thread_args->arg;
 
+#ifdef CONFIG_PTHREAD_SETNAME_NP
 /* Attempt to set the threads name; note that this is for debug, so
  * we're not going to fail if we can't set it.
  */
-pthread_setname_np(pthread_self(), qemu_thread_args->name);
+if (name_threads && qemu_thread_args->name) {
+pthread_setname_np(pthread_self(), qemu_thread_args->name);
+}
+#endif
 g_free(qemu_thread_args->name);
 g_free(qemu_thread_args);
 return start_routine(arg);
 }
-#endif
-
 
 void qemu_thread_create(QemuThread *thread, const char *name,
void *(*start_routine)(void*),
@@ -513,6 +514,7 @@ void qemu_thread_create(QemuThread *thread, const char 
*name,
 sigset_t set, oldset;
 int err;
 pthread_attr_t attr;
+QemuThreadArgs *qemu_thread_args;
 
 err = pthread_attr_init(&attr);
 if (err) {
@@ -527,22 +529,13 @@ void qemu_thread_create(QemuThread *thread, const char 
*name,
 sigfillset(&set);
 pthread_sigmask(SIG_SETMASK, &set, &oldset);
 
-#ifdef CONFIG_PTHREAD_SETNAME_NP
-if (name_threads) {
-QemuThreadArgs *qemu_thread_args;
-qemu_thread_args = g_new0(QemuThreadArgs, 1);
-qemu_thread_args->name = g_strdup(name);
-qemu_thread_args->start_routine = start_routine;
-qemu_thread_args->arg = arg;
-
-err = pthread_create(&thread->thread, &attr,
- qemu_thread_start, qemu_thread_args);
-} else
-#endif
-{
-err = pthread_create(&thread->thread, &attr,
- start_routine, arg);
-}
+qemu_thread_args = g_new0(QemuThreadArgs, 1);
+qemu_thread_args->name = g_strdup(name);
+qemu_thread_args->start_routine = start_routine;
+qemu_thread_args->arg = arg;
+
+err = pthread_create(&thread->thread, &attr,
+ qemu_thread_start, qemu_thread_args);
 
 if (err)
 error_exit(err, __func__);
-- 
2.14.3




Re: [Qemu-devel] [PATCH 2/2] qemu-thread: let cur_mon be per-thread

2018-04-11 Thread Peter Xu
On Wed, Apr 11, 2018 at 08:06:04AM -0500, Eric Blake wrote:
> On 04/11/2018 04:48 AM, Peter Xu wrote:
> 
> > Okay. :) Thanks for confirming.  Then let me repost this patch without
> > touching the qemu-threads.
> > 
> > Btw, do you want me to repost the first patch separately too, or keep
> > the code as is?  I believe it depends on whether you treat that one as
> > a cleanup or not.  Thanks,
> 
> The first patch is no longer necessary for your new approach, but as it
> is a cleanup and you've already written it, it does not hurt to still
> send it as a separate cleanup patch useful in its own right.

Thank you, Eric.  I will repost.

-- 
Peter Xu



Re: [Qemu-devel] [PATCH v2 02/19] spapr: introduce a skeleton for the XIVE interrupt controller

2018-04-11 Thread David Gibson
On Wed, Jan 17, 2018 at 10:18:43AM +0100, Cédric Le Goater wrote:
> >>> Also, have we decided how the process of switching between XICS and
> >>> XIVE will work vs. CAS ? 
> >>
> >> That's how it is described in the architecture. The current choice is
> >> to create both XICS and XIVE objects and choose at CAS which one to
> >> use. It relies today on the capability of the pseries machine to 
> >> allocate IRQ numbers for both interrupt controller backends. These
> >> patches have been merged in QEMU.
> >>
> >> A change of interrupt mode results in a reset. The device tree is 
> >> populated accordingly and the ICPs are switched for the model in 
> >> use. 
> > 
> > For KVM we need to only instanciate one of them though.
> 
> Hmm,
> 
> How would we handle a guest rebooting on a kernel without XIVE support ? 
> Are you suggesting to create the XICS or XIVE device in the CAS negotiation 
> process ? So, the machine would not have any interrupt controller before 
> CAS. That seems really late to me. grub uses the console for instance. 
> 
> I think it should prepare for both options, start in XIVE legacy mode, 
> which is XICS, then possibly switch to XIVE exploitation mode.

I think for our first draft we should have XIVE and XICS based
platforms as separate machine types (or a machine option, I guess).

We do want to allow this to be autonegotiated, but I feel like
emphasising that at the beginning is causing unnatural design
decisions in the XIVE model itself.

> 
> >>> And how that will interact with KVM ?
> >>
> >> I expect we will do the same, which is to create two KVM devices to 
> >> be able to handle both interrupt controller backends depending on the 
> >> mode negotiated by the guest.  
> > 
> > That will be an ungodly mess, I'd rather we only instanciate the right
> > one.
> 
> It's rather transparent currently in the emulated version. There are two 
> sets of objects in QEMU, switching is done in CAS. KVM support should not 
> change anything in that area. 
> 
> I expect the 'xive-kvm' object to get/set states for migration, just like 
> for XICS and to setup the ESB+TIMA memory regions, which is new. 
> 
> C. 
>  
> >>> I was
> >>> thinking the kernel would implement a different KVM device type, ie
> >>> the "emulated XICS" would remain KVM_DEV_TYPE_XICS and XIVE would be
> >>> KVM_DEV_TYPE_XIVE.
> >>
> >> yes. it makes sense. The new device will have a lot in common with the 
> >> KVM_DEV_TYPE_XICS using kvm_xive_ops.
> > 
> > Ben.
> > 
> 

-- 
David Gibson| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au  | minimalist, thank you.  NOT _the_ _other_
| _way_ _around_!
http://www.ozlabs.org/~dgibson


signature.asc
Description: PGP signature


Re: [Qemu-devel] [PATCH v2 02/19] spapr: introduce a skeleton for the XIVE interrupt controller

2018-04-11 Thread David Gibson
On Mon, Feb 12, 2018 at 09:55:17AM +1100, Benjamin Herrenschmidt wrote:
> On Sun, 2018-02-11 at 19:08 +1100, David Gibson wrote:
> > On Thu, Jan 18, 2018 at 08:27:52AM +1100, Benjamin Herrenschmidt wrote:
> > > On Wed, 2018-01-17 at 15:39 +0100, Cédric Le Goater wrote:
> > > > Migration is a problem. We will need both backend QEMU objects to be 
> > > > available anyhow if we want to migrate. So we are back to the current 
> > > > solution creating both QEMU objects but we can try to defer some of the 
> > > > KVM inits and create the KVM device on demand at CAS time.
> > > 
> > > Do we have a way to migrate a piece of info from the machine *first*
> > > that indicate what type of XICS/XIVE to instanciate ?
> > 
> > Nope.  qemu migration doesn't work like that.  Yes, it should, and
> > everyone knows it, but changing it is a really long term project.
> 
> Well, we have a problem then. It looks like Qemu broken migration is
> fundamentally incompatible with PAPR and CAS design...

Hrm, the fit is very clunky certainly, but I think we can make it work.

> I know we don't migrate the configuration, that's not exactly what I
> had in mind tho... Can we have some piece of *data* from the machine be
> migrated first, and use it on the target to reconfigure the interrupt
> controller before the stream arrives ?

Sorta.. maybe.. but it would probably get really ugly if we don't
preserve the usual way object lifetimes work.

> Otherwise, we have indeed not much choice but the horrible wart of
> creating both interrupt controllers with only one "active".

I really think this is the way to go, warts and all.

-- 
David Gibson| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au  | minimalist, thank you.  NOT _the_ _other_
| _way_ _around_!
http://www.ozlabs.org/~dgibson


signature.asc
Description: PGP signature


Re: [Qemu-devel] [PATCH v2 02/19] spapr: introduce a skeleton for the XIVE interrupt controller

2018-04-11 Thread David Gibson
On Wed, Dec 20, 2017 at 08:38:41AM +0100, Cédric Le Goater wrote:
> On 12/20/2017 06:09 AM, David Gibson wrote:
> > On Sat, Dec 09, 2017 at 09:43:21AM +0100, Cédric Le Goater wrote:
> >> With the POWER9 processor comes a new interrupt controller called
> >> XIVE. It is composed of three sub-engines :
> >>
> >>   - Interrupt Virtualization Source Engine (IVSE). These are in PHBs,
> >> in the main controller for the IPIS and in the PSI host
> >> bridge. They are configured to feed the IVRE with events.
> >>
> >>   - Interrupt Virtualization Routing Engine (IVRE). Their job is to
> >> match an event source with a Notification Virtualization Target
> >> (NVT), a priority and an Event Queue (EQ) to determine if a
> >> Virtual Processor can handle the event.
> >>
> >>   - Interrupt Virtualization Presentation Engine (IVPE). It maintains
> >> the interrupt state of each hardware thread and present the
> >> notification as an external exception.
> >>
> >> Each of the engines uses a set of internal tables to redirect
> >> exceptions from event sources to CPU threads. The first table we
> >> introduce is the Interrupt Virtualization Entry (IVE) table, part of
> >> the virtualization engine in charge of routing events. It associates
> >> event sources (IRQ numbers) to event queues which will forward, or
> >> not, the event notification to the presentation controller.
> >>
> >> The XIVE model is designed to make use of the full range of the IRQ
> >> number space and does not use an offset like the XICS mode does.
> >> Hence, the IVE table is directly indexed by the IRQ number.
> >>
> >> Signed-off-by: Cédric Le Goater 
> > 
> > As you've suggested in yourself, I think we might need to more
> > explicitly model the different components of the XIVE system.  As part
> > of that, I think you need to be clearer in this base skeleton about
> > exactly what component your XIVE object represents.

Sorry it's been so long since I looked at these.

> ok. The base skeleton is the IVRE, the central engine handling 
> the routing. 
> 
> > If the answer is "the overall thing" 
> 
> Yes, it is more or less that currently. 
> 
> The sPAPRXive object models the source engine and the routing 
> engine in one object.

Yeah, I suspect we don't want that.  Although it might seem simpler in
the spapr case, at least at first glance, I think it will cause us
problems later.  At the very least, it's likely to make it harder to
share code between the spapr and powernv case.  I think it will also
make for more confusion about exactly what things belong where.

> I have merged these for simplicity and because the interrupt 
> controller has an internal source for the interrupts of the "IPI" 
> type, which are used for the CPU IPIs but also for other generic 
> interrupts, like the OpenCAPI ones. The XIVE sPAPR interface is 
> also much simpler than the baremetal one, all the tables are 
> maintained in the hypervisor, so this choice made some sense. 
> 
> But since, I have started the PowerNV model and I am duplicating 
> a lot of code to handle the triggering and the MMIOs in the 
> different sources. So I am not convinced anymore. Nevertheless, 
> the overall routing logic is the same even if some of the tables
> are not located in QEMU anymore, but in the machine memory.
> 
> The sPAPRXiveNVT models some of the CPU presenter engine. It 
> holds the virtual CPU interrupt states when not dispatched on 
> a real HW thread. Real world is more complex. There are "CAM" 
> lines in the HW threads which are compared to find a matching 
> candidate. But I don't think we need to do anything more complex
> than today unless we want to support KVM under TCG ...
>
> > I suspect that's not what you
> > want - I had one of those for XICs which proved to be a mistake
> > (eventually replaced by the XICSFabric interface).
> 
> The XICSFabric would be the main Xive object. The interface 
> between the sources and the routing engine is hidden in sPAPR, 
> we can use a simple function call : 
> 
>   spapr_xive_irq(pnv->xive, irq);
> 
> we could get rid of the qirqs but they are required for XICS.

I don't quite follow, but this doesn't sound right.

> PowerNV uses MMIOs to notify an event and it makes the modeling
> somewhat easier. Each controller model has a notify port address 
> register on which an interrupt number is written to forward an
> event to the routing engine. So it is a simple store. 
> 
> I don't know why there is a different notify port address per
> source, may be for extra filtering at the routing engine level.   
> 
> > Changing the model later isn't impossible, but doing so without
> > breaking migration can be a real pain, so I think it's worth a
> > reasonable effort to try and get it right initially.
> 
> I completely agree. 
> 
> This is why I have started the PnvXive model to challenge the 
> current PAPR design. I have hacked a bunch of patches for XIVE, 
> LPC, PSI, OCC and basic PPC 

Re: [Qemu-devel] [PATCH v2 02/19] spapr: introduce a skeleton for the XIVE interrupt controller

2018-04-11 Thread David Gibson
On Thu, Dec 21, 2017 at 11:12:06AM +1100, Benjamin Herrenschmidt wrote:
> On Wed, 2017-12-20 at 16:09 +1100, David Gibson wrote:
> > 
> > As you've suggested in yourself, I think we might need to more
> > explicitly model the different components of the XIVE system.  As part
> > of that, I think you need to be clearer in this base skeleton about
> > exactly what component your XIVE object represents.
> > 
> > If the answer is "the overall thing" I suspect that's not what you
> > want - I had one of those for XICs which proved to be a mistake
> > (eventually replaced by the XICSFabric interface).
> > 
> > Changing the model later isn't impossible, but doing so without
> > breaking migration can be a real pain, so I think it's worth a
> > reasonable effort to try and get it right initially.
> 
> Note: we do need to speed things up a bit, as having exploitation mode
> in KVM will significantly help with IPI performance among other things.
> 
> I'm about ready to do the KVM bits. The one thing we need to discuss
> and figure a good design for is how we map all those interrupt control
> pages into qemu.
> 
> Each interrupt (either PCIe pass-through or the "generic XIVE IPIs"
> which are used for guest IPIs and for vio/virtio/emulated interrupts)
> comes with a "control page" (ESB page) which needs to be mapped into
> the guest, and the generic IPIs also come with a trigger page which
> needs to be mapped into the guest for guest IPIs or OpenCAPI
> interrupts, or just qemu for emulated devices.
> 
> Now that can be thousands of these critters. I certainly don't want to
> create thousands of VMAs in qemu and even less thousands of memory
> regions in KVM.
> 
> So we need some kind of mechanism by which a single large VMA gets
> mmap'ed into qemu (or maybe a couple of these, but not too many) and
> the interrupt pages can be assigned to slots in there and demand
> faulted.

Ok, I see your point.  We'll definitely need to be able to map things
in as a block, rather than one by one.

> For the generic interrupts, this can probably be covered by KVM, adding
> some arch ioctls for allocating IPIs and mmap'ing that region etc...
> 
> For pass-through, it's trickier, we don't want to mmap each irqfd
> individually for the above reason, so we want to "link" them to KVM. We
> don't want to allow qemu to take control of any arbitrary interrupt in
> the system though, so it has to be related to the ownership of the irqfd
> coming from vfio.
> 
> OpenCAPI I suspect will be its own can of worms...
> 
> Also, have we decided how the process of switching between XICS and
> XIVE will work vs. CAS ? And how that will interact with KVM ? I was
> thinking the kernel would implement a different KVM device type, ie
> the "emulated XICS" would remain KVM_DEV_TYPE_XICS and XIVE would be
> KVM_DEV_TYPE_XIVE.
> 

-- 
David Gibson| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au  | minimalist, thank you.  NOT _the_ _other_
| _way_ _around_!
http://www.ozlabs.org/~dgibson


signature.asc
Description: PGP signature


Re: [Qemu-devel] [PATCH v2 02/19] spapr: introduce a skeleton for the XIVE interrupt controller

2018-04-11 Thread David Gibson
On Wed, Jan 17, 2018 at 03:39:46PM +0100, Cédric Le Goater wrote:
> On 01/17/2018 12:10 PM, Benjamin Herrenschmidt wrote:
> > On Wed, 2018-01-17 at 10:18 +0100, Cédric Le Goater wrote:
> > Also, have we decided how the process of switching between XICS and
> > XIVE will work vs. CAS ? 
> 
>  That's how it is described in the architecture. The current choice is
>  to create both XICS and XIVE objects and choose at CAS which one to
>  use. It relies today on the capability of the pseries machine to 
>  allocate IRQ numbers for both interrupt controller backends. These
>  patches have been merged in QEMU.
> 
>  A change of interrupt mode results in a reset. The device tree is 
>  populated accordingly and the ICPs are switched for the model in 
>  use. 
> >>>
> >>> For KVM we need to only instanciate one of them though.
> >>
> >> Hmm,
> >>
> >> How would we handle a guest rebooting on a kernel without XIVE support ? 
> > 
> > It will do CAS again and we can change the devices.
> 
> So, we would destroy the previous QEMU ICS object and create a new one 
> in the CAS hcall. That would probably work. There might be some issues 
> in creating and destroying the ICS KVM device, but that can be studied 
> without XIVE.

Adding and removing devices at runtime based on guest requests like
this will get really hairy in qemu.

As I've said before for the first cut, I think we want to select just
one as a machine option to avoid this confusion.

Looking further ahead, I think we'll be better off having both the
XIVE and XICS models always present (at least minimally) in qemu, but
with only one "active" at any given time.

Note that having the inactive one destroy and clean up the
corresponding KVM devices is fine, as is deallocating as much of its
runtime state as we can without changing the notional QOM tree.

> 
> It used to be considered ugly to create a QEMU device at reset time, so 
> I wonder if this is still the case, because when the machine reaches CAS, 
> we really are beyond reset.   
> 
> If this is OK, then the next "issue" is to keep in sync the allocated 
> IRQ numbers. The IRQ allocator is now merged at the machine level, so 
> the synchronization is obvious to do when both backend QEMU objects 
> are available. that's the path I took. If both QEMU objects are not 
> available, then we need to scan the IRQ number space in the current 
> interrupt mode and allocate the same IRQs in the newly negotiated mode. 
> Probably OK. I don't see major problems with the current code. 
> 
> Migration is a problem. We will need both backend QEMU objects to be 
> available anyhow if we want to migrate. So we are back to the current 
> solution creating both QEMU objects but we can try to defer some of the 
> KVM inits and create the KVM device on demand at CAS time.
> 
> The next problem is the ICP object that currently needs the KVM device 
> fd to connect the vcpus ... So, we will need to change that also. 
> That is probably the biggest problem today. We need a way to disconnect 
> the vcpu from the KVM device and see how we can defer the connection.
> I need to make sure this is possible, I can check that without XIVE
> I think.
> 
> >> Are you suggesting to create the XICS or XIVE device in the CAS 
> >> negotiation 
> >> process ? So, the machine would not have any interrupt controller before 
> >> CAS. That seems really late to me. grub uses the console for instance. 
> > 
> > We start with XICS by default.
> 
> yes.
> 
> >> I think it should prepare for both options, start in XIVE legacy mode, 
> >> which is XICS, then possibly switch to XIVE exploitation mode.
> >>
> > And how that will interact with KVM ? 
> 
>  I expect we will do the same, which is to create two KVM devices to 
>  be able to handle both interrupt controller backends depending on the 
>  mode negotiated by the guest.  
> >>>
> >>> That will be an ungodly mess, I'd rather we only instanciate the right
> >>> one.
> >>
> >> It's rather transparent currently in the emulated version. There are two 
> >> sets of objects in QEMU, switching is done in CAS. KVM support should not 
> >> change anything in that area. 
> >>
> >> I expect the 'xive-kvm' object to get/set states for migration, just like 
> >> for XICS and to setup the ESB+TIMA memory regions, which is new. 
> > 
> > But both XICS and XIVE are completely different kernel KVM devices that will
> > need to "hook" into the same set of internal hooks for things like 
> > interrupts
> > being passed through, RTAS calls etc... 
> > 
> > How does KVM knows which one to "activate" ?
> 
> Can't we add an extra IRQ type and use vcpu->arch.irq_type for that ? 
> I haven't studied all the low level details though.
> 
> > I don't think the kernel should have both. 
> 
> I hear that. From a QEMU perspective, it is much easier to put everything 
> in place for both interrupt modes and let the guest decide what it wants 
> to use. 
> 
> 

Re: [Qemu-devel] [Qemu-arm] Crash when running hello-world unikernel for ARM

2018-04-11 Thread Ajay Garg
Is "integratorcp" the same board that rumprun is being built for
(https://github.com/rumpkernel/rumprun/tree/master/platform/hw/arch/arm/integrator)?

On Thu, Apr 12, 2018 at 9:56 AM, Ajay Garg  wrote:
> Actually just realised that qemu does support integratorcp as one of
> the supported-boards.
>
> Unfortunately, when I use
>   qemu-system-arm -machine integratorcp -nographic -kernel helloer.bin
> the shell just hangs :(
>
>
> Following are the details when run through gdb :
>
> ##
> GNU gdb (Debian 7.7.1+dfsg-5) 7.7.1
> Copyright (C) 2014 Free Software Foundation, Inc.
> License GPLv3+: GNU GPL version 3 or later 
> This is free software: you are free to change and redistribute it.
> There is NO WARRANTY, to the extent permitted by law.  Type "show copying"
> and "show warranty" for details.
> This GDB was configured as "arm-linux-gnueabihf".
> Type "show configuration" for configuration details.
> For bug reporting instructions, please see:
> .
> Find the GDB manual and other documentation resources online at:
> .
> For help, type "help".
> Type "apropos word" to search for commands related to "word"...
> Reading symbols from qemu-system-arm...(no debugging symbols found)...done.
> (gdb) r
> Starting program: /usr/bin/qemu-system-arm -machine integratorcp
> -nographic -kernel helloer.bin
> [Thread debugging using libthread_db enabled]
> Using host libthread_db library "/lib/arm-linux-gnueabihf/libthread_db.so.1".
> [New Thread 0xb388f290 (LWP 596)]
>
> Program received signal SIGUSR1, User defined signal 1.
> [Switching to Thread 0xb388f290 (LWP 596)]
> memset () at ../ports/sysdeps/arm/memset.S:50
> 50../ports/sysdeps/arm/memset.S: No such file or directory.
> (gdb) bt
> #0  memset () at ../ports/sysdeps/arm/memset.S:50
> #1  0xb65e1c6c in ?? () from /lib/arm-linux-gnueabihf/libglib-2.0.so.0
> Backtrace stopped: previous frame identical to this frame (corrupt stack?)
> (gdb)
> ##
>
> On Wed, Apr 11, 2018 at 12:49 PM, Ajay Garg  wrote:
>> Hi All.
>>
>> Just wondering if there is something specific that needs to be changed at
>> https://github.com/rumpkernel/rumprun/tree/master/platform/hw/arch/arm/integrator
>> in order to get a hello-world app run on "virt" machine?
>>
>> If so, I would request the rumprun-guys to kindly throw in some light,
>> on what needs to be done in order to have something run on a "virt"
>> machine in qemu-context.
>>
>>
>> Thanks and Regards,
>> Ajay
>>
>> On Tue, Apr 10, 2018 at 3:49 PM, Ajay Garg  wrote:
>>> Thanks Peter .. my sincere gratitude.
>>> You pin-pointed the real issue ..
>>>
>>>
>>>
>>> On Tue, Apr 10, 2018 at 2:50 PM, Peter Maydell  
>>> wrote:
 On 10 April 2018 at 09:16, Ajay Garg  wrote:
> On Tue, Apr 10, 2018 at 1:08 PM, Peter Maydell  
> wrote:
>> What hardware (what CPU, board, etc) is this "rumprun" software
>> expecting to run on?
>
> Yep, just to ensure that there are no cross-compiling issues, I am
> building rumprun on the pseudo-real hardware itself.
> In our case, the pseudo-real hardware are :
>
> a)
> An ARM32 "virt" hardware/machine in a qemu environment
> (https://translatedcode.wordpress.com/2016/11/03/installing-debian-on-qemus-32-bit-arm-virt-board/)
>
> Once I start  this machine, all environment is arm32, and I compile
> rumprun within this environment without any cross-compiling.
>
> b)
> A beaglebone-green-wireless.
> This is a arm32 machine bottoms-up, so no question of cross-compiling
> whatsoever here :)
>
> In both cases, I then use qemu-system-arm (on the "virt" machine, and
> beaglebone-green-wireless itself).

 That's telling me what setups you're trying to compile in,
 which doesn't correspond necessarily to what the guest
 OS is built to run on.

> One query : It is apparent that there is nested qemu-virtualization in
> step a), could that be an issue?

 Why are you running this in a nested setup? I don't understand
 the purpose of doing that. It would be simpler and faster to
 just run the guest on a QEMU running in your native host system.

 Assuming this is the source for the guest you're trying to run:

 https://github.com/rumpkernel/rumprun/tree/master/platform/hw/arch

 that suggests that the only Arm board it supports is "integrator"
 (which is an absolutely ancient devboard with very little memory,
 no PCI and no virtio support). You need to confirm what Arm hardware
 this 'rumpkernel' is actually intended to run on, and then give QEMU

Re: [Qemu-devel] [Qemu-arm] Crash when running hello-world unikernel for ARM

2018-04-11 Thread Ajay Garg
Actually just realised that qemu does support integratorcp as one of
the supported-boards.

Unfortunately, when I use
  qemu-system-arm -machine integratorcp -nographic -kernel helloer.bin
the shell just hangs :(


Following are the details when run through gdb :

##
GNU gdb (Debian 7.7.1+dfsg-5) 7.7.1
Copyright (C) 2014 Free Software Foundation, Inc.
License GPLv3+: GNU GPL version 3 or later 
This is free software: you are free to change and redistribute it.
There is NO WARRANTY, to the extent permitted by law.  Type "show copying"
and "show warranty" for details.
This GDB was configured as "arm-linux-gnueabihf".
Type "show configuration" for configuration details.
For bug reporting instructions, please see:
.
Find the GDB manual and other documentation resources online at:
.
For help, type "help".
Type "apropos word" to search for commands related to "word"...
Reading symbols from qemu-system-arm...(no debugging symbols found)...done.
(gdb) r
Starting program: /usr/bin/qemu-system-arm -machine integratorcp
-nographic -kernel helloer.bin
[Thread debugging using libthread_db enabled]
Using host libthread_db library "/lib/arm-linux-gnueabihf/libthread_db.so.1".
[New Thread 0xb388f290 (LWP 596)]

Program received signal SIGUSR1, User defined signal 1.
[Switching to Thread 0xb388f290 (LWP 596)]
memset () at ../ports/sysdeps/arm/memset.S:50
50../ports/sysdeps/arm/memset.S: No such file or directory.
(gdb) bt
#0  memset () at ../ports/sysdeps/arm/memset.S:50
#1  0xb65e1c6c in ?? () from /lib/arm-linux-gnueabihf/libglib-2.0.so.0
Backtrace stopped: previous frame identical to this frame (corrupt stack?)
(gdb)
##

On Wed, Apr 11, 2018 at 12:49 PM, Ajay Garg  wrote:
> Hi All.
>
> Just wondering if there is something specific that needs to be changed at
> https://github.com/rumpkernel/rumprun/tree/master/platform/hw/arch/arm/integrator
> in order to get a hello-world app run on "virt" machine?
>
> If so, I would request the rumprun-guys to kindly throw in some light,
> on what needs to be done in order to have something run on a "virt"
> machine in qemu-context.
>
>
> Thanks and Regards,
> Ajay
>
> On Tue, Apr 10, 2018 at 3:49 PM, Ajay Garg  wrote:
>> Thanks Peter .. my sincere gratitude.
>> You pin-pointed the real issue ..
>>
>>
>>
>> On Tue, Apr 10, 2018 at 2:50 PM, Peter Maydell  
>> wrote:
>>> On 10 April 2018 at 09:16, Ajay Garg  wrote:
 On Tue, Apr 10, 2018 at 1:08 PM, Peter Maydell  
 wrote:
> What hardware (what CPU, board, etc) is this "rumprun" software
> expecting to run on?

 Yep, just to ensure that there are no cross-compiling issues, I am
 building rumprun on the pseudo-real hardware itself.
 In our case, the pseudo-real hardware are :

 a)
 An ARM32 "virt" hardware/machine in a qemu environment
 (https://translatedcode.wordpress.com/2016/11/03/installing-debian-on-qemus-32-bit-arm-virt-board/)

 Once I start  this machine, all environment is arm32, and I compile
 rumprun within this environment without any cross-compiling.

 b)
 A beaglebone-green-wireless.
 This is a arm32 machine bottoms-up, so no question of cross-compiling
 whatsoever here :)

 In both cases, I then use qemu-system-arm (on the "virt" machine, and
 beaglebone-green-wireless itself).
>>>
>>> That's telling me what setups you're trying to compile in,
>>> which doesn't correspond necessarily to what the guest
>>> OS is built to run on.
>>>
 One query : It is apparent that there is nested qemu-virtualization in
 step a), could that be an issue?
>>>
>>> Why are you running this in a nested setup? I don't understand
>>> the purpose of doing that. It would be simpler and faster to
>>> just run the guest on a QEMU running in your native host system.
>>>
>>> Assuming this is the source for the guest you're trying to run:
>>>
>>> https://github.com/rumpkernel/rumprun/tree/master/platform/hw/arch
>>>
>>> that suggests that the only Arm board it supports is "integrator"
>>> (which is an absolutely ancient devboard with very little memory,
>>> no PCI and no virtio support). You need to confirm what Arm hardware
>>> this 'rumpkernel' is actually intended to run on, and then give QEMU
>>> the right command line arguments to emulate that hardware. I can't
>>> really help any further, I'm afraid -- you need somebody who knows
>>> about this guest OS.
>>>
>>> thanks
>>> -- PMM
>>
>>
>>
>> --
>> Regards,
>> Ajay
>
>
>
> --
> Regards,
> Ajay



-- 
Regards,
Ajay



Re: [Qemu-devel] [RFC] vhost-user: introduce F_NEED_ALL_IOTLB protocol feature

2018-04-11 Thread Michael S. Tsirkin
On Thu, Apr 12, 2018 at 11:43:41AM +0800, Jason Wang wrote:
> 
> 
> On 2018年04月12日 11:35, Michael S. Tsirkin wrote:
> > > There are more advantages to using request with PASID:
> > > 
> > > You can use hardware support for nesting, having guest supply 1st level
> > > translation and host second level translation.
> > > 
> > > I actually had an idea to do something like this for AMD
> > > and ARM which support nesting even for requests with PASID,
> > > having intel benefit too would be nice.
> > Something else to consider is implementing PRS capability.
> > 
> > 
> > In theory this could then go like this:
> > 
> > - get page request from device
> > - fetch request from VTD page tables
> > - use response to issue a page response message
> > 
> > 
> > This would match the current vhost-user model.
> 
> This requires IOMMU driver can forward this to VFIO and then VFIO can
> forward it to userspace. Looks like a lot of changes and it would be even
> slower than what is proposed in this patch.
> 
> Thanks


It would work better for a static table as only accessed pages would
need to be sent.  Slower for the dynamic case but dynamic case needs
hardware support to work properly in any case.

-- 
MST



Re: [Qemu-devel] [PATCH 0/5] Enable postcopy RDMA live migration

2018-04-11 Thread 858585 jemmy
On Wed, Apr 11, 2018 at 8:29 PM, Dr. David Alan Gilbert
 wrote:
> * Lidong Chen (jemmy858...@gmail.com) wrote:
>> Current Qemu RDMA communication does not support send and receive
>> data at the same time, so when RDMA live migration with postcopy
>> enabled, the source qemu return path thread get qemu file error.
>>
>> These patches add postcopy support for RDMA live migration.
>
> This description is a little misleading; it doesn't really
> do RDMA during the postcopy phase - what it really does is disable
> the RDMA page sending during the postcopy phase, relying on the
> RDMA codes stream emulation to send the page.

Hi Dave:
I will modify the description in next version patch.

>
> That's not necessarily a bad fix; you get the nice performance of RDMA
> during the precopy phase, but how bad are you finding the performance
> during the postcopy phase - the RDMA code we have was only really
> designed for sending small commands over the stream?

I have not finished the performance test. There are three choices for RDMA
migration during the postcopy phase.

1. RDMA SEND operation from the source qemu
2. RDMA Write with Immediate from the source qemu
3. RDMA READ from the destination qemu

In theory, RDMA READ from the destination qemu is the best way.
But I think it's better to make the choice based on the performance results.
I will send the performance result later.

If we use another approach during the postcopy phase, it will be a big change to the code.
This patch series just makes postcopy work, and I will send another patch to
improve the performance.

Thanks.

>
> Dave
>
>> Lidong Chen (5):
>>   migration: create a dedicated connection for rdma return path
>>   migration: add the interface to set get_return_path
>>   migration: implement the get_return_path for RDMA iochannel
>>   migration: fix qemu crash when RDMA live migration
>>   migration: disable RDMA WRITE after postcopy started.
>>
>>  migration/qemu-file-channel.c |  12 ++--
>>  migration/qemu-file.c |  13 +++-
>>  migration/qemu-file.h |   2 +-
>>  migration/rdma.c  | 148 
>> --
>>  4 files changed, 163 insertions(+), 12 deletions(-)
>>
>> --
>> 1.8.3.1
>>
> --
> Dr. David Alan Gilbert / dgilb...@redhat.com / Manchester, UK



Re: [Qemu-devel] [RFC] vhost-user: introduce F_NEED_ALL_IOTLB protocol feature

2018-04-11 Thread Jason Wang



On 2018年04月12日 11:37, Michael S. Tsirkin wrote:

Yeah, we are still using the existing IOTLB update messages
to send the IOTLB messages to backend. The only difference
is that, QEMU won't wait for the queries before sending the
IOTLB update messages.

Yes, my question is not very clear. I mean why must need a new feature bit?
It looks to me qemu code can work without this.

Thanks

Generally we avoid adding new messages without a protocol feature bit.
While careful analysis might sometimes prove it's not a strict
requirement, it's just overall a clean and robust approach.


Right, but it looks like the patch does not introduce any new type of
messages.

Thanks

In this case remote needs to know that it will send these messages.

-- MST


Ok, if some backend does not expect that qemu will send these messages without
any query from the backend itself, I agree we need a new bit.


But it looks more like a workaround for a buggy backend; at least vhost
kernel does not need to know about this.


Thanks





Re: [Qemu-devel] [RFC] vhost-user: introduce F_NEED_ALL_IOTLB protocol feature

2018-04-11 Thread Jason Wang



On 2018年04月12日 11:35, Michael S. Tsirkin wrote:

There are more advantages to using request with PASID:

You can use hardware support for nesting, having guest supply 1st level
translation and host second level translation.

I actually had an idea to do something like this for AMD
and ARM which support nesting even for requests with PASID,
having intel benefit too would be nice.

Something else to consider is implementing PRS capability.


In theory this could then go like this:

- get page request from device
- fetch request from VTD page tables
- use response to issue a page response message


This would match the current vhost-user model.


This requires that the IOMMU driver can forward this to VFIO and that VFIO can
then forward it to userspace. It looks like a lot of changes, and it would be
even slower than what is proposed in this patch.


Thanks



Re: [Qemu-devel] [RFC] vhost-user: introduce F_NEED_ALL_IOTLB protocol feature

2018-04-11 Thread Michael S. Tsirkin
On Thu, Apr 12, 2018 at 11:37:35AM +0800, Jason Wang wrote:
> 
> 
> On 2018年04月12日 09:57, Michael S. Tsirkin wrote:
> > On Thu, Apr 12, 2018 at 09:39:43AM +0800, Tiwei Bie wrote:
> > > On Thu, Apr 12, 2018 at 04:29:29AM +0300, Michael S. Tsirkin wrote:
> > > > On Thu, Apr 12, 2018 at 09:10:59AM +0800, Tiwei Bie wrote:
> > > > > On Wed, Apr 11, 2018 at 04:22:21PM +0300, Michael S. Tsirkin wrote:
> > > > > > On Wed, Apr 11, 2018 at 03:20:27PM +0800, Tiwei Bie wrote:
> > > > > > > This patch introduces VHOST_USER_PROTOCOL_F_NEED_ALL_IOTLB
> > > > > > > feature for vhost-user. By default, vhost-user backend needs
> > > > > > > to query the IOTLBs from QEMU after meeting unknown IOVAs.
> > > > > > > With this protocol feature negotiated, QEMU will provide all
> > > > > > > the IOTLBs to vhost-user backend without waiting for the
> > > > > > > queries from backend. This is helpful when using a hardware
> > > > > > > accelerator which is not able to handle unknown IOVAs at the
> > > > > > > vhost-user backend.
> > > > > > > 
> > > > > > > Signed-off-by: Tiwei Bie 
> > > > > > This is potentially a large amount of data to be sent
> > > > > > on a socket.
> > > > > If we take the hardware accelerator out of this picture, we
> > > > > will find that it's actually a question of "pre-loading" vs
> > > > > "lazy-loading". I think neither of them is perfect.
> > > > > 
> > > > > For "pre-loading", as you said, we may have a tough starting.
> > > > > But for "lazy-loading", we can't have a predictable performance.
> > > > > A sudden, unexpected performance drop may happen at any time,
> > > > > because we may meet an unknown IOVA at any time in this case.
> > > > That's how hardware behaves too though. So we can expect guests
> > > > to try to optimize locality.
> > > The difference is that, the software implementation needs to
> > > query the mappings via socket. And it's much slower..
> > If you are proposing this new feature as an optimization,
> > then I'd like to see numbers showing the performance gains.
> > 
> > It's definitely possible to optimize things out.  Pre-loading isn't
> > where I would start optimizing though.  For example, DPDK could have its
> > own VTD emulation, then it could access guest memory directly.
> 
> Having vtd emulation in dpdk has many disadvantages:
> 
> - vendor locked, can only work for intel

I don't see what would prevent other vendors from doing the same.

> - duplication of codes and bugs
> - a huge number of new message types needs to be invented

Oh, just the flush I'd wager.

> So I tend to go to a reverse way, link dpdk to qemu.

Won't really help as people want to build software using dpdk.


> > 
> > 
> > > > > Once we meet an unknown IOVA, the backend's data path will need
> > > > > to stop and query the mapping of the IOVA via the socket and
> > > > > wait for the reply. And the latency is not negligible (sometimes
> > > > > it's even unacceptable), especially in high performance network
> > > > > case. So maybe it's better to make both of them available to
> > > > > the vhost backend.
> > > > > 
> > > > > > I had an impression that a hardware accelerator was using
> > > > > > VFIO anyway. Given this, can't we have QEMU program
> > > > > > the shadow IOMMU tables into VFIO directly?
> > > > > I think it's a good idea! Currently, my concern about it is
> > > > > that, the hardware device may not use IOMMU and it may have
> > > > > its builtin address translation unit. And it will be a pain
> > > > > for device vendors to teach VFIO to be able to work with the
> > > > > builtin address translation unit.
> > > > I think such drivers would have to interate with VFIO somehow.
> > > > Otherwise, what is the plan for assigning such devices then?
> > > Such devices are just for vhost data path acceleration.
> > That's not true I think.  E.g. RDMA devices have an on-card MMU.
> > 
> > > They have many available queue pairs, the switch logic
> > > will be done among those queue pairs. And different queue
> > > pairs will serve different VMs directly.
> > > 
> > > Best regards,
> > > Tiwei Bie
> > The way I would do it is attach different PASID values to
> > different queues. This way you can use the standard IOMMU
> > to enforce protection.
> 
> So that's just shared virtual memory on the host, which can share the iova address
> space between a specific queue pair and a process. I'm not sure how hard it would
> be for existing vhost-user backends to support this.
> 
> Thanks

That would be VFIO's job, nothing to do with vhost-user besides
sharing the VFIO descriptor.

> > 
> > 
> > 
> > > > 
> > > > > Best regards,
> > > > > Tiwei Bie
> > > > > 
> > > > > > 
> > > > > > > ---
> > > > > > > The idea of this patch is to let QEMU push all the IOTLBs
> > > > > > > to vhost-user backend without waiting for the queries from
> > > > > > > the backend. Because hardware accelerator at the vhost-user
> > > > > > > backend may not be able to handle unknown IOVAs.
> > > > > > > 
> > > > > > > This 

Re: [Qemu-devel] [RFC] vhost-user: introduce F_NEED_ALL_IOTLB protocol feature

2018-04-11 Thread Jason Wang



On 2018年04月12日 09:57, Michael S. Tsirkin wrote:

On Thu, Apr 12, 2018 at 09:39:43AM +0800, Tiwei Bie wrote:

On Thu, Apr 12, 2018 at 04:29:29AM +0300, Michael S. Tsirkin wrote:

On Thu, Apr 12, 2018 at 09:10:59AM +0800, Tiwei Bie wrote:

On Wed, Apr 11, 2018 at 04:22:21PM +0300, Michael S. Tsirkin wrote:

On Wed, Apr 11, 2018 at 03:20:27PM +0800, Tiwei Bie wrote:

This patch introduces VHOST_USER_PROTOCOL_F_NEED_ALL_IOTLB
feature for vhost-user. By default, vhost-user backend needs
to query the IOTLBs from QEMU after meeting unknown IOVAs.
With this protocol feature negotiated, QEMU will provide all
the IOTLBs to vhost-user backend without waiting for the
queries from backend. This is helpful when using a hardware
accelerator which is not able to handle unknown IOVAs at the
vhost-user backend.

Signed-off-by: Tiwei Bie 

This is potentially a large amount of data to be sent
on a socket.

If we take the hardware accelerator out of this picture, we
will find that it's actually a question of "pre-loading" vs
"lazy-loading". I think neither of them is perfect.

For "pre-loading", as you said, we may have a tough starting.
But for "lazy-loading", we can't have a predictable performance.
A sudden, unexpected performance drop may happen at any time,
because we may meet an unknown IOVA at any time in this case.

That's how hardware behaves too though. So we can expect guests
to try to optimize locality.

The difference is that, the software implementation needs to
query the mappings via socket. And it's much slower..

If you are proposing this new feature as an optimization,
then I'd like to see numbers showing the performance gains.

It's definitely possible to optimize things out.  Pre-loading isn't
where I would start optimizing though.  For example, DPDK could have its
own VTD emulation, then it could access guest memory directly.


Having vtd emulation in dpdk has many disadvantages:

- vendor locked, can only work for intel
- duplication of codes and bugs
- a huge number of new message types needs to be invented

So I tend to go to a reverse way, link dpdk to qemu.





Once we meet an unknown IOVA, the backend's data path will need
to stop and query the mapping of the IOVA via the socket and
wait for the reply. And the latency is not negligible (sometimes
it's even unacceptable), especially in high performance network
case. So maybe it's better to make both of them available to
the vhost backend.


I had an impression that a hardware accelerator was using
VFIO anyway. Given this, can't we have QEMU program
the shadow IOMMU tables into VFIO directly?

I think it's a good idea! Currently, my concern about it is
that, the hardware device may not use IOMMU and it may have
its builtin address translation unit. And it will be a pain
for device vendors to teach VFIO to be able to work with the
builtin address translation unit.

I think such drivers would have to interate with VFIO somehow.
Otherwise, what is the plan for assigning such devices then?

Such devices are just for vhost data path acceleration.

That's not true I think.  E.g. RDMA devices have an on-card MMU.


They have many available queue pairs, the switch logic
will be done among those queue pairs. And different queue
pairs will serve different VMs directly.

Best regards,
Tiwei Bie

The way I would do it is attach different PASID values to
different queues. This way you can use the standard IOMMU
to enforce protection.


So that's just shared virtual memory on the host, which can share the iova
address space between a specific queue pair and a process. I'm not sure
how hard it would be for existing vhost-user backends to support this.


Thanks








Best regards,
Tiwei Bie




---
The idea of this patch is to let QEMU push all the IOTLBs
to vhost-user backend without waiting for the queries from
the backend. Because hardware accelerator at the vhost-user
backend may not be able to handle unknown IOVAs.

This is just a RFC for now. It seems that, it doesn't work
as expected when guest is using kernel driver (To handle
this case, it seems that some RAM regions' events also need
to be listened). Any comments would be appreciated! Thanks!

  docs/interop/vhost-user.txt   |  9 
  hw/virtio/vhost-backend.c |  7 ++
  hw/virtio/vhost-user.c|  8 +++
  hw/virtio/vhost.c | 47 ---
  include/hw/virtio/vhost-backend.h |  3 +++
  5 files changed, 71 insertions(+), 3 deletions(-)

diff --git a/docs/interop/vhost-user.txt b/docs/interop/vhost-user.txt
index 534caab18a..73e07f9dad 100644
--- a/docs/interop/vhost-user.txt
+++ b/docs/interop/vhost-user.txt
@@ -380,6 +380,7 @@ Protocol features
  #define VHOST_USER_PROTOCOL_F_CRYPTO_SESSION 7
  #define VHOST_USER_PROTOCOL_F_PAGEFAULT  8
  #define VHOST_USER_PROTOCOL_F_CONFIG 9
+#define VHOST_USER_PROTOCOL_F_NEED_ALL_IOTLB 10
  
  Master message types

  
@@ -797,3 +798,11 @@ resilient for 

Re: [Qemu-devel] [RFC] vhost-user: introduce F_NEED_ALL_IOTLB protocol feature

2018-04-11 Thread Michael S. Tsirkin
On Thu, Apr 12, 2018 at 11:23:31AM +0800, Jason Wang wrote:
> 
> 
> On 2018年04月12日 01:00, Michael S. Tsirkin wrote:
> > On Wed, Apr 11, 2018 at 09:41:05PM +0800, Jason Wang wrote:
> > > On 2018年04月11日 16:38, Tiwei Bie wrote:
> > > > On Wed, Apr 11, 2018 at 04:01:19PM +0800, Jason Wang wrote:
> > > > > On 2018年04月11日 15:20, Tiwei Bie wrote:
> > > > > > This patch introduces VHOST_USER_PROTOCOL_F_NEED_ALL_IOTLB
> > > > > > feature for vhost-user. By default, vhost-user backend needs
> > > > > > to query the IOTLBs from QEMU after meeting unknown IOVAs.
> > > > > > With this protocol feature negotiated, QEMU will provide all
> > > > > > the IOTLBs to vhost-user backend without waiting for the
> > > > > > queries from backend. This is helpful when using a hardware
> > > > > > accelerator which is not able to handle unknown IOVAs at the
> > > > > > vhost-user backend.
> > > > > > 
> > > > > > Signed-off-by: Tiwei Bie
> > > > > > ---
> > > > > > The idea of this patch is to let QEMU push all the IOTLBs
> > > > > > to vhost-user backend without waiting for the queries from
> > > > > > the backend. Because hardware accelerator at the vhost-user
> > > > > > backend may not be able to handle unknown IOVAs.
> > > > > > 
> > > > > > This is just a RFC for now. It seems that, it doesn't work
> > > > > > as expected when guest is using kernel driver (To handle
> > > > > > this case, it seems that some RAM regions' events also need
> > > > > > to be listened). Any comments would be appreciated! Thanks!
> > > > > Interesting, a quick question is why this is needed? Can we just use 
> > > > > exist
> > > > > IOTLB update message?
> > > > Yeah, we are still using the existing IOTLB update messages
> > > > to send the IOTLB messages to backend. The only difference
> > > > is that, QEMU won't wait for the queries before sending the
> > > > IOTLB update messages.
> > > Yes, my question is not very clear. I mean why must need a new feature 
> > > bit?
> > > It looks to me qemu code can work without this.
> > > 
> > > Thanks
> > Generally we avoid adding new messages without a protocol feature bit.
> > While careful analysis might sometimes prove it's not a strict
> > requirement, it's just overall a clean and robust approach.
> > 
> 
> Right, but it looks like the patch does not introduce any new type of
> messages.
> 
> Thanks

In this case remote needs to know that it will send these messages.

-- 
MST



Re: [Qemu-devel] [RFC] vhost-user: introduce F_NEED_ALL_IOTLB protocol feature

2018-04-11 Thread Michael S. Tsirkin
On Thu, Apr 12, 2018 at 06:20:55AM +0300, Michael S. Tsirkin wrote:
> On Thu, Apr 12, 2018 at 10:35:05AM +0800, Tiwei Bie wrote:
> > On Thu, Apr 12, 2018 at 04:57:13AM +0300, Michael S. Tsirkin wrote:
> > > On Thu, Apr 12, 2018 at 09:39:43AM +0800, Tiwei Bie wrote:
> > > > On Thu, Apr 12, 2018 at 04:29:29AM +0300, Michael S. Tsirkin wrote:
> > > > > On Thu, Apr 12, 2018 at 09:10:59AM +0800, Tiwei Bie wrote:
> > > > > > On Wed, Apr 11, 2018 at 04:22:21PM +0300, Michael S. Tsirkin wrote:
> > > > > > > On Wed, Apr 11, 2018 at 03:20:27PM +0800, Tiwei Bie wrote:
> > > > > > > > This patch introduces VHOST_USER_PROTOCOL_F_NEED_ALL_IOTLB
> > > > > > > > feature for vhost-user. By default, vhost-user backend needs
> > > > > > > > to query the IOTLBs from QEMU after meeting unknown IOVAs.
> > > > > > > > With this protocol feature negotiated, QEMU will provide all
> > > > > > > > the IOTLBs to vhost-user backend without waiting for the
> > > > > > > > queries from backend. This is helpful when using a hardware
> > > > > > > > accelerator which is not able to handle unknown IOVAs at the
> > > > > > > > vhost-user backend.
> > > > > > > > 
> > > > > > > > Signed-off-by: Tiwei Bie 
> > > > > > > 
> > > > > > > This is potentially a large amount of data to be sent
> > > > > > > on a socket.
> > > > > > 
> > > > > > If we take the hardware accelerator out of this picture, we
> > > > > > will find that it's actually a question of "pre-loading" vs
> > > > > > "lazy-loading". I think neither of them is perfect.
> > > > > > 
> > > > > > For "pre-loading", as you said, we may have a tough starting.
> > > > > > But for "lazy-loading", we can't have a predictable performance.
> > > > > > A sudden, unexpected performance drop may happen at any time,
> > > > > > because we may meet an unknown IOVA at any time in this case.
> > > > > 
> > > > > That's how hardware behaves too though. So we can expect guests
> > > > > to try to optimize locality.
> > > > 
> > > > The difference is that, the software implementation needs to
> > > > query the mappings via socket. And it's much slower..
> > > 
> > > If you are proposing this new feature as an optimization,
> > > then I'd like to see numbers showing the performance gains.
> > > 
> > > It's definitely possible to optimize things out.  Pre-loading isn't
> > > where I would start optimizing though.  For example, DPDK could have its
> > > own VTD emulation, then it could access guest memory directly.
> > > 
> > > 
> > > > > 
> > > > > > Once we meet an unknown IOVA, the backend's data path will need
> > > > > > to stop and query the mapping of the IOVA via the socket and
> > > > > > wait for the reply. And the latency is not negligible (sometimes
> > > > > > it's even unacceptable), especially in high performance network
> > > > > > case. So maybe it's better to make both of them available to
> > > > > > the vhost backend.
> > > > > > 
> > > > > > > 
> > > > > > > I had an impression that a hardware accelerator was using
> > > > > > > VFIO anyway. Given this, can't we have QEMU program
> > > > > > > the shadow IOMMU tables into VFIO directly?
> > > > > > 
> > > > > > I think it's a good idea! Currently, my concern about it is
> > > > > > that, the hardware device may not use IOMMU and it may have
> > > > > > its builtin address translation unit. And it will be a pain
> > > > > > for device vendors to teach VFIO to be able to work with the
> > > > > > builtin address translation unit.
> > > > > 
> > > > > I think such drivers would have to interate with VFIO somehow.
> > > > > Otherwise, what is the plan for assigning such devices then?
> > > > 
> > > > Such devices are just for vhost data path acceleration.
> > > 
> > > That's not true I think.  E.g. RDMA devices have an on-card MMU.
> > > 
> > > > They have many available queue pairs, the switch logic
> > > > will be done among those queue pairs. And different queue
> > > > pairs will serve different VMs directly.
> > > > 
> > > > Best regards,
> > > > Tiwei Bie
> > > 
> > > The way I would do it is attach different PASID values to
> > > different queues. This way you can use the standard IOMMU
> > > to enforce protection.
> > 
> > I'm thinking about the case that device wants to use its
> > builtin address translation, although I'm not really sure
> > whether there will be a real product work in this way.
> > 
> > Honestly, I like your idea, and I don't object to it. I'll
> > do more investigations on it. And for the feature proposed
> > in this RFC, I just think maybe it's better to provide one
> > more possibility for the backend to support vIOMMU.
> > 
> > Anyway, the work about adding the vIOMMU support in vDPA is
> > just started few days ago. I'll do more investigations on
> > each possibility. Thanks! :)
> > 
> > Best regards,
> > Tiwei Bie
> 
> There are more advantages to using request with PASID:
> 
> You can use hardware support for nesting, having guest supply 1st level
> translation and host second 

Re: [Qemu-devel] [RFC] vhost-user: introduce F_NEED_ALL_IOTLB protocol feature

2018-04-11 Thread Jason Wang



On 2018年04月12日 01:00, Michael S. Tsirkin wrote:

On Wed, Apr 11, 2018 at 09:41:05PM +0800, Jason Wang wrote:

On 2018年04月11日 16:38, Tiwei Bie wrote:

On Wed, Apr 11, 2018 at 04:01:19PM +0800, Jason Wang wrote:

On 2018年04月11日 15:20, Tiwei Bie wrote:

This patch introduces VHOST_USER_PROTOCOL_F_NEED_ALL_IOTLB
feature for vhost-user. By default, vhost-user backend needs
to query the IOTLBs from QEMU after meeting unknown IOVAs.
With this protocol feature negotiated, QEMU will provide all
the IOTLBs to vhost-user backend without waiting for the
queries from backend. This is helpful when using a hardware
accelerator which is not able to handle unknown IOVAs at the
vhost-user backend.

Signed-off-by: Tiwei Bie
---
The idea of this patch is to let QEMU push all the IOTLBs
to vhost-user backend without waiting for the queries from
the backend. Because hardware accelerator at the vhost-user
backend may not be able to handle unknown IOVAs.

This is just a RFC for now. It seems that, it doesn't work
as expected when guest is using kernel driver (To handle
this case, it seems that some RAM regions' events also need
to be listened). Any comments would be appreciated! Thanks!

Interesting, a quick question is why this is needed? Can we just use exist
IOTLB update message?

Yeah, we are still using the existing IOTLB update messages
to send the IOTLB messages to backend. The only difference
is that, QEMU won't wait for the queries before sending the
IOTLB update messages.

Yes, my question is not very clear. I mean why must need a new feature bit?
It looks to me qemu code can work without this.

Thanks

Generally we avoid adding new messages without a protocol feature bit.
While careful analysis might sometimes prove it's not a strict
requirement, it's just overall a clean and robust approach.



Right, but it looks like the patch does not introduce any new type of
messages.


Thanks




Re: [Qemu-devel] [RFC] vhost-user: introduce F_NEED_ALL_IOTLB protocol feature

2018-04-11 Thread Michael S. Tsirkin
On Thu, Apr 12, 2018 at 10:35:05AM +0800, Tiwei Bie wrote:
> On Thu, Apr 12, 2018 at 04:57:13AM +0300, Michael S. Tsirkin wrote:
> > On Thu, Apr 12, 2018 at 09:39:43AM +0800, Tiwei Bie wrote:
> > > On Thu, Apr 12, 2018 at 04:29:29AM +0300, Michael S. Tsirkin wrote:
> > > > On Thu, Apr 12, 2018 at 09:10:59AM +0800, Tiwei Bie wrote:
> > > > > On Wed, Apr 11, 2018 at 04:22:21PM +0300, Michael S. Tsirkin wrote:
> > > > > > On Wed, Apr 11, 2018 at 03:20:27PM +0800, Tiwei Bie wrote:
> > > > > > > This patch introduces VHOST_USER_PROTOCOL_F_NEED_ALL_IOTLB
> > > > > > > feature for vhost-user. By default, vhost-user backend needs
> > > > > > > to query the IOTLBs from QEMU after meeting unknown IOVAs.
> > > > > > > With this protocol feature negotiated, QEMU will provide all
> > > > > > > the IOTLBs to vhost-user backend without waiting for the
> > > > > > > queries from backend. This is helpful when using a hardware
> > > > > > > accelerator which is not able to handle unknown IOVAs at the
> > > > > > > vhost-user backend.
> > > > > > > 
> > > > > > > Signed-off-by: Tiwei Bie 
> > > > > > 
> > > > > > This is potentially a large amount of data to be sent
> > > > > > on a socket.
> > > > > 
> > > > > If we take the hardware accelerator out of this picture, we
> > > > > will find that it's actually a question of "pre-loading" vs
> > > > > "lazy-loading". I think neither of them is perfect.
> > > > > 
> > > > > For "pre-loading", as you said, we may have a tough starting.
> > > > > But for "lazy-loading", we can't have a predictable performance.
> > > > > A sudden, unexpected performance drop may happen at any time,
> > > > > because we may meet an unknown IOVA at any time in this case.
> > > > 
> > > > That's how hardware behaves too though. So we can expect guests
> > > > to try to optimize locality.
> > > 
> > > The difference is that, the software implementation needs to
> > > query the mappings via socket. And it's much slower..
> > 
> > If you are proposing this new feature as an optimization,
> > then I'd like to see numbers showing the performance gains.
> > 
> > It's definitely possible to optimize things out.  Pre-loading isn't
> > where I would start optimizing though.  For example, DPDK could have its
> > own VTD emulation, then it could access guest memory directly.
> > 
> > 
> > > > 
> > > > > Once we meet an unknown IOVA, the backend's data path will need
> > > > > to stop and query the mapping of the IOVA via the socket and
> > > > > wait for the reply. And the latency is not negligible (sometimes
> > > > > it's even unacceptable), especially in high performance network
> > > > > case. So maybe it's better to make both of them available to
> > > > > the vhost backend.
> > > > > 
> > > > > > 
> > > > > > I had an impression that a hardware accelerator was using
> > > > > > VFIO anyway. Given this, can't we have QEMU program
> > > > > > the shadow IOMMU tables into VFIO directly?
> > > > > 
> > > > > I think it's a good idea! Currently, my concern about it is
> > > > > that, the hardware device may not use IOMMU and it may have
> > > > > its builtin address translation unit. And it will be a pain
> > > > > for device vendors to teach VFIO to be able to work with the
> > > > > builtin address translation unit.
> > > > 
> > > > I think such drivers would have to interate with VFIO somehow.
> > > > Otherwise, what is the plan for assigning such devices then?
> > > 
> > > Such devices are just for vhost data path acceleration.
> > 
> > That's not true I think.  E.g. RDMA devices have an on-card MMU.
> > 
> > > They have many available queue pairs, the switch logic
> > > will be done among those queue pairs. And different queue
> > > pairs will serve different VMs directly.
> > > 
> > > Best regards,
> > > Tiwei Bie
> > 
> > The way I would do it is attach different PASID values to
> > different queues. This way you can use the standard IOMMU
> > to enforce protection.
> 
> I'm thinking about the case that device wants to use its
> builtin address translation, although I'm not really sure
> whether there will be a real product work in this way.
> 
> Honestly, I like your idea, and I don't object to it. I'll
> do more investigations on it. And for the feature proposed
> in this RFC, I just think maybe it's better to provide one
> more possibility for the backend to support vIOMMU.
> 
> Anyway, the work about adding the vIOMMU support in vDPA is
> just started few days ago. I'll do more investigations on
> each possibility. Thanks! :)
> 
> Best regards,
> Tiwei Bie

There are more advantages to using request with PASID:

You can use hardware support for nesting, having guest supply 1st level
translation and host second level translation.

I actually had an idea to do something like this for AMD
and ARM which support nesting even for requests with PASID,
having intel benefit too would be nice.



> > 
> > 
> > > > 
> > > > 
> > > > > Best regards,
> > > > > Tiwei Bie
> > > > > 
> > > > > 

Re: [Qemu-devel] [PATCH for-2.13] spapr: drop useless dynamic sysbus device sanity check

2018-04-11 Thread David Gibson
On Wed, Apr 11, 2018 at 05:01:20PM +0200, Greg Kurz wrote:
> Since commit 7da79a167aa11, the machine class init function registers
> dynamic sysbus device types it supports. Passing an unsupported device
> type on the command line causes QEMU to exit with an error message
> just after machine init.
> 
> It is hence not needed to do the same sanity check at machine reset.
> 
> Signed-off-by: Greg Kurz 

Applied to ppc-for-2.13, thanks.

> ---
>  hw/ppc/spapr.c |   18 --
>  1 file changed, 18 deletions(-)
> 
> diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
> index 3ffadd6ac7ce..637e50e6d10b 100644
> --- a/hw/ppc/spapr.c
> +++ b/hw/ppc/spapr.c
> @@ -1440,21 +1440,6 @@ void spapr_setup_hpt_and_vrma(sPAPRMachineState *spapr)
>  }
>  }
>  
> -static void find_unknown_sysbus_device(SysBusDevice *sbdev, void *opaque)
> -{
> -bool matched = false;
> -
> -if (object_dynamic_cast(OBJECT(sbdev), TYPE_SPAPR_PCI_HOST_BRIDGE)) {
> -matched = true;
> -}
> -
> -if (!matched) {
> -error_report("Device %s is not supported by this machine yet.",
> - qdev_fw_name(DEVICE(sbdev)));
> -exit(1);
> -}
> -}
> -
>  static int spapr_reset_drcs(Object *child, void *opaque)
>  {
>  sPAPRDRConnector *drc =
> @@ -1478,9 +1463,6 @@ static void spapr_machine_reset(void)
>  void *fdt;
>  int rc;
>  
> -/* Check for unknown sysbus devices */
> -foreach_dynamic_sysbus_device(find_unknown_sysbus_device, NULL);
> -
>  spapr_caps_reset(spapr);
>  
>  first_ppc_cpu = POWERPC_CPU(first_cpu);
> 

-- 
David Gibson| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au  | minimalist, thank you.  NOT _the_ _other_
| _way_ _around_!
http://www.ozlabs.org/~dgibson


signature.asc
Description: PGP signature


Re: [Qemu-devel] [PATCH] spapr: drop useless sanity check in spapr_irq_alloc*()

2018-04-11 Thread David Gibson
On Wed, Apr 11, 2018 at 07:46:06PM +0200, Greg Kurz wrote:
> Both spapr_irq_alloc() and spapr_irq_alloc_block() have an errp
> parameter, but they don't use it if XICS hasn't been initialized
> yet.
> 
> This is doubly wrong:
> 
> - all callers do pass a non-null Error **, ie, they expect an error
>   to be propagated in case of failure
> 
> - XICS obviously needs to be initialized before anything starts allocating
>   IRQs
> 
> So this patch turns the check into an assert.
> 
> Signed-off-by: Greg Kurz 

Applied to ppc-for-2.13, thanks.

> ---
>  hw/ppc/spapr.c |9 +++--
>  1 file changed, 3 insertions(+), 6 deletions(-)
> 
> diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
> index 4d27909fb152..799673319b06 100644
> --- a/hw/ppc/spapr.c
> +++ b/hw/ppc/spapr.c
> @@ -3707,9 +3707,8 @@ int spapr_irq_alloc(sPAPRMachineState *spapr, int 
> irq_hint, bool lsi,
>  ICSState *ics = spapr->ics;
>  int irq;
>  
> -if (!ics) {
> -return -1;
> -}
> +assert(ics);
> +
>  if (irq_hint) {
>  if (!ICS_IRQ_FREE(ics, irq_hint - ics->offset)) {
>  error_setg(errp, "can't allocate IRQ %d: already in use", 
> irq_hint);
> @@ -3741,9 +3740,7 @@ int spapr_irq_alloc_block(sPAPRMachineState *spapr, int 
> num, bool lsi,
>  ICSState *ics = spapr->ics;
>  int i, first = -1;
>  
> -if (!ics) {
> -return -1;
> -}
> +assert(ics);
>  
>  /*
>   * MSIMesage::data is used for storing VIRQ so
> 

-- 
David Gibson| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au  | minimalist, thank you.  NOT _the_ _other_
| _way_ _around_!
http://www.ozlabs.org/~dgibson


signature.asc
Description: PGP signature


Re: [Qemu-devel] [PATCH for 2.13 v3 0/2] target/ppc: Support adding memory to initially memory-less NUMA nodes

2018-04-11 Thread David Gibson
On Wed, Apr 11, 2018 at 02:41:58PM -0400, Serhii Popovych wrote:
> Now that the PowerPC Linux kernel supports hot-add to NUMA nodes not populated
> initially with memory, we can enable such support in qemu. This requires
> two changes:
> 
>   o Add device tree property "ibm,max-associativity-domains" to let
> guest kernel chance to find max possible NUMA node
> 
>   o Revert  commit b556854bd852 ("spapr: Don't allow memory hotplug to
> memory less nodes") to remove check for hot-add to memory-less node.
> 
> See description messages for individual changes for more details.
> 
> v3:
>   - Make layer for max_cpus unspecified instead of setting it to zero.
> Not adding cpu_to_be32(spapr_vcpu_id(spapr, max_cpus - 1)) because
> at the moment we only want max number for numa nodes to enable feat.
>   - Rebase to current state of master branch.
> 
> v2:
>   - Reorder patches in series according to description above.
>   - Add extra comment to revert noticing return to previous behaviour for
> guests without support for hot-add to empty node.
>   - Drop max_cpus from topology in property due to vcpu id discontiguous
> allocations. Thanks to David Gibson for extra explanation.
>   - Rebase to current state of master branch.
> 
> Serhii Popovych (2):
>   spapr: Add ibm,max-associativity-domains property
>   Revert "spapr: Don't allow memory hotplug to memory less nodes"

Applied to ppc-for-2.13, thanks.

-- 
David Gibson| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au  | minimalist, thank you.  NOT _the_ _other_
| _way_ _around_!
http://www.ozlabs.org/~dgibson


signature.asc
Description: PGP signature


Re: [Qemu-devel] [PATCH v3 05/12] hw/pci: introduce PCISVAOps to PCIDevice

2018-04-11 Thread David Gibson
On Tue, Mar 06, 2018 at 06:33:52PM +0800, Liu, Yi L wrote:
> On Mon, Mar 05, 2018 at 02:31:44PM +1100, David Gibson wrote:
> > On Thu, Mar 01, 2018 at 06:31:55PM +0800, Liu, Yi L wrote:
> > > This patch introduces PCISVAOps for virt-SVA.
> > >
> > > So far, to setup virt-SVA for assigned SVA capable device, needs to
> > > config host translation structures. e.g. for VT-d, needs to set the
> > > guest pasid table to host and enable nested translation. Besides,
> > > vIOMMU emulator needs to forward guest's cache invalidation to host.
> > > On VT-d, it is guest's invalidation to 1st level translation related
> > > cache, such invalidation should be forwarded to host.
> > >
> > > Proposed PCISVAOps are:
> > > * sva_bind_guest_pasid_table: set the guest pasid table to host, and
> > >   enable nested translation in host
> > > * sva_register_notifier: register sva_notifier to forward guest's
> > >  cache invalidation to host
> > > * sva_unregister_notifier: unregister sva_notifier
> > >
> > > The PCISVAOps should be provided by vfio or modules alike. Mainly for
> > > assigned SVA capable devices.
> > >
> > > Take virt-SVA on VT-d as an example:
> > > If a guest wants to setup virt-SVA for an assigned SVA capable device,
> > > it programs its context entry. vIOMMU emulator captures guest's context
> > > entry programming, and figure out the target device. vIOMMU emulator
> > > use the pci_device_sva_bind_pasid_table() API to bind the guest pasid
> > > table to host.
> > >
> > > Guest would also program its pasid table. vIOMMU emulator captures
> > > guest's pasid entry programming. In Qemu, needs to allocate an
> > > AddressSpace to stand for the pasid tagged address space and Qemu also
> > > needs to register sva_notifier to forward future cache invalidation
> > > request to host.
> > >
> > > Allocating AddressSpace to stand for the pasid tagged address space is
> > > for the emulation of emulated SVA capable devices. Emulated SVA capable
> > > devices may issue SVA aware DMAs, Qemu needs to emulate read/write to a
> > > pasid tagged AddressSpace. Thus needs an abstraction for such address
> > > space in Qemu.
> > >
> > > Signed-off-by: Liu, Yi L 
> >
> > So PCISVAOps is roughly equivalent to the cluster-of-PASIDs context I
> > was suggesting in my earlier comments,
> 
> yes, it is. The purpose is to expose pasid table bind and sva notifier
> registration/unregistration to vIOMMU emulators.
> 
> > however it's only an ops
> > structure.  That means you can't easily share a context between
> > multiple PCI devices which is unfortunate because:
> > * The simplest use case for SVA I can see would just put the
> >   same set of PASIDs into place for every SVA capable device
> 
> Do you mean for emulated SVA capable device?

Not necessarily.  I'd expect that model could be useful for both
emulated and passthrough SVA capable devices.

> > * Sometimes the IOMMU can't determine exactly what device a DMA
> >   came from.  Now the bridge cases where this applies are probably
> >   unlikely with SVA devices, but I wouldn't want to bet on it.  In
> >   addition, the chances some manufacturer will eventually put out
> >   a buggy multifunction SVA capable device that use the wrong RIDs
> >   for the secondary functions is pretty darn high.
> 
> I'm not sure I 100% got your point here. Do you mean physical device?
> In PCIE TLP, DMA packet should have a RID field?

Yes, but that RID isn't accurate in all cases.

One case is if you have a PCIe device behind both a PCIe->PCI and
PCI->PCIe bridge.  Now obviously SVA won't work in that case, but it
would be good to at least detect it and refuse to attempt SVA.

Another case is with a buggy device that just sends the wrong RID.  In
particular there are some multifunction devices that use function 0's
RID for all functions.  Obviously that's a hardware bug and we can't
expect everything to work in this case.  But forcing all the functions
to share an SVAContext in this case - like we already force them to
share an IOMMU group - allows us to reason about what will and won't work.

> And it looks more like
> a hardware layer trouble. For this series, it only provides necessary
> software support to make sure guest's SVA operation is well prepared
> before the SVA device issues the SVA aware DMA. e.g. link guest's pasid
> table to host, and config iommu translation in nested mode.
> 
> >
> > So I think instead you want a cluster-of-PASIDs object which has an
> > ops table including both these and the per-PASID calls from the
> > earlier patches (but the per-PASID calls would now take an explicit
> > PASID value).
> 
> I didn't quite get "including both these and the per-PASID calls".
> What do you mean by "these"? Do you mean the PCISVAOps?

I mean that I think PCISVAOps should become a full object including an
ops table, not just an ops table.  That table would include the 

Re: [Qemu-devel] Bad icount read when running qemu-system-ppc64 and mfspr atbu guest instruction

2018-04-11 Thread Darrell Leinwand
Ah I see, that makes sense and the atb instructions are marked as unused in the 
mainline which when I compared I recall now that we modified them because the 
guest software we are running is trying to use those special registers. 

I added the wrappers into those function, is that correct?

Thanks for the help.

Darrell

On 4/11/18, 4:39 PM, "Richard Henderson"  wrote:

On 04/12/2018 04:18 AM, Darrell Leinwand wrote:
> Hi,
> 
> When I enable icount using an e5500 core I get an exit with “Bad icount
> read” when the guest software executes a load atbu command.

Yep, it looks like there are some bugs in the ppc front end wrt icount.

In target/ppc/translate_init.c, spr_read_tbu has the proper wrappers for
integrating with icount, but a few lines lower spr_read_atbl and 
spr_read_atbu
do not.

This will probably have to wait until the 2.13 cycle to fix...


r~


PS:

> 
> CONFIDENTIALITY NOTICE: The information contained in this e-mail and any 
accompanying documents may contain information that is confidential, 
proprietary to Performance Software, or otherwise protected from disclosure. If 
you are not the intended recipient of this message, or if this message has been 
addressed to you in error, please immediately alert the sender by reply e-mail 
and then delete this message, including any attachments. Any dissemination, 
distribution or other use of the contents of this message by anyone other than 
the intended recipient is strictly prohibited.
> 

You really should strip this from your signature when posting to a public 
list.



smime.p7s
Description: S/MIME cryptographic signature


Re: [Qemu-devel] [RFC] vhost-user: introduce F_NEED_ALL_IOTLB protocol feature

2018-04-11 Thread Tiwei Bie
On Thu, Apr 12, 2018 at 04:57:13AM +0300, Michael S. Tsirkin wrote:
> On Thu, Apr 12, 2018 at 09:39:43AM +0800, Tiwei Bie wrote:
> > On Thu, Apr 12, 2018 at 04:29:29AM +0300, Michael S. Tsirkin wrote:
> > > On Thu, Apr 12, 2018 at 09:10:59AM +0800, Tiwei Bie wrote:
> > > > On Wed, Apr 11, 2018 at 04:22:21PM +0300, Michael S. Tsirkin wrote:
> > > > > On Wed, Apr 11, 2018 at 03:20:27PM +0800, Tiwei Bie wrote:
> > > > > > This patch introduces VHOST_USER_PROTOCOL_F_NEED_ALL_IOTLB
> > > > > > feature for vhost-user. By default, vhost-user backend needs
> > > > > > to query the IOTLBs from QEMU after meeting unknown IOVAs.
> > > > > > With this protocol feature negotiated, QEMU will provide all
> > > > > > the IOTLBs to vhost-user backend without waiting for the
> > > > > > queries from backend. This is helpful when using a hardware
> > > > > > accelerator which is not able to handle unknown IOVAs at the
> > > > > > vhost-user backend.
> > > > > > 
> > > > > > Signed-off-by: Tiwei Bie 
> > > > > 
> > > > > This is potentially a large amount of data to be sent
> > > > > on a socket.
> > > > 
> > > > If we take the hardware accelerator out of this picture, we
> > > > will find that it's actually a question of "pre-loading" vs
> > > > "lazy-loading". I think neither of them is perfect.
> > > > 
> > > > For "pre-loading", as you said, we may have a tough starting.
> > > > But for "lazy-loading", we can't have a predictable performance.
> > > > A sudden, unexpected performance drop may happen at any time,
> > > > because we may meet an unknown IOVA at any time in this case.
> > > 
> > > That's how hardware behaves too though. So we can expect guests
> > > to try to optimize locality.
> > 
> > The difference is that, the software implementation needs to
> > query the mappings via socket. And it's much slower..
> 
> If you are proposing this new feature as an optimization,
> then I'd like to see numbers showing the performance gains.
> 
> It's definitely possible to optimize things out.  Pre-loading isn't
> where I would start optimizing though.  For example, DPDK could have its
> own VTD emulation, then it could access guest memory directly.
> 
> 
> > > 
> > > > Once we meet an unknown IOVA, the backend's data path will need
> > > > to stop and query the mapping of the IOVA via the socket and
> > > > wait for the reply. And the latency is not negligible (sometimes
> > > > it's even unacceptable), especially in high performance network
> > > > case. So maybe it's better to make both of them available to
> > > > the vhost backend.
> > > > 
> > > > > 
> > > > > I had an impression that a hardware accelerator was using
> > > > > VFIO anyway. Given this, can't we have QEMU program
> > > > > the shadow IOMMU tables into VFIO directly?
> > > > 
> > > > I think it's a good idea! Currently, my concern about it is
> > > > that, the hardware device may not use IOMMU and it may have
> > > > its builtin address translation unit. And it will be a pain
> > > > for device vendors to teach VFIO to be able to work with the
> > > > builtin address translation unit.
> > > 
> > > I think such drivers would have to integrate with VFIO somehow.
> > > Otherwise, what is the plan for assigning such devices then?
> > 
> > Such devices are just for vhost data path acceleration.
> 
> That's not true I think.  E.g. RDMA devices have an on-card MMU.
> 
> > They have many available queue pairs, the switch logic
> > will be done among those queue pairs. And different queue
> > pairs will serve different VMs directly.
> > 
> > Best regards,
> > Tiwei Bie
> 
> The way I would do it is attach different PASID values to
> different queues. This way you can use the standard IOMMU
> to enforce protection.

I'm thinking about the case that device wants to use its
builtin address translation, although I'm not really sure
whether there will be a real product work in this way.

Honestly, I like your idea, and I don't object to it. I'll
do more investigations on it. And for the feature proposed
in this RFC, I just think maybe it's better to provide one
more possibility for the backend to support vIOMMU.

Anyway, the work about adding the vIOMMU support in vDPA is
just started few days ago. I'll do more investigations on
each possibility. Thanks! :)

Best regards,
Tiwei Bie

> 
> 
> > > 
> > > 
> > > > Best regards,
> > > > Tiwei Bie
> > > > 
> > > > > 
> > > > > 
> > > > > > ---
> > > > > > The idea of this patch is to let QEMU push all the IOTLBs
> > > > > > to vhost-user backend without waiting for the queries from
> > > > > > the backend. Because hardware accelerator at the vhost-user
> > > > > > backend may not be able to handle unknown IOVAs.
> > > > > > 
> > > > > > This is just a RFC for now. It seems that, it doesn't work
> > > > > > as expected when guest is using kernel driver (To handle
> > > > > > this case, it seems that some RAM regions' events also need
> > > > > > to be listened). Any comments would be appreciated! Thanks!

Re: [Qemu-devel] [PATCH V4] migration: add capability to bypass the shared memory

2018-04-11 Thread Lai Jiangshan
On Tue, Apr 10, 2018 at 1:30 AM, Dr. David Alan Gilbert
 wrote:
> Hi,
>
> * Lai Jiangshan (jiangshan...@gmail.com) wrote:
>> 1) What's this
>>
>> When the migration capability 'bypass-shared-memory'
>> is set, the shared memory will be bypassed when migration.
>>
>> It is the key feature to enable several excellent features for
>> the qemu, such as qemu-local-migration, qemu-live-update,
>> extremely-fast-save-restore, vm-template, vm-fast-live-clone,
>> yet-another-post-copy-migration, etc..
>>
>> The philosophy behind this key feature, including the resulting
>> advanced key features, is that a part of the memory management
>> is separated out from the qemu, and let the other toolkits
>> such as libvirt, kata-containers (https://github.com/kata-containers)
>> runv(https://github.com/hyperhq/runv/) or some multiple cooperative
>> qemu commands directly access to it, manage it, provide features on it.
>>
>> 2) Status in real world
>>
>> The hyperhq(http://hyper.sh  http://hypercontainer.io/)
>> introduced the feature vm-template(vm-fast-live-clone)
>> to the hyper container for several years, it works perfect.
>> (see https://github.com/hyperhq/runv/pull/297).
>>
>> The feature vm-template makes the containers(VMs) can
>> be started in 130ms and save 80M memory for every
>> container(VM). So that the hyper containers are fast
>> and high-density as normal containers.
>>
>> kata-containers project (https://github.com/kata-containers)
>> which was launched by hyper, intel and friends and which descended
>> from runv (and clear-container) should have this feature enabled.
>> Unfortunately, due to the code confliction between runv,
>> this feature was temporary disabled and it is being brought
>> back by hyper and intel team.
>>
>> 3) How to use and bring up advanced features.
>>
>> In current qemu command line, shared memory has
>> to be configured via memory-object.
>>
>> a) feature: qemu-local-migration, qemu-live-update
>> Set the mem-path on the tmpfs and set share=on for it when
>> start the vm. example:
>> -object \
>> memory-backend-file,id=mem,size=128M,mem-path=/dev/shm/memory,share=on \
>> -numa node,nodeid=0,cpus=0-7,memdev=mem
>>
>> when you want to migrate the vm locally (after fixed a security bug
>> of the qemu-binary, or other reason), you can start a new qemu with
>> the same command line and -incoming, then you can migrate the
>> vm from the old qemu to the new qemu with the migration capability
>> 'bypass-shared-memory' set. The migration will migrate the device-state
>> *ONLY*, the memory is the origin memory backed by tmpfs file.
>>
>> b) feature: extremely-fast-save-restore
>> the same above, but the mem-path is on the persistent file system.
>>
>> c)  feature: vm-template, vm-fast-live-clone
>> the template vm is started as 1), and paused when the guest reaches
>> the template point(example: the guest app is ready), then the template
>> vm is saved. (the qemu process of the template can be killed now, because
>> we need only the memory and the device state files (in tmpfs)).
>>
>> Then we can launch one or multiple VMs base on the template vm states,
>> the new VMs are started without the “share=on”, all the new VMs share
>> the initial memory from the memory file, they save a lot of memory.
>> all the new VMs start from the template point, the guest app can go to
>> work quickly.
>
> How do you handle the storage in this case, or giving each VM its own
> MAC address?

The user or the upper layer tools can copy/clone the storage
(on xfs,btrfs,ceph...). The user or the upper layer tools can handle the
interface MAC itself while this patch just focus on memory.

hyper/runv clone the vm before the interfaces are inserted.
vm-template are often used along with hotplugging.

>
>> The new VM booted from template vm can’t become template again,
>> if you need this unusual chained-template feature, you can write
>> a cloneable-tmpfs kernel module for it.
>>
>> The libvirt toolkit can’t manage vm-template currently, in the
>> hyperhq/runv, we use qemu wrapper script to do it. I hope someone add
>> “libvirt managed template” feature to libvirt.
>
>> d) feature: yet-another-post-copy-migration
>> It is a possible feature, no toolkit can do it well now.
>> Using nbd server/client on the memory file is reluctantly Ok but
>> inconvenient. A special feature for tmpfs might be needed to
>> fully complete this feature.
>> No one need yet another post copy migration method,
>> but it is possible when some crazy man need it.
>
> As the crazy person who did the existing postcopy; one is enough!
>

Very true. This part of comments just shows how much
potentials there are for such a simple migration capability.


> Some minor fix requests below, but this looks nice and simple.
>

Will do soon. Thanks for your review.

> Shared memory is interesting because there are lots of different uses;
> e.g. your uses, but also vhost-user which is sharing for a completely
> different reason.
>
>> 

Re: [Qemu-devel] [RFC] vhost-user: introduce F_NEED_ALL_IOTLB protocol feature

2018-04-11 Thread Michael S. Tsirkin
On Thu, Apr 12, 2018 at 09:39:43AM +0800, Tiwei Bie wrote:
> On Thu, Apr 12, 2018 at 04:29:29AM +0300, Michael S. Tsirkin wrote:
> > On Thu, Apr 12, 2018 at 09:10:59AM +0800, Tiwei Bie wrote:
> > > On Wed, Apr 11, 2018 at 04:22:21PM +0300, Michael S. Tsirkin wrote:
> > > > On Wed, Apr 11, 2018 at 03:20:27PM +0800, Tiwei Bie wrote:
> > > > > This patch introduces VHOST_USER_PROTOCOL_F_NEED_ALL_IOTLB
> > > > > feature for vhost-user. By default, vhost-user backend needs
> > > > > to query the IOTLBs from QEMU after meeting unknown IOVAs.
> > > > > With this protocol feature negotiated, QEMU will provide all
> > > > > the IOTLBs to vhost-user backend without waiting for the
> > > > > queries from backend. This is helpful when using a hardware
> > > > > accelerator which is not able to handle unknown IOVAs at the
> > > > > vhost-user backend.
> > > > > 
> > > > > Signed-off-by: Tiwei Bie 
> > > > 
> > > > This is potentially a large amount of data to be sent
> > > > on a socket.
> > > 
> > > If we take the hardware accelerator out of this picture, we
> > > will find that it's actually a question of "pre-loading" vs
> > > "lazy-loading". I think neither of them is perfect.
> > > 
> > > For "pre-loading", as you said, we may have a tough starting.
> > > But for "lazy-loading", we can't have a predictable performance.
> > > A sudden, unexpected performance drop may happen at any time,
> > > because we may meet an unknown IOVA at any time in this case.
> > 
> > That's how hardware behaves too though. So we can expect guests
> > to try to optimize locality.
> 
> The difference is that, the software implementation needs to
> query the mappings via socket. And it's much slower..

If you are proposing this new feature as an optimization,
then I'd like to see numbers showing the performance gains.

It's definitely possible to optimize things out.  Pre-loading isn't
where I would start optimizing though.  For example, DPDK could have its
own VTD emulation, then it could access guest memory directly.


> > 
> > > Once we meet an unknown IOVA, the backend's data path will need
> > > to stop and query the mapping of the IOVA via the socket and
> > > wait for the reply. And the latency is not negligible (sometimes
> > > it's even unacceptable), especially in high performance network
> > > case. So maybe it's better to make both of them available to
> > > the vhost backend.
> > > 
> > > > 
> > > > I had an impression that a hardware accelerator was using
> > > > VFIO anyway. Given this, can't we have QEMU program
> > > > the shadow IOMMU tables into VFIO directly?
> > > 
> > > I think it's a good idea! Currently, my concern about it is
> > > that, the hardware device may not use IOMMU and it may have
> > > its builtin address translation unit. And it will be a pain
> > > for device vendors to teach VFIO to be able to work with the
> > > builtin address translation unit.
> > 
> > I think such drivers would have to integrate with VFIO somehow.
> > Otherwise, what is the plan for assigning such devices then?
> 
> Such devices are just for vhost data path acceleration.

That's not true I think.  E.g. RDMA devices have an on-card MMU.

> They have many available queue pairs, the switch logic
> will be done among those queue pairs. And different queue
> pairs will serve different VMs directly.
> 
> Best regards,
> Tiwei Bie

The way I would do it is attach different PASID values to
different queues. This way you can use the standard IOMMU
to enforce protection.



> > 
> > 
> > > Best regards,
> > > Tiwei Bie
> > > 
> > > > 
> > > > 
> > > > > ---
> > > > > The idea of this patch is to let QEMU push all the IOTLBs
> > > > > to vhost-user backend without waiting for the queries from
> > > > > the backend. Because hardware accelerator at the vhost-user
> > > > > backend may not be able to handle unknown IOVAs.
> > > > > 
> > > > > This is just a RFC for now. It seems that, it doesn't work
> > > > > as expected when guest is using kernel driver (To handle
> > > > > this case, it seems that some RAM regions' events also need
> > > > > to be listened). Any comments would be appreciated! Thanks!
> > > > > 
> > > > >  docs/interop/vhost-user.txt   |  9 
> > > > >  hw/virtio/vhost-backend.c |  7 ++
> > > > >  hw/virtio/vhost-user.c|  8 +++
> > > > >  hw/virtio/vhost.c | 47 
> > > > > ---
> > > > >  include/hw/virtio/vhost-backend.h |  3 +++
> > > > >  5 files changed, 71 insertions(+), 3 deletions(-)
> > > > > 
> > > > > diff --git a/docs/interop/vhost-user.txt b/docs/interop/vhost-user.txt
> > > > > index 534caab18a..73e07f9dad 100644
> > > > > --- a/docs/interop/vhost-user.txt
> > > > > +++ b/docs/interop/vhost-user.txt
> > > > > @@ -380,6 +380,7 @@ Protocol features
> > > > >  #define VHOST_USER_PROTOCOL_F_CRYPTO_SESSION 7
> > > > >  #define VHOST_USER_PROTOCOL_F_PAGEFAULT  8
> > > > >  #define 

Re: [Qemu-devel] [RFC] vhost-user: introduce F_NEED_ALL_IOTLB protocol feature

2018-04-11 Thread Tiwei Bie
On Wed, Apr 11, 2018 at 08:37:17PM +0300, Michael S. Tsirkin wrote:
> On Wed, Apr 11, 2018 at 04:38:53PM +0800, Tiwei Bie wrote:
> > On Wed, Apr 11, 2018 at 04:01:19PM +0800, Jason Wang wrote:
> > > On 2018年04月11日 15:20, Tiwei Bie wrote:
> > > > This patch introduces VHOST_USER_PROTOCOL_F_NEED_ALL_IOTLB
> > > > feature for vhost-user. By default, vhost-user backend needs
> > > > to query the IOTLBs from QEMU after meeting unknown IOVAs.
> > > > With this protocol feature negotiated, QEMU will provide all
> > > > the IOTLBs to vhost-user backend without waiting for the
> > > > queries from backend. This is helpful when using a hardware
> > > > accelerator which is not able to handle unknown IOVAs at the
> > > > vhost-user backend.
> > > > 
> > > > Signed-off-by: Tiwei Bie
> > > > ---
> > > > The idea of this patch is to let QEMU push all the IOTLBs
> > > > to vhost-user backend without waiting for the queries from
> > > > the backend. Because hardware accelerator at the vhost-user
> > > > backend may not be able to handle unknown IOVAs.
> > > > 
> > > > This is just a RFC for now. It seems that, it doesn't work
> > > > as expected when guest is using kernel driver (To handle
> > > > this case, it seems that some RAM regions' events also need
> > > > to be listened). Any comments would be appreciated! Thanks!
> > > 
> > > Interesting, a quick question is why this is needed? Can we just use exist
> > > IOTLB update message?
> > 
> > Yeah, we are still using the existing IOTLB update messages
> > to send the IOTLB messages to backend. The only difference
> > is that, QEMU won't wait for the queries before sending the
> > IOTLB update messages.
> 
> So I have a concern with that, in that without any flow
> control the socket buffer used by vhost-user might become
> full.

Each IOTLB update message needs a reply. So I think it
won't happen.

Best regards,
Tiwei Bie

> 
> We don't currently expect that.
> 
> 
> Again, my understanding is that the biggest benefit from use of a
> hardware accelerator is when notifications can be passed-through to the
> guest.
> 
> And since that involves VFIO, and since VFIO already needs to support
> all kinds of IOMMUs, one wonders whether one can just pass that directly
> to the VFIO instead of shipping it to vhost-user.
> 
> 
> > > 
> > > It looks to me at least kernel does not need this.
> > 
> > Something similar in kernel vhost is that, for kernel vhost,
> > QEMU needs to push the IOTLBs of some ring addrs to kernel
> > vhost backend without waiting for the queries.
> > 
> > Best regards,
> > Tiwei Bie
> 
> That's really to work around a bug in kernel, we keep this around since
> we want to support old kernels.
> 
> > > 
> > > Thanks



Re: [Qemu-devel] [RFC] vhost-user: introduce F_NEED_ALL_IOTLB protocol feature

2018-04-11 Thread Tiwei Bie
On Thu, Apr 12, 2018 at 04:29:29AM +0300, Michael S. Tsirkin wrote:
> On Thu, Apr 12, 2018 at 09:10:59AM +0800, Tiwei Bie wrote:
> > On Wed, Apr 11, 2018 at 04:22:21PM +0300, Michael S. Tsirkin wrote:
> > > On Wed, Apr 11, 2018 at 03:20:27PM +0800, Tiwei Bie wrote:
> > > > This patch introduces VHOST_USER_PROTOCOL_F_NEED_ALL_IOTLB
> > > > feature for vhost-user. By default, vhost-user backend needs
> > > > to query the IOTLBs from QEMU after meeting unknown IOVAs.
> > > > With this protocol feature negotiated, QEMU will provide all
> > > > the IOTLBs to vhost-user backend without waiting for the
> > > > queries from backend. This is helpful when using a hardware
> > > > accelerator which is not able to handle unknown IOVAs at the
> > > > vhost-user backend.
> > > > 
> > > > Signed-off-by: Tiwei Bie 
> > > 
> > > This is potentially a large amount of data to be sent
> > > on a socket.
> > 
> > If we take the hardware accelerator out of this picture, we
> > will find that it's actually a question of "pre-loading" vs
> > "lazy-loading". I think neither of them is perfect.
> > 
> > For "pre-loading", as you said, we may have a tough starting.
> > But for "lazy-loading", we can't have a predictable performance.
> > A sudden, unexpected performance drop may happen at any time,
> > because we may meet an unknown IOVA at any time in this case.
> 
> That's how hardware behaves too though. So we can expect guests
> to try to optimize locality.

The difference is that, the software implementation needs to
query the mappings via socket. And it's much slower..

> 
> > Once we meet an unknown IOVA, the backend's data path will need
> > to stop and query the mapping of the IOVA via the socket and
> > wait for the reply. And the latency is not negligible (sometimes
> > it's even unacceptable), especially in high performance network
> > case. So maybe it's better to make both of them available to
> > the vhost backend.
> > 
> > > 
> > > I had an impression that a hardware accelerator was using
> > > VFIO anyway. Given this, can't we have QEMU program
> > > the shadow IOMMU tables into VFIO directly?
> > 
> > I think it's a good idea! Currently, my concern about it is
> > that, the hardware device may not use IOMMU and it may have
> > its builtin address translation unit. And it will be a pain
> > for device vendors to teach VFIO to be able to work with the
> > builtin address translation unit.
> 
> I think such drivers would have to integrate with VFIO somehow.
> Otherwise, what is the plan for assigning such devices then?

Such devices are just for vhost data path acceleration.
They have many available queue pairs, the switch logic
will be done among those queue pairs. And different queue
pairs will serve different VMs directly.

Best regards,
Tiwei Bie

> 
> 
> > Best regards,
> > Tiwei Bie
> > 
> > > 
> > > 
> > > > ---
> > > > The idea of this patch is to let QEMU push all the IOTLBs
> > > > to vhost-user backend without waiting for the queries from
> > > > the backend. Because hardware accelerator at the vhost-user
> > > > backend may not be able to handle unknown IOVAs.
> > > > 
> > > > This is just a RFC for now. It seems that, it doesn't work
> > > > as expected when guest is using kernel driver (To handle
> > > > this case, it seems that some RAM regions' events also need
> > > > to be listened). Any comments would be appreciated! Thanks!
> > > > 
> > > >  docs/interop/vhost-user.txt   |  9 
> > > >  hw/virtio/vhost-backend.c |  7 ++
> > > >  hw/virtio/vhost-user.c|  8 +++
> > > >  hw/virtio/vhost.c | 47 
> > > > ---
> > > >  include/hw/virtio/vhost-backend.h |  3 +++
> > > >  5 files changed, 71 insertions(+), 3 deletions(-)
> > > > 
> > > > diff --git a/docs/interop/vhost-user.txt b/docs/interop/vhost-user.txt
> > > > index 534caab18a..73e07f9dad 100644
> > > > --- a/docs/interop/vhost-user.txt
> > > > +++ b/docs/interop/vhost-user.txt
> > > > @@ -380,6 +380,7 @@ Protocol features
> > > >  #define VHOST_USER_PROTOCOL_F_CRYPTO_SESSION 7
> > > >  #define VHOST_USER_PROTOCOL_F_PAGEFAULT  8
> > > >  #define VHOST_USER_PROTOCOL_F_CONFIG 9
> > > > +#define VHOST_USER_PROTOCOL_F_NEED_ALL_IOTLB 10
> > > >  
> > > >  Master message types
> > > >  
> > > > @@ -797,3 +798,11 @@ resilient for selective requests.
> > > >  For the message types that already solicit a reply from the client, the
> > > >  presence of VHOST_USER_PROTOCOL_F_REPLY_ACK or need_reply bit being 
> > > > set brings
> > > >  no behavioural change. (See the 'Communication' section for details.)
> > > > +
> > > > +VHOST_USER_PROTOCOL_F_NEED_ALL_IOTLB
> > > > +
> > > > +By default, vhost-user backend needs to query the IOTLBs from QEMU 
> > > > after
> > > > +meeting unknown IOVAs. With this protocol feature negotiated, QEMU will
> > > > +provide all the IOTLBs to vhost backend 

Re: [Qemu-devel] [RFC] vhost-user: introduce F_NEED_ALL_IOTLB protocol feature

2018-04-11 Thread Michael S. Tsirkin
On Thu, Apr 12, 2018 at 09:10:59AM +0800, Tiwei Bie wrote:
> On Wed, Apr 11, 2018 at 04:22:21PM +0300, Michael S. Tsirkin wrote:
> > On Wed, Apr 11, 2018 at 03:20:27PM +0800, Tiwei Bie wrote:
> > > This patch introduces VHOST_USER_PROTOCOL_F_NEED_ALL_IOTLB
> > > feature for vhost-user. By default, vhost-user backend needs
> > > to query the IOTLBs from QEMU after meeting unknown IOVAs.
> > > With this protocol feature negotiated, QEMU will provide all
> > > the IOTLBs to vhost-user backend without waiting for the
> > > queries from backend. This is helpful when using a hardware
> > > accelerator which is not able to handle unknown IOVAs at the
> > > vhost-user backend.
> > > 
> > > Signed-off-by: Tiwei Bie 
> > 
> > This is potentially a large amount of data to be sent
> > on a socket.
> 
> If we take the hardware accelerator out of this picture, we
> will find that it's actually a question of "pre-loading" vs
> "lazy-loading". I think neither of them is perfect.
> 
> For "pre-loading", as you said, we may have a tough starting.
> But for "lazy-loading", we can't have a predictable performance.
> A sudden, unexpected performance drop may happen at any time,
> because we may meet an unknown IOVA at any time in this case.

That's how hardware behaves too though. So we can expect guests
to try to optimize locality.

> Once we meet an unknown IOVA, the backend's data path will need
> to stop and query the mapping of the IOVA via the socket and
> wait for the reply. And the latency is not negligible (sometimes
> it's even unacceptable), especially in high performance network
> case. So maybe it's better to make both of them available to
> the vhost backend.
> 
> > 
> > I had an impression that a hardware accelerator was using
> > VFIO anyway. Given this, can't we have QEMU program
> > the shadow IOMMU tables into VFIO directly?
> 
> I think it's a good idea! Currently, my concern about it is
> that, the hardware device may not use IOMMU and it may have
> its builtin address translation unit. And it will be a pain
> for device vendors to teach VFIO to be able to work with the
> builtin address translation unit.

I think such drivers would have to integrate with VFIO somehow.
Otherwise, what is the plan for assigning such devices then?


> Best regards,
> Tiwei Bie
> 
> > 
> > 
> > > ---
> > > The idea of this patch is to let QEMU push all the IOTLBs
> > > to vhost-user backend without waiting for the queries from
> > > the backend. Because hardware accelerator at the vhost-user
> > > backend may not be able to handle unknown IOVAs.
> > > 
> > > This is just a RFC for now. It seems that, it doesn't work
> > > as expected when guest is using kernel driver (To handle
> > > this case, it seems that some RAM regions' events also need
> > > to be listened). Any comments would be appreciated! Thanks!
> > > 
> > >  docs/interop/vhost-user.txt   |  9 
> > >  hw/virtio/vhost-backend.c |  7 ++
> > >  hw/virtio/vhost-user.c|  8 +++
> > >  hw/virtio/vhost.c | 47 
> > > ---
> > >  include/hw/virtio/vhost-backend.h |  3 +++
> > >  5 files changed, 71 insertions(+), 3 deletions(-)
> > > 
> > > diff --git a/docs/interop/vhost-user.txt b/docs/interop/vhost-user.txt
> > > index 534caab18a..73e07f9dad 100644
> > > --- a/docs/interop/vhost-user.txt
> > > +++ b/docs/interop/vhost-user.txt
> > > @@ -380,6 +380,7 @@ Protocol features
> > >  #define VHOST_USER_PROTOCOL_F_CRYPTO_SESSION 7
> > >  #define VHOST_USER_PROTOCOL_F_PAGEFAULT  8
> > >  #define VHOST_USER_PROTOCOL_F_CONFIG 9
> > > +#define VHOST_USER_PROTOCOL_F_NEED_ALL_IOTLB 10
> > >  
> > >  Master message types
> > >  
> > > @@ -797,3 +798,11 @@ resilient for selective requests.
> > >  For the message types that already solicit a reply from the client, the
> > >  presence of VHOST_USER_PROTOCOL_F_REPLY_ACK or need_reply bit being set 
> > > brings
> > >  no behavioural change. (See the 'Communication' section for details.)
> > > +
> > > +VHOST_USER_PROTOCOL_F_NEED_ALL_IOTLB
> > > +
> > > +By default, vhost-user backend needs to query the IOTLBs from QEMU after
> > > +meeting unknown IOVAs. With this protocol feature negotiated, QEMU will
> > > +provide all the IOTLBs to vhost backend without waiting for the queries
> > > +from backend. This is helpful when using a hardware accelerator which is
> > > +not able to handle unknown IOVAs at the vhost-user backend.
> > > diff --git a/hw/virtio/vhost-backend.c b/hw/virtio/vhost-backend.c
> > > index 7f09efab8b..d72994e9a5 100644
> > > --- a/hw/virtio/vhost-backend.c
> > > +++ b/hw/virtio/vhost-backend.c
> > > @@ -233,6 +233,11 @@ static void vhost_kernel_set_iotlb_callback(struct 
> > > vhost_dev *dev,
> > >  qemu_set_fd_handler((uintptr_t)dev->opaque, NULL, NULL, NULL);
> > >  }
> > >  
> > > +static bool vhost_kernel_need_all_device_iotlb(struct 

Re: [Qemu-devel] [RFC] vhost-user: introduce F_NEED_ALL_IOTLB protocol feature

2018-04-11 Thread Tiwei Bie
On Wed, Apr 11, 2018 at 04:22:21PM +0300, Michael S. Tsirkin wrote:
> On Wed, Apr 11, 2018 at 03:20:27PM +0800, Tiwei Bie wrote:
> > This patch introduces VHOST_USER_PROTOCOL_F_NEED_ALL_IOTLB
> > feature for vhost-user. By default, vhost-user backend needs
> > to query the IOTLBs from QEMU after meeting unknown IOVAs.
> > With this protocol feature negotiated, QEMU will provide all
> > the IOTLBs to vhost-user backend without waiting for the
> > queries from backend. This is helpful when using a hardware
> > accelerator which is not able to handle unknown IOVAs at the
> > vhost-user backend.
> > 
> > Signed-off-by: Tiwei Bie 
> 
> This is potentially a large amount of data to be sent
> on a socket.

If we take the hardware accelerator out of this picture, we
will find that it's actually a question of "pre-loading" vs
"lazy-loading". I think neither of them is perfect.

For "pre-loading", as you said, we may have a tough starting.
But for "lazy-loading", we can't have a predictable performance.
A sudden, unexpected performance drop may happen at any time,
because we may meet an unknown IOVA at any time in this case.
Once we meet an unknown IOVA, the backend's data path will need
to stop and query the mapping of the IOVA via the socket and
wait for the reply. And the latency is not negligible (sometimes
it's even unacceptable), especially in high performance network
case. So maybe it's better to make both of them available to
the vhost backend.

> 
> I had an impression that a hardware accelerator was using
> VFIO anyway. Given this, can't we have QEMU program
> the shadow IOMMU tables into VFIO directly?

I think it's a good idea! Currently, my concern about it is
that, the hardware device may not use IOMMU and it may have
its builtin address translation unit. And it will be a pain
for device vendors to teach VFIO to be able to work with the
builtin address translation unit.

Best regards,
Tiwei Bie

> 
> 
> > ---
> > The idea of this patch is to let QEMU push all the IOTLBs
> > to vhost-user backend without waiting for the queries from
> > the backend. Because hardware accelerator at the vhost-user
> > backend may not be able to handle unknown IOVAs.
> > 
> > This is just a RFC for now. It seems that, it doesn't work
> > as expected when guest is using kernel driver (To handle
> > this case, it seems that some RAM regions' events also need
> > to be listened). Any comments would be appreciated! Thanks!
> > 
> >  docs/interop/vhost-user.txt   |  9 
> >  hw/virtio/vhost-backend.c |  7 ++
> >  hw/virtio/vhost-user.c|  8 +++
> >  hw/virtio/vhost.c | 47 
> > ---
> >  include/hw/virtio/vhost-backend.h |  3 +++
> >  5 files changed, 71 insertions(+), 3 deletions(-)
> > 
> > diff --git a/docs/interop/vhost-user.txt b/docs/interop/vhost-user.txt
> > index 534caab18a..73e07f9dad 100644
> > --- a/docs/interop/vhost-user.txt
> > +++ b/docs/interop/vhost-user.txt
> > @@ -380,6 +380,7 @@ Protocol features
> >  #define VHOST_USER_PROTOCOL_F_CRYPTO_SESSION 7
> >  #define VHOST_USER_PROTOCOL_F_PAGEFAULT  8
> >  #define VHOST_USER_PROTOCOL_F_CONFIG 9
> > +#define VHOST_USER_PROTOCOL_F_NEED_ALL_IOTLB 10
> >  
> >  Master message types
> >  
> > @@ -797,3 +798,11 @@ resilient for selective requests.
> >  For the message types that already solicit a reply from the client, the
> >  presence of VHOST_USER_PROTOCOL_F_REPLY_ACK or need_reply bit being set 
> > brings
> >  no behavioural change. (See the 'Communication' section for details.)
> > +
> > +VHOST_USER_PROTOCOL_F_NEED_ALL_IOTLB
> > +
> > +By default, vhost-user backend needs to query the IOTLBs from QEMU after
> > +meeting unknown IOVAs. With this protocol feature negotiated, QEMU will
> > +provide all the IOTLBs to vhost backend without waiting for the queries
> > +from backend. This is helpful when using a hardware accelerator which is
> > +not able to handle unknown IOVAs at the vhost-user backend.
> > diff --git a/hw/virtio/vhost-backend.c b/hw/virtio/vhost-backend.c
> > index 7f09efab8b..d72994e9a5 100644
> > --- a/hw/virtio/vhost-backend.c
> > +++ b/hw/virtio/vhost-backend.c
> > @@ -233,6 +233,11 @@ static void vhost_kernel_set_iotlb_callback(struct 
> > vhost_dev *dev,
> >  qemu_set_fd_handler((uintptr_t)dev->opaque, NULL, NULL, NULL);
> >  }
> >  
> > +static bool vhost_kernel_need_all_device_iotlb(struct vhost_dev *dev)
> > +{
> > +return false;
> > +}
> > +
> >  static const VhostOps kernel_ops = {
> >  .backend_type = VHOST_BACKEND_TYPE_KERNEL,
> >  .vhost_backend_init = vhost_kernel_init,
> > @@ -264,6 +269,8 @@ static const VhostOps kernel_ops = {
> >  #endif /* CONFIG_VHOST_VSOCK */
> >  .vhost_set_iotlb_callback = vhost_kernel_set_iotlb_callback,
> >  .vhost_send_device_iotlb_msg = vhost_kernel_send_device_iotlb_msg,
> > +

Re: [Qemu-devel] [PATCHv3 for-2.13 0/2] Helpers to obtain host page sizes for guest RAM

2018-04-11 Thread David Gibson
On Wed, Apr 11, 2018 at 03:54:20PM +0200, Paolo Bonzini wrote:
> On 11/04/2018 09:04, David Gibson wrote:
> > This series makes some small changes to make it easier to obtain the
> > host page size backing given portions of guest RAM.  We use this in a
> > couple of places currently, and I have one or two more to add in some
> > upcoming code.
> > 
> > Assuming there are no objections, what should be the procedure for
> > staging this?  Can I put it in my for-2.13 ppc tree, even though it's
> > technically generic code, or should it go through someone else's tree?
> 
> Yes, go ahead:
> 
> Acked-by: Paolo Bonzini 

I'm assuming that's an ack to putting it through my tree.

In which case, done, thanks.

-- 
David Gibson| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au  | minimalist, thank you.  NOT _the_ _other_
| _way_ _around_!
http://www.ozlabs.org/~dgibson


signature.asc
Description: PGP signature


Re: [Qemu-devel] [PATCH 00/10] Avoid integer overflow in next_page_start

2018-04-11 Thread Richard Henderson
On 04/12/2018 01:29 AM, Emilio G. Cota wrote:
> To ease an eventual merge I'll be updating the patches' R-b tags as
> they come in this branch:
>   https://github.com/cota/qemu/tree/next_page_overflow-r-b
> 
> BTW to avoid conflicts we should merge this before the translator loop
> conversion series; I'll make that clear when I send a new version
> of that patch set.

Right-o.  Thanks.  We'll get these in right away once development starts again
so that you don't have to carry it long.


r~




Re: [Qemu-devel] Bad icount read when running qemu-system-ppc64 and mfspr atbu guest instruction

2018-04-11 Thread Richard Henderson
On 04/12/2018 04:18 AM, Darrell Leinwand wrote:
> Hi,
> 
> When I enable icount using an e5500 core I get an exit with “Bad icount
> read” when the guest software executes a load atbu command.

Yep, it looks like there are some bugs in the ppc front end wrt icount.

In target/ppc/translate_init.c, spr_read_tbu has the proper wrappers for
integrating with icount, but a few lines lower spr_read_atbl and spr_read_atbu
do not.

This will probably have to wait until the 2.13 cycle to fix...


r~


PS:

> 
> CONFIDENTIALITY NOTICE: The information contained in this e-mail and any 
> accompanying documents may contain information that is confidential, 
> proprietary to Performance Software, or otherwise protected from disclosure. 
> If you are not the intended recipient of this message, or if this message has 
> been addressed to you in error, please immediately alert the sender by reply 
> e-mail and then delete this message, including any attachments. Any 
> dissemination, distribution or other use of the contents of this message by 
> anyone other than the intended recipient is strictly prohibited.
> 

You really should strip this from your signature when posting to a public list.



Re: [Qemu-devel] [PATCH v8 19/23] SiFive RISC-V UART Device

2018-04-11 Thread Eric Blake
On 04/10/2018 03:04 AM, Antony Pavlov wrote:

 +++ b/include/hw/riscv/sifive_uart.h

 +
 +typedef struct SiFiveUARTState {
 +/*< private >*/
 +SysBusDevice parent_obj;
>>>
>>>
>>> You use SysBusDevice in this header file but there is no 'include 
>>> "hw/sysbus.h"' in the header file itself.

That is, this header is not standalone; a .c file can't use the header
unless it first includes hw/sysbus.h prior to sifive_uart.h.

>>>
>>> Please see my comment 
>>> https://github.com/riscv/riscv-qemu/pull/130#issuecomment-379640538
>>>
>>>
 +/*< public >*/
 +qemu_irq irq;
 +MemoryRegion mmio;
 +CharBackend chr;
>>>
>>> Just the same thing. CharBackend is defined in "chardev/char-fe.h" please 
>>> include it.

If you were to use a CharBackend*, you could get by with just the
typedef.  Since all .c files include osdeps.h, which in turn includes
typedefs.h, you wouldn't have to include anything if you only refer to
the type via a pointer.  But here, you are including a full object, so
the compiler has to know the size of the type, which means this header
DOES depend on "chardev/char-fe.h" being included first (either in this
.h to keep it standalone, or in all .c files prior to the point where
they include sifive_uart.h).

>>
>> Honestly, I rather prefer to *not* add more includes to header files
>> than we already have. We already have got lots of "touch this header and
>> you have to recompile almost the whole QEMU" conditions, so to avoid
>> that this situation gets worse, we should rather avoid including headers
>> from headers if it is not necessary. Thus if the current sources build
>> fine, no need to change anything here. Just my 0.02 €.
> 
> Adding ONLY NECESSARY header files inclusions to header file __can't produce__
> additional recompile efforts.
> Moreover this can decrease number of include directives in c-files.

My personal preference: if your header only refers to a type via a
pointer where the header is still standalone with just the appropriate
typedefs, then DON'T include another .h from your header.  But if your
header has a hard dependency on something not already included by
osdeps.h, where failing to include that other header first creates a
compile error, then including the .h in your header is appropriate, as
it is less work for all .c clients that use your header.

The art of reducing compile-time dependencies is figuring out which
structs must be included inline (requiring .h in headers), and where you
can use pointers, or opaque types that live in .c, or whatever other
solutions, so that the headers become lighter-weight.  But it is NOT
designed to break standalone use of a header (other than the one
exception that headers DON'T include osdeps.h because that had to
already be included first by all .c files).

> 
> I __rebased__ my RISC-V board in my out-of tree qemu branch 
> (https://github.com/miet-riscv-workgroup/riscv-qemu/tree/20180409.erizo). I 
> faced a problem: I have to track dependencies of
> header files from include/hw/riscv/ which I use.
> 
> So your "not-add-more-includes-to-header-files" approach has a disadvantage:
> if a header file required header list changes, each c-file that includes that 
> header file
> must be edited to update the #include statement list.

Indeed, that's why I argue that include statements in .h files that are
necessary for standalone compilation of that header is a good idea, and
not something to burden every .c file with.

-- 
Eric Blake, Principal Software Engineer
Red Hat, Inc.   +1-919-301-3266
Virtualization:  qemu.org | libvirt.org



signature.asc
Description: OpenPGP digital signature


Re: [Qemu-devel] Bad icount read when running qemu-system-ppc64 and mfspr atbu guest instruction

2018-04-11 Thread Emilio G. Cota
On Wed, Apr 11, 2018 at 18:18:18 +, Darrell Leinwand wrote:
> Hi,
> 
> When I enable icount using an e5500 core I get an exit with “Bad icount read” 
> when the guest software executes a load atbu command.
> 
> It looks like in qemu/accel/tcg/cpu_exec.c:166 sets can_do_io false when 
> using icount.
> cpu->can_do_io = !use_icount;
> ret = tcg_qemu_tb_exec(env, tb_ptr);
> cpu->can_do_io = 1;
> 
> Since can_do_io is set to false there and the cpu_get_icount_raw() function 
> checks that flag qemu/cpus.c:260:
> if (!cpu->can_do_io) {
> fprintf(stderr, "Bad icount read\n");
> exit(1);
> }
> 
> The load_atbu function will always fail. I commented out the !use_icount and 
> set it to a 1 in cpu_exec.c and it continues and seems to run normally, but I 
> am not sure what side effects while running icount that could have since it 
> may allow actual IO to occur while translation is happening.

Can you reproduce with the current master, i.e. 38e83a71d02 ?
A few patches have very recently been merged that might fix
your problem.

If master is still broken and it used to work in the past for you,
it would be useful if you could bisect this to point out the commit
that broke this.

Thanks,

Emilio



Re: [Qemu-devel] [PATCH v4 11/13] block/mirror: Add active mirroring

2018-04-11 Thread Eric Blake
On 04/11/2018 01:54 PM, Max Reitz wrote:
> This patch implements active synchronous mirroring.  In active mode, the
> passive mechanism will still be in place and is used to copy all
> initially dirty clusters off the source disk; but every write request
> will write data both to the source and the target disk, so the source
> cannot be dirtied faster than data is mirrored to the target.  Also,
> once the block job has converged (BLOCK_JOB_READY sent), source and
> target are guaranteed to stay in sync (unless an error occurs).
> 
> Active mode is completely optional and currently disabled at runtime.  A
> later patch will add a way for users to enable it.
> 
> Signed-off-by: Max Reitz 
> Reviewed-by: Fam Zheng 
> ---
>  qapi/block-core.json |  18 
>  block/mirror.c   | 252 
> ++-
>  2 files changed, 265 insertions(+), 5 deletions(-)
> 
> diff --git a/qapi/block-core.json b/qapi/block-core.json
> index c50517bff3..8210d601f4 100644
> --- a/qapi/block-core.json
> +++ b/qapi/block-core.json
> @@ -1049,6 +1049,24 @@
>  { 'enum': 'MirrorSyncMode',
>'data': ['top', 'full', 'none', 'incremental'] }
>  
> +##
> +# @MirrorCopyMode:
> +#
> +# An enumeration whose values tell the mirror block job when to
> +# trigger writes to the target.
> +#
> +# @background: copy data in background only.
> +#
> +# @write-blocking: when data is written to the source, write it
> +#  (synchronously) to the target as well.  In
> +#  addition, data is copied in background just like in
> +#  @background mode.
> +#
> +# Since: 2.12

Missed an instance of 2.13

-- 
Eric Blake, Principal Software Engineer
Red Hat, Inc.   +1-919-301-3266
Virtualization:  qemu.org | libvirt.org



signature.asc
Description: OpenPGP digital signature


Re: [Qemu-devel] [PATCH 01/10] target/riscv: avoid integer overflow in next_page PC check

2018-04-11 Thread Michael Clark
On Wed, Apr 11, 2018 at 4:19 AM, Emilio G. Cota  wrote:

> If the PC is in the last page of the address space, next_page_start
> overflows to 0. Fix it.
>
> Reported-by: Richard Henderson 
> Suggested-by: Richard Henderson 
> Cc: Michael Clark 
> Cc: Palmer Dabbelt 
> Cc: Sagar Karandikar 
> Cc: Bastian Koppelmann 
> Signed-off-by: Emilio G. Cota 
>

Reviewed-by: Michael Clark 


> ---
>  target/riscv/translate.c | 6 +++---
>  1 file changed, 3 insertions(+), 3 deletions(-)
>
> diff --git a/target/riscv/translate.c b/target/riscv/translate.c
> index 808eab7..d2d2e5e 100644
> --- a/target/riscv/translate.c
> +++ b/target/riscv/translate.c
> @@ -1849,11 +1849,11 @@ void gen_intermediate_code(CPUState *cs,
> TranslationBlock *tb)
>  CPURISCVState *env = cs->env_ptr;
>  DisasContext ctx;
>  target_ulong pc_start;
> -target_ulong next_page_start;
> +target_ulong page_start;
>  int num_insns;
>  int max_insns;
>  pc_start = tb->pc;
> -next_page_start = (pc_start & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE;
> +page_start = pc_start & TARGET_PAGE_MASK;
>  ctx.pc = pc_start;
>
>  /* once we have GDB, the rest of the translate.c implementation
> should be
> @@ -1903,7 +1903,7 @@ void gen_intermediate_code(CPUState *cs,
> TranslationBlock *tb)
>  if (cs->singlestep_enabled) {
>  break;
>  }
> -if (ctx.pc >= next_page_start) {
> +if (ctx.pc - page_start >= TARGET_PAGE_SIZE) {
>  break;
>  }
>  if (tcg_op_buf_full()) {
> --
> 2.7.4
>
>


Re: [Qemu-devel] [PATCH v3 01/15] tests: add fp-test, a floating point test suite

2018-04-11 Thread Emilio G. Cota
On Wed, Apr 11, 2018 at 02:20:49 +0100, Alex Bennée wrote:
> Emilio G. Cota  writes:
> So with the attached patch and my proposed cross build we can now get:
> 
> 02:15:54 [alex@zen:~/l/q/qemu.git] softfloat-fixes-for-2.12-v1 ± find . 
> -iname "fp-test" | xargs file
> ./ppc64-linux-user/tests/fp-test:  ELF 64-bit LSB executable, 64-bit 
> PowerPC or cisco 7500, version 1 (GNU/Linux), statically linked, for 
> GNU/Linux 3.2.0, not stripped
(snip)
> But it did mean having to hack about a little, mainly to get rid of
> glib.

That will let us build fp-test using a cross-compiler. My initial
thinking was that since we'd end up testing on a real host
(with "-t host" mode), cross-compiling wouldn't be necessary since we
could just compile natively on said host.

But since we seem to be moving towards supporting cross-compilers,
it takes little effort to cross-compile fp-test as well. The main
hurdle is to remove the glib dependence as you pointed out. I just
wrote a few patches to do this:

$ git log --oneline -5 --reverse
48e802b osdep: disable glib-compat.h include with QEMU_NO_GLIB
d3c78c7 softfloat: do not include glib headers
744a9c4 tests/tcg/Makefile: define _GNU_SOURCE
661c0e2 tests/fp: fixup
e057d45 tests/tcg/Makefile: fp-test build fixup

The main difference with your attached patch is that we remove ifdef's
from fp-test.c while keeping the osdep.h include.

You can fetch the patches from
  https://github.com/cota/qemu/tree/softfloat-fixes-for-2.12-v1

[BTW the name of the branch is just to keep your original branch name;
I'm in no way intending for this to be part of 2.12 :>]

Thanks,

Emilio



Re: [Qemu-devel] [PATCH v8 19/23] SiFive RISC-V UART Device

2018-04-11 Thread Michael Clark
On Tue, Apr 10, 2018 at 8:04 PM, Antony Pavlov 
wrote:

> On Tue, 10 Apr 2018 08:17:32 +0200
> Thomas Huth  wrote:
>
> > On 10.04.2018 05:21, Antony Pavlov wrote:
> > > On Sat,  3 Mar 2018 02:51:47 +1300
> > > Michael Clark  wrote:
> > >
> > >> QEMU model of the UART on the SiFive E300 and U500 series SOCs.
> > >> BBL supports the SiFive UART for early console access via the SBI
> > >> (Supervisor Binary Interface) and the linux kernel SBI console.
> > >>
> > >> The SiFive UART implements the pre qom legacy interface consistent
> > >> with the 16550a UART in 'hw/char/serial.c'.
> > >>
> > >> Acked-by: Richard Henderson 
> > >> Signed-off-by: Stefan O'Rear 
> > >> Signed-off-by: Palmer Dabbelt 
> > >> Signed-off-by: Michael Clark 
> > >> ---
> > >>  hw/riscv/sifive_uart.c | 176 ++
> +++
> > >>  include/hw/riscv/sifive_uart.h |  71 +
> > >>  2 files changed, 247 insertions(+)
> > >>  create mode 100644 hw/riscv/sifive_uart.c
> > >>  create mode 100644 include/hw/riscv/sifive_uart.h
> > >>
> > >> diff --git a/hw/riscv/sifive_uart.c b/hw/riscv/sifive_uart.c
> > >> new file mode 100644
> > >> index 000..b0c3798
> > >> --- /dev/null
> > >> +++ b/hw/riscv/sifive_uart.c
> > >> @@ -0,0 +1,176 @@
> > >> +/*
> > >> + * QEMU model of the UART on the SiFive E300 and U500 series SOCs.
> > >> + *
> > >> + * Copyright (c) 2016 Stefan O'Rear
> > >> + *
> > >> + * This program is free software; you can redistribute it and/or
> modify it
> > >> + * under the terms and conditions of the GNU General Public License,
> > >> + * version 2 or later, as published by the Free Software Foundation.
> > >> + *
> > >> + * This program is distributed in the hope it will be useful, but
> WITHOUT
> > >> + * ANY WARRANTY; without even the implied warranty of
> MERCHANTABILITY or
> > >> + * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
> License for
> > >> + * more details.
> > >> + *
> > >> + * You should have received a copy of the GNU General Public License
> along with
> > >> + * this program.  If not, see .
> > >> + */
> > >> +
> > >> +#include "qemu/osdep.h"
> > >> +#include "qapi/error.h"
> > >> +#include "hw/sysbus.h"
> > >> +#include "chardev/char.h"
> > >> +#include "chardev/char-fe.h"
> > >> +#include "target/riscv/cpu.h"
> > >> +#include "hw/riscv/sifive_uart.h"
> > >>
> > >> +/*
> > >> + * Not yet implemented:
> > >> + *
> > >> + * Transmit FIFO using "qemu/fifo8.h"
> > >> + * SIFIVE_UART_IE_TXWM interrupts
> > >> + * SIFIVE_UART_IE_RXWM interrupts must honor fifo watermark
> > >> + * Rx FIFO watermark interrupt trigger threshold
> > >> + * Tx FIFO watermark interrupt trigger threshold.
> > >> + */
> > >> +
> > >> +static void update_irq(SiFiveUARTState *s)
> > >> +{
> > >> +int cond = 0;
> > >> +if ((s->ie & SIFIVE_UART_IE_RXWM) && s->rx_fifo_len) {
> > >> +cond = 1;
> > >> +}
> > >> +if (cond) {
> > >> +qemu_irq_raise(s->irq);
> > >> +} else {
> > >> +qemu_irq_lower(s->irq);
> > >> +}
> > >> +}
> > >> +
> > >> +static uint64_t
> > >> +uart_read(void *opaque, hwaddr addr, unsigned int size)
> > >> +{
> > >> +SiFiveUARTState *s = opaque;
> > >> +unsigned char r;
> > >> +switch (addr) {
> > >> +case SIFIVE_UART_RXFIFO:
> > >> +if (s->rx_fifo_len) {
> > >> +r = s->rx_fifo[0];
> > >> +memmove(s->rx_fifo, s->rx_fifo + 1, s->rx_fifo_len - 1);
> > >> +s->rx_fifo_len--;
> > >> +qemu_chr_fe_accept_input(>chr);
> > >> +update_irq(s);
> > >> +return r;
> > >> +}
> > >> +return 0x8000;
> > >> +
> > >> +case SIFIVE_UART_TXFIFO:
> > >> +return 0; /* Should check tx fifo */
> > >> +case SIFIVE_UART_IE:
> > >> +return s->ie;
> > >> +case SIFIVE_UART_IP:
> > >> +return s->rx_fifo_len ? SIFIVE_UART_IP_RXWM : 0;
> > >> +case SIFIVE_UART_TXCTRL:
> > >> +return s->txctrl;
> > >> +case SIFIVE_UART_RXCTRL:
> > >> +return s->rxctrl;
> > >> +case SIFIVE_UART_DIV:
> > >> +return s->div;
> > >> +}
> > >> +
> > >> +hw_error("%s: bad read: addr=0x%x\n",
> > >> +__func__, (int)addr);
> > >> +return 0;
> > >> +}
> > >> +
> > >> +static void
> > >> +uart_write(void *opaque, hwaddr addr,
> > >> +   uint64_t val64, unsigned int size)
> > >> +{
> > >> +SiFiveUARTState *s = opaque;
> > >> +uint32_t value = val64;
> > >> +unsigned char ch = value;
> > >> +
> > >> +switch (addr) {
> > >> +case SIFIVE_UART_TXFIFO:
> > >> +qemu_chr_fe_write(>chr, , 1);
> > >> +return;
> > >> +case SIFIVE_UART_IE:
> > >> +s->ie = val64;
> > >> +update_irq(s);
> > >> +return;
> > >> +case SIFIVE_UART_TXCTRL:

[Qemu-devel] [ANNOUNCE] QEMU 2.12.0-rc3 is now available

2018-04-11 Thread Michael Roth
Hello,

On behalf of the QEMU Team, I'd like to announce the availability of the
fourth release candidate for the QEMU 2.12 release.  This release is meant
for testing purposes and should not be used in a production environment.

  http://download.qemu-project.org/qemu-2.12.0-rc3.tar.xz
  http://download.qemu-project.org/qemu-2.12.0-rc3.tar.xz.sig

A note from the maintainer:

  If no further release critical bugs are found, we'll do the
  final release next week on the 17th; otherwise we will likely
  roll an rc4 on the 17th and do final release on the 24th.

You can help improve the quality of the QEMU 2.12 release by testing this
release and reporting bugs on Launchpad:

  https://bugs.launchpad.net/qemu/

The release plan, as well as documented known issues for release
candidates, are available at:

  http://wiki.qemu.org/Planning/2.12

Please add entries to the ChangeLog for the 2.12 release below:

  http://wiki.qemu.org/ChangeLog/2.12

Changes since rc2:

38e83a71d0: Update version for v2.12.0-rc3 release (Peter Maydell)
6670b494fd: hw/char/cmsdk-apb-uart.c: Correctly clear INTSTATUS bits on writes 
(Peter Maydell)
afd46fcad2: icount: fix cpu_restore_state_from_tb for non-tb-exit cases (Pavel 
Dovgalyuk)
46a1ee4f39: linux-user: implement HWCAP bits on MIPS (James Cowgill)
947aeab311: linux-user: add microblaze/microblazeel magic numbers in 
qemu-binfmt-conf.sh (Laurent Vivier)
a31e7605dd: linux-user: fix microblaze get_sp_from_cpustate() (Laurent Vivier)
c1de5696d6: qemu-iotests: update 185 output (Stefan Hajnoczi)
2fe4bba19b: commit/stream: Reset delay_ns (Kevin Wolf)
ad53ea42fe: qemu-iotests: Remove _supported_fmt dmg (Kevin Wolf)
e819edd090: iotests: blacklist bochs and cloop for 205 and 208 (Vladimir 
Sementsov-Ogievskiy)
f48351d2f3: iotests.py: improve verify_image_format helper (Vladimir 
Sementsov-Ogievskiy)
07c13a7172: hw/block/pflash_cfi: fix off-by-one error (Philippe Mathieu-Daudé)
febc8c865f: iotests.py: support unsupported_fmts in main() (Vladimir 
Sementsov-Ogievskiy)
a18a73d747: Revert "migration: Don't activate block devices if using -S" (Dr. 
David Alan Gilbert)
951702f39c: monitor: bind dispatch bh to iohandler context (Peter Xu)
1554434927: iothread: workaround glib bug which hangs qmp-test (Peter Xu)
c3988519c4: iotests: fix wait_until_completed() (Peter Xu)
bd49e6027c: fpu: Fix rounding mode for floatN_to_uintM_round_to_zero (Richard 
Henderson)
9743cd5736: tcg: Introduce tcg_set_insn_start_param (Richard Henderson)
7f0f4208b3: linux-user/signal.c: Ensure AArch64 signal frame isn't too small 
(Peter Maydell)
c52e7132d7: cpus.c: ensure running CPU recalculates icount deadlines on timer 
expiry (Peter Maydell)
8aec759b45: target/arm: Report unsupported MPU region sizes more clearly (Peter 
Maydell)
f640a5914f: hw/arm/fsl-imx: Fix introspection problem with fsl-imx6 and 
fsl-imx7 (Thomas Huth)
8aabc5437b: hw/arm/allwinner-a10: Do not use nd_table in instance_init function 
(Thomas Huth)
f3d9fe8f95: hw/sd/bcm2835_sdhost: Don't raise spurious interrupts (Peter 
Maydell)
b318f3265c: hw/sd/bcm2835_sdhost: Add tracepoints (Peter Maydell)
c4869ca630: target-arm: Check undefined opcodes for SWP in A32 decoder (Onur 
Sahin)
8720daad47: hw/arm/integratorcp: Don't do things that could be fatal in the 
instance_init (Thomas Huth)
c39770cd63: hw/arm: Allow manually specified /psci node (Andrey Smirnov)
c6093a05d6: configure: don't warn SDL abi if disabled (Peter Xu)
5a464e6ce8: configure: don't warn GTK if disabled (Peter Xu)
fa2d039b2c: gtk: drop pointless code from gd_window_close (Gerd Hoffmann)
1e70de679d: ui: fix keymap detection under Xwayland (Daniel P. Berrangé)
5f52353091: e1000: Old machine types, turn new subsection off (Dr. David Alan 
Gilbert)
ff214d427e: e1000: Choose which set of props to migrate (Dr. David Alan Gilbert)
5935448478: e1000: Migrate props via a temporary structure (Dr. David Alan 
Gilbert)
46f2a9ec54: e1000: wire new subsection to property (Dr. David Alan Gilbert)
3c4053c52c: e1000: Dupe offload data on reading old stream (Dr. David Alan 
Gilbert)
4ae4bf5bb1: e1000: Convert v3 fields to subsection (Dr. David Alan Gilbert)
9ac225171c: linux-user: fix preadv/pwritev offsets (Max Filippov)
6b3913e085: roms/u-boot-sam460ex: Change to qemu git mirror and update (BALATON 
Zoltan)
f8815532dc: sam460ex: Fix timer frequency and clock multipliers (BALATON Zoltan)
99b336cdd9: tests/boot-serial: Test the sam460ex board (Thomas Huth)
127f03e442: spapr: Initialize reserved areas list in FDT in H_CAS handler 
(Alexey Kardashevskiy)
efb7db250a: target/ppc: Fix backwards migration of msr_mask (David Gibson)
ddd835f32a: hw/misc/macio: Fix crash when listing device properties of macio 
device (Thomas Huth)
e69ba2b489: target/ppc: Initialize lazy_tlb_flush correctly (David Gibson)
e0014d4b3a: Add missing bit for SSE instr in VEX decoding (Eugene Minibaev)
3bd2608db7: maint: Add .mailmap entries for patches claiming list authorship 
(Eric Blake)
84c868f6b8: dump: Fix build with newer 

Re: [Qemu-devel] [PATCH for 2.13 v2 00/19] linux-user: move arch specific parts from main.c to arch directories

2018-04-11 Thread no-reply
Hi,

This series seems to have some coding style problems. See output below for
more information:

Type: series
Message-id: 20180411185651.21351-1-laur...@vivier.eu
Subject: [Qemu-devel] [PATCH for 2.13 v2 00/19] linux-user: move arch specific 
parts from main.c to arch directories

=== TEST SCRIPT BEGIN ===
#!/bin/bash

BASE=base
n=1
total=$(git log --oneline $BASE.. | wc -l)
failed=0

git config --local diff.renamelimit 0
git config --local diff.renames True
git config --local diff.algorithm histogram

commits="$(git log --format=%H --reverse $BASE..)"
for c in $commits; do
echo "Checking PATCH $n/$total: $(git log -n 1 --format=%s $c)..."
if ! git show $c --format=email | ./scripts/checkpatch.pl --mailback -; then
failed=1
echo
fi
n=$((n+1))
done

exit $failed
=== TEST SCRIPT END ===

Updating 3c8cf5a9c21ff8782164d1def7f44bd888713384
From https://github.com/patchew-project/qemu
 t [tag update]patchew/20180329032149.44685-1-...@ozlabs.ru -> 
patchew/20180329032149.44685-1-...@ozlabs.ru
 * [new tag]   patchew/20180411185651.21351-1-laur...@vivier.eu -> 
patchew/20180411185651.21351-1-laur...@vivier.eu
Auto packing the repository in background for optimum performance.
See "git help gc" for manual housekeeping.
Switched to a new branch 'test'
ce56cf4ddf linux-user: move xtensa cpu loop to xtensa directory
fbea8496c0 linux-user: move hppa cpu loop to hppa directory
d6052fec98 linux-user: move riscv cpu loop to riscv directory
18c64ee772 linux-user: move tilegx cpu loop to tilegx directory
bb73857610 linux-user: move s390x cpu loop to s390x directory
dd5bdbd5a2 linux-user: move alpha cpu loop to alpha directory
6fd30276b4 linux-user: move m68k cpu loop to m68k directory
154b70b43b linux-user: move microblaze cpu loop to microblaze directory
3c976b0a5b linux-user: move cris cpu loop to cris directory
168d198956 linux-user: move sh4 cpu loop to sh4 directory
3727af28c4 linux-user: move openrisc cpu loop to openrisc directory
022b6bfbfe linux-user: move nios2 cpu loop to nios2 directory
a74c1bdc87 linux-user: move mips/mips64 cpu loop to mips directory
bdba9924f0 linux-user: move ppc/ppc64 cpu loop to ppc directory
f574990761 linux-user: move sparc/sparc64 cpu loop to sparc directory
ba06f492ed linux-user: move arm cpu loop to arm directory
e972942232 linux-user: move aarch64 cpu loop to aarch64 directory
3b0cea493a linux-user: move i386/x86_64 cpu loop to i386 directory
99606788a5 linux-user: create a dummy per arch cpu_loop.c

=== OUTPUT BEGIN ===
Checking PATCH 1/19: linux-user: create a dummy per arch cpu_loop.c...
Checking PATCH 2/19: linux-user: move i386/x86_64 cpu loop to i386 directory...
ERROR: space required before the open parenthesis '('
#92: FILE: linux-user/i386/cpu_loop.c:91:
+for(;;) {

ERROR: space required before the open parenthesis '('
#98: FILE: linux-user/i386/cpu_loop.c:97:
+switch(trapnr) {

ERROR: braces {} are necessary for all arms of this statement
#161: FILE: linux-user/i386/cpu_loop.c:160:
+if (!(env->error_code & 1))
[...]
+else
[...]

ERROR: that open brace { should be on the previous line
#233: FILE: linux-user/i386/cpu_loop.c:232:
+if (sig)
+  {

WARNING: line over 80 characters
#247: FILE: linux-user/i386/cpu_loop.c:246:
+EXCP_DUMP(env, "qemu: 0x%08lx: unhandled CPU exception 0x%x - 
aborting\n",

ERROR: spaces required around that '|' (ctx:VxV)
#307: FILE: linux-user/i386/cpu_loop.c:306:
+PROT_READ|PROT_WRITE,
  ^

ERROR: spaces required around that '|' (ctx:VxV)
#308: FILE: linux-user/i386/cpu_loop.c:307:
+MAP_ANONYMOUS|MAP_PRIVATE, -1, 0);
  ^

ERROR: spaces required around that '|' (ctx:VxV)
#336: FILE: linux-user/i386/cpu_loop.c:335:
+PROT_READ|PROT_WRITE,
  ^

ERROR: spaces required around that '|' (ctx:VxV)
#337: FILE: linux-user/i386/cpu_loop.c:336:
+MAP_ANONYMOUS|MAP_PRIVATE, -1, 0);
  ^

total: 8 errors, 1 warnings, 720 lines checked

Your patch has style problems, please review.  If any of these errors
are false positives report them to the maintainer, see
CHECKPATCH in MAINTAINERS.

Checking PATCH 3/19: linux-user: move aarch64 cpu loop to aarch64 directory...
WARNING: line over 80 characters
#141: FILE: linux-user/aarch64/cpu_loop.c:142:
+EXCP_DUMP(env, "qemu: unhandled CPU exception 0x%x - aborting\n", 
trapnr);

total: 0 errors, 1 warnings, 289 lines checked

Your patch has style problems, please review.  If any of these errors
are false positives report them to the maintainer, see
CHECKPATCH in MAINTAINERS.
Checking PATCH 4/19: linux-user: move arm cpu loop to arm directory...
ERROR: braces {} are necessary 

Re: [Qemu-devel] [RFC PATCH] migration: discard RAMBlocks of type ram_device

2018-04-11 Thread Dr. David Alan Gilbert
* Cédric Le Goater (c...@kaod.org) wrote:
> Here is some context for this strange change request.
> 
> On the POWER9 processor, the XIVE interrupt controller can control
> interrupt sources using MMIO to trigger events, to EOI or to turn off
> the sources. Priority management and interrupt acknowledgment is also
> controlled by MMIO in the presenter subengine.
> 
> These MMIO regions are exposed to guests in QEMU with a set of 'ram
> device' memory mappings, similarly to VFIO, and the VMAs are populated
> dynamically with the appropriate pages using a fault handler.
> 
> But, these regions are an issue for migration. We need to discard the
> associated RAMBlocks from the RAM state on the source VM and let the
> destination VM rebuild the memory mappings on the new host in the
> post_load() operation just before resuming the system.
> 
> This is the goal of the following proposal. Does it make sense ? It
> seems to be working enough to migrate a running guest but there might
> be a better, more subtle, approach.

It makes sense if this is always true of RAM devices (which I suspect it is).

Interestingly, your patch comes less than 2 weeks after Lai Jiangshan's
 'add capability to bypass the shared memory'
   https://lists.nongnu.org/archive/html/qemu-devel/2018-03/msg07511.html

which is the only other case I think we've got of someone trying to
avoid transmitting a block.

We should try and merge the two sets to make them consistent; you've
covered some more cases (the other patch wasn't expected to work with
Postcopy anyway).
(At this rate then we can expect another 20 for the year)

We should probably have:
   1) A bool is_migratable_block(RAMBlock *)
   2) A RAMBLOCK_FOREACH_MIGRATABLE(block)  macro that is like
RAMBLOCK_FOREACH but does the call to is_migratable_block

then the changes should be mostly pretty tidy.

A sanity check is probably needed on load as well, to give a neat
error if for some reason the source transmits pages to you.

One other thing I notice is your code changes ram_bytes_total(),
whereas the other patch avoids it;  I think your code is actually
more correct.

Is there *any* case in existing QEMUs where we migrate ram devices
successfully?  If so, we've got to make it backwards compatible; but I
think you're saying there isn't.

Dave


> Thanks,
> 
> C.
> 
> Signed-off-by: Cédric Le Goater 
> ---
>  migration/ram.c | 42 --
>  1 file changed, 40 insertions(+), 2 deletions(-)
> 
> diff --git a/migration/ram.c b/migration/ram.c
> index 0e90efa09236..6404ccd046d8 100644
> --- a/migration/ram.c
> +++ b/migration/ram.c
> @@ -780,6 +780,10 @@ unsigned long migration_bitmap_find_dirty(RAMState *rs, 
> RAMBlock *rb,
>  unsigned long *bitmap = rb->bmap;
>  unsigned long next;
>  
> +if (memory_region_is_ram_device(rb->mr)) {
> +return size;
> +}
> +
>  if (rs->ram_bulk_stage && start > 0) {
>  next = start + 1;
>  } else {
> @@ -826,6 +830,9 @@ uint64_t ram_pagesize_summary(void)
>  uint64_t summary = 0;
>  
>  RAMBLOCK_FOREACH(block) {
> +if (memory_region_is_ram_device(block->mr)) {
> +continue;
> +}
>  summary |= block->page_size;
>  }
>  
> @@ -850,6 +857,9 @@ static void migration_bitmap_sync(RAMState *rs)
>  qemu_mutex_lock(>bitmap_mutex);
>  rcu_read_lock();
>  RAMBLOCK_FOREACH(block) {
> +if (memory_region_is_ram_device(block->mr)) {
> +continue;
> +}
>  migration_bitmap_sync_range(rs, block, 0, block->used_length);
>  }
>  rcu_read_unlock();
> @@ -1499,6 +1509,10 @@ static int ram_save_host_page(RAMState *rs, 
> PageSearchStatus *pss,
>  size_t pagesize_bits =
>  qemu_ram_pagesize(pss->block) >> TARGET_PAGE_BITS;
>  
> +if (memory_region_is_ram_device(pss->block->mr)) {
> +return 0;
> +}
> +

Now we shouldn't actually end up here should we - so I suggest an
error_report and returning -EINVAL.

>  do {
>  tmppages = ram_save_target_page(rs, pss, last_stage);
>  if (tmppages < 0) {
> @@ -1588,6 +1602,9 @@ uint64_t ram_bytes_total(void)
>  
>  rcu_read_lock();
>  RAMBLOCK_FOREACH(block) {
> +if (memory_region_is_ram_device(block->mr)) {
> +continue;
> +}
>  total += block->used_length;
>  }
>  rcu_read_unlock();
> @@ -1643,6 +1660,9 @@ static void ram_save_cleanup(void *opaque)
>  memory_global_dirty_log_stop();
>  
>  QLIST_FOREACH_RCU(block, _list.blocks, next) {
> +if (memory_region_is_ram_device(block->mr)) {
> +continue;
> +}
>  g_free(block->bmap);
>  block->bmap = NULL;
>  g_free(block->unsentmap);
> @@ -1710,6 +1730,9 @@ void 
> ram_postcopy_migrated_memory_release(MigrationState *ms)
>  unsigned long range = block->used_length >> TARGET_PAGE_BITS;
>  unsigned long run_start = find_next_zero_bit(bitmap, range, 0);

[Qemu-devel] [PATCH for 2.13 v2 14/19] linux-user: move alpha cpu loop to alpha directory

2018-04-11 Thread Laurent Vivier
No code change, only move code from main.c to
alpha/cpu_loop.c.

Signed-off-by: Laurent Vivier 
Reviewed-by: Richard Henderson 
Reviewed-by: Philippe Mathieu-Daudé 
---
 linux-user/alpha/cpu_loop.c | 199 ++
 linux-user/main.c   | 204 
 2 files changed, 199 insertions(+), 204 deletions(-)

diff --git a/linux-user/alpha/cpu_loop.c b/linux-user/alpha/cpu_loop.c
index b7700a5561..b87fcaea87 100644
--- a/linux-user/alpha/cpu_loop.c
+++ b/linux-user/alpha/cpu_loop.c
@@ -21,6 +21,205 @@
 #include "qemu.h"
 #include "cpu_loop-common.h"
 
+void cpu_loop(CPUAlphaState *env)
+{
+CPUState *cs = CPU(alpha_env_get_cpu(env));
+int trapnr;
+target_siginfo_t info;
+abi_long sysret;
+
+while (1) {
+bool arch_interrupt = true;
+
+cpu_exec_start(cs);
+trapnr = cpu_exec(cs);
+cpu_exec_end(cs);
+process_queued_cpu_work(cs);
+
+switch (trapnr) {
+case EXCP_RESET:
+fprintf(stderr, "Reset requested. Exit\n");
+exit(EXIT_FAILURE);
+break;
+case EXCP_MCHK:
+fprintf(stderr, "Machine check exception. Exit\n");
+exit(EXIT_FAILURE);
+break;
+case EXCP_SMP_INTERRUPT:
+case EXCP_CLK_INTERRUPT:
+case EXCP_DEV_INTERRUPT:
+fprintf(stderr, "External interrupt. Exit\n");
+exit(EXIT_FAILURE);
+break;
+case EXCP_MMFAULT:
+info.si_signo = TARGET_SIGSEGV;
+info.si_errno = 0;
+info.si_code = (page_get_flags(env->trap_arg0) & PAGE_VALID
+? TARGET_SEGV_ACCERR : TARGET_SEGV_MAPERR);
+info._sifields._sigfault._addr = env->trap_arg0;
+queue_signal(env, info.si_signo, QEMU_SI_FAULT, );
+break;
+case EXCP_UNALIGN:
+info.si_signo = TARGET_SIGBUS;
+info.si_errno = 0;
+info.si_code = TARGET_BUS_ADRALN;
+info._sifields._sigfault._addr = env->trap_arg0;
+queue_signal(env, info.si_signo, QEMU_SI_FAULT, );
+break;
+case EXCP_OPCDEC:
+do_sigill:
+info.si_signo = TARGET_SIGILL;
+info.si_errno = 0;
+info.si_code = TARGET_ILL_ILLOPC;
+info._sifields._sigfault._addr = env->pc;
+queue_signal(env, info.si_signo, QEMU_SI_FAULT, );
+break;
+case EXCP_ARITH:
+info.si_signo = TARGET_SIGFPE;
+info.si_errno = 0;
+info.si_code = TARGET_FPE_FLTINV;
+info._sifields._sigfault._addr = env->pc;
+queue_signal(env, info.si_signo, QEMU_SI_FAULT, );
+break;
+case EXCP_FEN:
+/* No-op.  Linux simply re-enables the FPU.  */
+break;
+case EXCP_CALL_PAL:
+switch (env->error_code) {
+case 0x80:
+/* BPT */
+info.si_signo = TARGET_SIGTRAP;
+info.si_errno = 0;
+info.si_code = TARGET_TRAP_BRKPT;
+info._sifields._sigfault._addr = env->pc;
+queue_signal(env, info.si_signo, QEMU_SI_FAULT, );
+break;
+case 0x81:
+/* BUGCHK */
+info.si_signo = TARGET_SIGTRAP;
+info.si_errno = 0;
+info.si_code = 0;
+info._sifields._sigfault._addr = env->pc;
+queue_signal(env, info.si_signo, QEMU_SI_FAULT, );
+break;
+case 0x83:
+/* CALLSYS */
+trapnr = env->ir[IR_V0];
+sysret = do_syscall(env, trapnr,
+env->ir[IR_A0], env->ir[IR_A1],
+env->ir[IR_A2], env->ir[IR_A3],
+env->ir[IR_A4], env->ir[IR_A5],
+0, 0);
+if (sysret == -TARGET_ERESTARTSYS) {
+env->pc -= 4;
+break;
+}
+if (sysret == -TARGET_QEMU_ESIGRETURN) {
+break;
+}
+/* Syscall writes 0 to V0 to bypass error check, similar
+   to how this is handled internal to Linux kernel.
+   (Ab)use trapnr temporarily as boolean indicating error.  */
+trapnr = (env->ir[IR_V0] != 0 && sysret < 0);
+env->ir[IR_V0] = (trapnr ? -sysret : sysret);
+env->ir[IR_A3] = trapnr;
+break;
+case 0x86:
+/* IMB */
+/* ??? We can probably elide the code using page_unprotect
+   that is checking for self-modifying code.  Instead we
+   could simply call tb_flush here.  Until 

Re: [Qemu-devel] [PATCH qemu] RFC: memory/hmp: Print owners/parents in "info mtree"

2018-04-11 Thread Dr. David Alan Gilbert
* Alexey Kardashevskiy (a...@ozlabs.ru) wrote:
> This adds printing of owners/parents (which are the same, just
> occasionally owner==NULL) for memory regions; a new '-o' flag
> enables the new output.
> 
> Signed-off-by: Alexey Kardashevskiy 

From the HMP side this looks fine to me, and if it's making it clearer
where mappings are coming from that's good.
(It's a bit long/wordy, but that's the nature of the information).



Reviewed-by: Dr. David Alan Gilbert 

> ---
> 
> Does this look anything useful?
> 
> There are cases ("msi", "msix-table", "msix-pba" and probably more) when
> it is not clear what owns an MR while they all have an owner (always? 
> mostly?).
> 
> 
> "info mtree" example:
> 
> address-space: memory
>   - (prio 0, i/o): system parent:{obj}
> -7fff (prio 0, ram): ppc_spapr.ram 
> parent:{obj}
> 2000-2000 (prio 0, i/o): alias 
> p...@8002000.io-alias @p...@8002000.io 
> - owner:{dev 
> path=/machine/unattached/device[3]}
> 20008000-2000 (prio 0, i/o): alias 
> pci@8002000.mmio32-alias @p...@8002000.mmio 
> 8000- owner:{dev 
> path=/machine/unattached/device[3]}
> 2100-21ff (prio 0, i/o): alias 
> pci@8002000.mmio64-alias @p...@8002000.mmio 
> 2100-21ff owner:{dev 
> path=/machine/unattached/device[3]}
> 
> address-space: I/O
>   - (prio 0, i/o): io parent:{obj}
> 
> address-space: cpu-memory-0
>   - (prio 0, i/o): system parent:{obj}
> -7fff (prio 0, ram): ppc_spapr.ram 
> parent:{obj}
> 2000-2000 (prio 0, i/o): alias 
> p...@8002000.io-alias @p...@8002000.io 
> - owner:{dev 
> path=/machine/unattached/device[3]}
> 20008000-2000 (prio 0, i/o): alias 
> pci@8002000.mmio32-alias @p...@8002000.mmio 
> 8000- owner:{dev 
> path=/machine/unattached/device[3]}
> 2100-21ff (prio 0, i/o): alias 
> pci@8002000.mmio64-alias @p...@8002000.mmio 
> 2100-21ff owner:{dev 
> path=/machine/unattached/device[3]}
> 
> address-space: pci@8002000
>   - (prio 0, i/o): 
> pci@8002000.iommu-root owner:{dev path=/machine/unattached/device[3]}
> - (prio 0, i/o): tce-root-8001 
> owner:{dev path=/machine/unattached/device[3]/tce-table-8001}
>   0800-08007fff (prio 0, i/o): tce-iommu-8001 
> owner:{dev path=/machine/unattached/device[3]/tce-table-8001}
> - (prio 0, i/o): tce-root-8000 
> owner:{dev path=/machine/unattached/device[3]/tce-table-8000}
>   -3fff (prio 0, i/o): tce-iommu-8000 
> owner:{dev path=/machine/unattached/device[3]/tce-table-8000}
> 0400-0400 (prio 0, i/o): msi owner:{dev 
> path=/machine/unattached/device[3]}
> 
> address-space: vfio-pci
>   - (prio 0, i/o): bus master container 
> owner:{dev id=vfio0001_03_00_0}
> - (prio 0, i/o): alias bus master 
> @pci@8002000.iommu-root - owner:{dev 
> id=vfio0001_03_00_0}
> 
> memory-region: p...@8002000.io
>   - (prio 0, i/o): p...@8002000.io 
> owner:{dev path=/machine/unattached/device[3]}
> 
> memory-region: p...@8002000.mmio
>   - (prio 0, i/o): p...@8002000.mmio 
> owner:{dev path=/machine/unattached/device[3]}
> 2100-2100 (prio 1, i/o): 0001:03:00.0 base BAR 1 
> owner:{dev id=vfio0001_03_00_0}
>   2100-2100 (prio 0, i/o): 0001:03:00.0 BAR 1 
> owner:{dev id=vfio0001_03_00_0}
>   2100e000-2100e5ff (prio 0, i/o): msix-table owner:{dev 
> id=vfio0001_03_00_0}
>   2100f000-2100f00f (prio 0, i/o): msix-pba [disabled] 
> owner:{dev id=vfio0001_03_00_0}
> 2104-2107 (prio 1, i/o): 0001:03:00.0 base BAR 3 
> owner:{dev id=vfio0001_03_00_0}
>   2104-2107 (prio 0, i/o): 0001:03:00.0 BAR 3 
> owner:{dev id=vfio0001_03_00_0}
> 2104-2107 (prio 0, ramd): 0001:03:00.0 BAR 3 
> mmaps[0] owner:{dev id=vfio0001_03_00_0}
> 
> memory-region: pci@8002000.iommu-root
>   - (prio 0, i/o): 
> pci@8002000.iommu-root owner:{dev path=/machine/unattached/device[3]}
> - (prio 0, 

[Qemu-devel] [PATCH for 2.13 v2 17/19] linux-user: move riscv cpu loop to riscv directory

2018-04-11 Thread Laurent Vivier
No code change, only move code from main.c to
riscv/cpu_loop.c.

Signed-off-by: Laurent Vivier 
Reviewed-by: Richard Henderson 
Reviewed-by: Michael Clark 
---
 linux-user/main.c   | 101 +---
 linux-user/riscv/cpu_loop.c |  92 
 2 files changed, 93 insertions(+), 100 deletions(-)

diff --git a/linux-user/main.c b/linux-user/main.c
index 32922110f1..834ec0bfe5 100644
--- a/linux-user/main.c
+++ b/linux-user/main.c
@@ -149,100 +149,6 @@ void fork_end(int child)
 }
 }
 
-#ifdef TARGET_RISCV
-
-void cpu_loop(CPURISCVState *env)
-{
-CPUState *cs = CPU(riscv_env_get_cpu(env));
-int trapnr, signum, sigcode;
-target_ulong sigaddr;
-target_ulong ret;
-
-for (;;) {
-cpu_exec_start(cs);
-trapnr = cpu_exec(cs);
-cpu_exec_end(cs);
-process_queued_cpu_work(cs);
-
-signum = 0;
-sigcode = 0;
-sigaddr = 0;
-
-switch (trapnr) {
-case EXCP_INTERRUPT:
-/* just indicate that signals should be handled asap */
-break;
-case EXCP_ATOMIC:
-cpu_exec_step_atomic(cs);
-break;
-case RISCV_EXCP_U_ECALL:
-env->pc += 4;
-if (env->gpr[xA7] == TARGET_NR_arch_specific_syscall + 15) {
-/* riscv_flush_icache_syscall is a no-op in QEMU as
-   self-modifying code is automatically detected */
-ret = 0;
-} else {
-ret = do_syscall(env,
- env->gpr[xA7],
- env->gpr[xA0],
- env->gpr[xA1],
- env->gpr[xA2],
- env->gpr[xA3],
- env->gpr[xA4],
- env->gpr[xA5],
- 0, 0);
-}
-if (ret == -TARGET_ERESTARTSYS) {
-env->pc -= 4;
-} else if (ret != -TARGET_QEMU_ESIGRETURN) {
-env->gpr[xA0] = ret;
-}
-if (cs->singlestep_enabled) {
-goto gdbstep;
-}
-break;
-case RISCV_EXCP_ILLEGAL_INST:
-signum = TARGET_SIGILL;
-sigcode = TARGET_ILL_ILLOPC;
-break;
-case RISCV_EXCP_BREAKPOINT:
-signum = TARGET_SIGTRAP;
-sigcode = TARGET_TRAP_BRKPT;
-sigaddr = env->pc;
-break;
-case RISCV_EXCP_INST_PAGE_FAULT:
-case RISCV_EXCP_LOAD_PAGE_FAULT:
-case RISCV_EXCP_STORE_PAGE_FAULT:
-signum = TARGET_SIGSEGV;
-sigcode = TARGET_SEGV_MAPERR;
-break;
-case EXCP_DEBUG:
-gdbstep:
-signum = gdb_handlesig(cs, TARGET_SIGTRAP);
-sigcode = TARGET_TRAP_BRKPT;
-break;
-default:
-EXCP_DUMP(env, "\nqemu: unhandled CPU exception %#x - aborting\n",
- trapnr);
-exit(EXIT_FAILURE);
-}
-
-if (signum) {
-target_siginfo_t info = {
-.si_signo = signum,
-.si_errno = 0,
-.si_code = sigcode,
-._sifields._sigfault._addr = sigaddr
-};
-queue_signal(env, info.si_signo, QEMU_SI_KILL, );
-}
-
-process_pending_signals(env);
-}
-}
-
-#endif /* TARGET_RISCV */
-
 #ifdef TARGET_HPPA
 
 static abi_ulong hppa_lws(CPUHPPAState *env)
@@ -1322,12 +1228,7 @@ int main(int argc, char **argv, char **envp)
 
 target_cpu_copy_regs(env, regs);
 
-#if defined(TARGET_RISCV)
-{
-env->pc = regs->sepc;
-env->gpr[xSP] = regs->sp;
-}
-#elif defined(TARGET_HPPA)
+#if defined(TARGET_HPPA)
 {
 int i;
 for (i = 1; i < 32; i++) {
diff --git a/linux-user/riscv/cpu_loop.c b/linux-user/riscv/cpu_loop.c
index b7700a5561..f137d39d7e 100644
--- a/linux-user/riscv/cpu_loop.c
+++ b/linux-user/riscv/cpu_loop.c
@@ -21,6 +21,98 @@
 #include "qemu.h"
 #include "cpu_loop-common.h"
 
+void cpu_loop(CPURISCVState *env)
+{
+CPUState *cs = CPU(riscv_env_get_cpu(env));
+int trapnr, signum, sigcode;
+target_ulong sigaddr;
+target_ulong ret;
+
+for (;;) {
+cpu_exec_start(cs);
+trapnr = cpu_exec(cs);
+cpu_exec_end(cs);
+process_queued_cpu_work(cs);
+
+signum = 0;
+sigcode = 0;
+sigaddr = 0;
+
+switch (trapnr) {
+case EXCP_INTERRUPT:
+/* just indicate that signals should be handled asap */
+break;
+case EXCP_ATOMIC:
+cpu_exec_step_atomic(cs);
+break;
+case RISCV_EXCP_U_ECALL:
+env->pc += 4;
+if (env->gpr[xA7] == TARGET_NR_arch_specific_syscall + 15) {
+   

Re: [Qemu-devel] [PATCH for 2.13 v3 00/20] move arch specific parts to arch directories

2018-04-11 Thread no-reply
Hi,

This series seems to have some coding style problems. See output below for
more information:

Type: series
Message-id: 20180411184556.17200-1-laur...@vivier.eu
Subject: [Qemu-devel] [PATCH for 2.13 v3 00/20] move arch specific parts to 
arch directories

=== TEST SCRIPT BEGIN ===
#!/bin/bash

BASE=base
n=1
total=$(git log --oneline $BASE.. | wc -l)
failed=0

git config --local diff.renamelimit 0
git config --local diff.renames True
git config --local diff.algorithm histogram

commits="$(git log --format=%H --reverse $BASE..)"
for c in $commits; do
echo "Checking PATCH $n/$total: $(git log -n 1 --format=%s $c)..."
if ! git show $c --format=email | ./scripts/checkpatch.pl --mailback -; then
failed=1
echo
fi
n=$((n+1))
done

exit $failed
=== TEST SCRIPT END ===

Updating 3c8cf5a9c21ff8782164d1def7f44bd888713384
From https://github.com/patchew-project/qemu
 * [new tag]   patchew/20180411184556.17200-1-laur...@vivier.eu -> 
patchew/20180411184556.17200-1-laur...@vivier.eu
Switched to a new branch 'test'
b8200121c0 linux-user: define TARGET_ARCH_HAS_SETUP_FRAME
1c33964700 linux-user: move ppc/ppc64 signal.c parts to ppc directory
f8baf51675 linux-user: move mips/mips64 signal.c parts to mips directory
da511d0ec7 linux-user: move sparc/sparc64 signal.c parts to sparc directory
e1ad8f443e linux-user: move i386/x86_64 signal.c parts to i386 directory
b2dba996c9 linux-user: move xtensa signal.c parts to xtensa directory
33a303fd45 linux-user: move hppa signal.c parts to hppa directory
9e8ca47db2 linux-user: move riscv signal.c parts to riscv directory
9e4c7dd51c linux-user: move tilegx signal.c parts to tilegx directory
7cf07bd709 linux-user: move alpha signal.c parts to alpha directory
15c702a466 linux-user: move m68k signal.c parts to m68k directory
e36b1c7fa1 linux-user: move s390x signal.c parts to s390x directory
dfde7dcf26 linux-user: move openrisc signal.c parts to openrisc directory
77679be421 linux-user: move nios2 signal.c parts to nios2 directory
0bc1083544 linux-user: move cris signal.c parts to cris directory
69b1454cda linux-user: move microblaze signal.c parts to microblaze directory
5442673d4d linux-user: move sh4 signal.c parts to sh4 directory
499bcd5f1c linux-user: move arm signal.c parts to arm directory
7328bbf943 linux-user: move aarch64 signal.c parts to aarch64 directory
603a10d154 linux-user: create a dummy per arch signal.c

=== OUTPUT BEGIN ===
Checking PATCH 1/20: linux-user: create a dummy per arch signal.c...
Checking PATCH 2/20: linux-user: move aarch64 signal.c parts to aarch64 
directory...
Checking PATCH 3/20: linux-user: move arm signal.c parts to arm directory...
ERROR: open brace '{' following struct go on the same line
#109: FILE: linux-user/arm/signal.c:102:
+struct sigframe_v1
+{

ERROR: spaces required around that '-' (ctx:VxV)
#111: FILE: linux-user/arm/signal.c:104:
+abi_ulong extramask[TARGET_NSIG_WORDS-1];
  ^

ERROR: open brace '{' following struct go on the same line
#116: FILE: linux-user/arm/signal.c:109:
+struct sigframe_v2
+{

ERROR: open brace '{' following struct go on the same line
#122: FILE: linux-user/arm/signal.c:115:
+struct rt_sigframe_v1
+{

ERROR: open brace '{' following struct go on the same line
#131: FILE: linux-user/arm/signal.c:124:
+struct rt_sigframe_v2
+{

WARNING: line over 80 characters
#142: FILE: linux-user/arm/signal.c:135:
+#define SWI_SYS_SIGRETURN   (0xef00|(TARGET_NR_sigreturn + 
ARM_SYSCALL_BASE))

ERROR: spaces required around that '|' (ctx:VxV)
#142: FILE: linux-user/arm/signal.c:135:
+#define SWI_SYS_SIGRETURN   (0xef00|(TARGET_NR_sigreturn + 
ARM_SYSCALL_BASE))
^

WARNING: line over 80 characters
#143: FILE: linux-user/arm/signal.c:136:
+#define SWI_SYS_RT_SIGRETURN(0xef00|(TARGET_NR_rt_sigreturn + 
ARM_SYSCALL_BASE))

ERROR: spaces required around that '|' (ctx:VxV)
#143: FILE: linux-user/arm/signal.c:136:
+#define SWI_SYS_RT_SIGRETURN(0xef00|(TARGET_NR_rt_sigreturn + 
ARM_SYSCALL_BASE))
^

WARNING: line over 80 characters
#150: FILE: linux-user/arm/signal.c:143:
+#define SWI_THUMB_RT_SIGRETURN  (0xdf00 << 16 | 0x2700 | 
(TARGET_NR_rt_sigreturn))

ERROR: "(foo*)" should be "(foo *)"
#261: FILE: linux-user/arm/signal.c:254:
+return (abi_ulong*)(vfpframe+1);

ERROR: spaces required around that '+' (ctx:VxV)
#261: FILE: linux-user/arm/signal.c:254:
+return (abi_ulong*)(vfpframe+1);
 ^

ERROR: "(foo*)" should be "(foo *)"
#281: FILE: linux-user/arm/signal.c:274:
+return (abi_ulong*)(iwmmxtframe+1);

ERROR: spaces required around that '+' (ctx:VxV)
#281: FILE: linux-user/arm/signal.c:274:
+return (abi_ulong*)(iwmmxtframe+1);
^

ERROR: space required before the open parenthesis '('
#313: FILE: linux-user/arm/signal.c:306:
+for(i = 0; i < 

[Qemu-devel] [PATCH for 2.13] linux-user: introduce target_sigsp() and target_save_altstack()

2018-04-11 Thread Laurent Vivier
Signed-off-by: Laurent Vivier 
---
Based-on: <20180411185651.21351-1-laur...@vivier.eu>
"[PATCH for 2.13 v2 00/19] linux-user: move arch specific parts from main.c to 
arch directories"

 linux-user/aarch64/signal.c| 13 ++---
 linux-user/alpha/signal.c  | 17 ++---
 linux-user/arm/signal.c| 17 -
 linux-user/hppa/signal.c   | 14 --
 linux-user/i386/signal.c   | 12 +++-
 linux-user/m68k/signal.c   | 15 +++
 linux-user/microblaze/signal.c |  4 +---
 linux-user/mips/signal.c   | 15 ++-
 linux-user/nios2/signal.c  | 21 ++---
 linux-user/openrisc/signal.c   | 14 +++---
 linux-user/ppc/signal.c| 15 ++-
 linux-user/riscv/signal.c  | 28 +---
 linux-user/s390x/signal.c  | 12 +++-
 linux-user/sh4/signal.c| 11 ++-
 linux-user/signal-common.h | 15 ---
 linux-user/signal.c| 32 
 linux-user/sparc/signal.c  | 28 +++-
 linux-user/tilegx/signal.c | 13 +++--
 linux-user/xtensa/signal.c | 15 ---
 19 files changed, 108 insertions(+), 203 deletions(-)

diff --git a/linux-user/aarch64/signal.c b/linux-user/aarch64/signal.c
index 72d20975f3..dc6f1c6d2e 100644
--- a/linux-user/aarch64/signal.c
+++ b/linux-user/aarch64/signal.c
@@ -120,9 +120,7 @@ static void target_setup_general_frame(struct 
target_rt_sigframe *sf,
 __put_user(0, >uc.tuc_flags);
 __put_user(0, >uc.tuc_link);
 
-__put_user(target_sigaltstack_used.ss_sp, >uc.tuc_stack.ss_sp);
-__put_user(sas_ss_flags(env->xregs[31]), >uc.tuc_stack.ss_flags);
-__put_user(target_sigaltstack_used.ss_size, >uc.tuc_stack.ss_size);
+target_save_altstack(>uc.tuc_stack, env);
 
 for (i = 0; i < 31; i++) {
 __put_user(env->xregs[i], >uc.tuc_mcontext.regs[i]);
@@ -372,14 +370,7 @@ static abi_ulong get_sigframe(struct target_sigaction *ka,
 {
 abi_ulong sp;
 
-sp = env->xregs[31];
-
-/*
- * This is the X/Open sanctioned signal stack switching.
- */
-if ((ka->sa_flags & TARGET_SA_ONSTACK) && !sas_ss_flags(sp)) {
-sp = target_sigaltstack_used.ss_sp + target_sigaltstack_used.ss_size;
-}
+sp = target_sigsp(get_sp_from_cpustate(env), ka);
 
 sp = (sp - size) & ~15;
 
diff --git a/linux-user/alpha/signal.c b/linux-user/alpha/signal.c
index a8c718f2c6..f24de02c6f 100644
--- a/linux-user/alpha/signal.c
+++ b/linux-user/alpha/signal.c
@@ -117,12 +117,10 @@ static inline abi_ulong get_sigframe(struct 
target_sigaction *sa,
  CPUAlphaState *env,
  unsigned long framesize)
 {
-abi_ulong sp = env->ir[IR_SP];
+abi_ulong sp;
+
+sp = target_sigsp(get_sp_from_cpustate(env), sa);
 
-/* This is the X/Open sanctioned signal stack switching.  */
-if ((sa->sa_flags & TARGET_SA_ONSTACK) != 0 && !sas_ss_flags(sp)) {
-sp = target_sigaltstack_used.ss_sp + target_sigaltstack_used.ss_size;
-}
 return (sp - framesize) & -32;
 }
 
@@ -187,12 +185,9 @@ void setup_rt_frame(int sig, struct target_sigaction *ka,
 __put_user(0, >uc.tuc_flags);
 __put_user(0, >uc.tuc_link);
 __put_user(set->sig[0], >uc.tuc_osf_sigmask);
-__put_user(target_sigaltstack_used.ss_sp,
-   >uc.tuc_stack.ss_sp);
-__put_user(sas_ss_flags(env->ir[IR_SP]),
-   >uc.tuc_stack.ss_flags);
-__put_user(target_sigaltstack_used.ss_size,
-   >uc.tuc_stack.ss_size);
+
+target_save_altstack(>uc.tuc_stack, env);
+
 setup_sigcontext(>uc.tuc_mcontext, env, frame_addr, set);
 for (i = 0; i < TARGET_NSIG_WORDS; ++i) {
 __put_user(set->sig[i], >uc.tuc_sigmask.sig[i]);
diff --git a/linux-user/arm/signal.c b/linux-user/arm/signal.c
index 0c1ec53025..0759b5dd8f 100644
--- a/linux-user/arm/signal.c
+++ b/linux-user/arm/signal.c
@@ -186,14 +186,9 @@ setup_sigcontext(struct target_sigcontext *sc, /*struct 
_fpstate *fpstate,*/
 static inline abi_ulong
 get_sigframe(struct target_sigaction *ka, CPUARMState *regs, int framesize)
 {
-unsigned long sp = regs->regs[13];
+unsigned long sp;
 
-/*
- * This is the X/Open sanctioned signal stack switching.
- */
-if ((ka->sa_flags & TARGET_SA_ONSTACK) && !sas_ss_flags(sp)) {
-sp = target_sigaltstack_used.ss_sp + target_sigaltstack_used.ss_size;
-}
+sp = target_sigsp(get_sp_from_cpustate(regs), ka);
 /*
  * ATPCS B01 mandates 8-byte alignment
  */
@@ -285,9 +280,7 @@ static void setup_sigframe_v2(struct target_ucontext_v2 *uc,
 memset(uc, 0, offsetof(struct target_ucontext_v2, tuc_mcontext));
 
 memset(, 0, sizeof(stack));
-__put_user(target_sigaltstack_used.ss_sp, _sp);
-__put_user(target_sigaltstack_used.ss_size, _size);
-

[Qemu-devel] [PATCH for 2.13 v2 16/19] linux-user: move tilegx cpu loop to tilegx directory

2018-04-11 Thread Laurent Vivier
No code change, only move code from main.c to
tilegx/cpu_loop.c.

Signed-off-by: Laurent Vivier 
Reviewed-by: Richard Henderson 
---
 linux-user/main.c| 267 ---
 linux-user/tilegx/cpu_loop.c | 260 +
 2 files changed, 260 insertions(+), 267 deletions(-)

diff --git a/linux-user/main.c b/linux-user/main.c
index 7f6cfa5548..32922110f1 100644
--- a/linux-user/main.c
+++ b/linux-user/main.c
@@ -149,262 +149,6 @@ void fork_end(int child)
 }
 }
 
-#ifdef TARGET_TILEGX
-
-static void gen_sigill_reg(CPUTLGState *env)
-{
-target_siginfo_t info;
-
-info.si_signo = TARGET_SIGILL;
-info.si_errno = 0;
-info.si_code = TARGET_ILL_PRVREG;
-info._sifields._sigfault._addr = env->pc;
-queue_signal(env, info.si_signo, QEMU_SI_FAULT, );
-}
-
-static void do_signal(CPUTLGState *env, int signo, int sigcode)
-{
-target_siginfo_t info;
-
-info.si_signo = signo;
-info.si_errno = 0;
-info._sifields._sigfault._addr = env->pc;
-
-if (signo == TARGET_SIGSEGV) {
-/* The passed in sigcode is a dummy; check for a page mapping
-   and pass either MAPERR or ACCERR.  */
-target_ulong addr = env->excaddr;
-info._sifields._sigfault._addr = addr;
-if (page_check_range(addr, 1, PAGE_VALID) < 0) {
-sigcode = TARGET_SEGV_MAPERR;
-} else {
-sigcode = TARGET_SEGV_ACCERR;
-}
-}
-info.si_code = sigcode;
-
-queue_signal(env, info.si_signo, QEMU_SI_FAULT, );
-}
-
-static void gen_sigsegv_maperr(CPUTLGState *env, target_ulong addr)
-{
-env->excaddr = addr;
-do_signal(env, TARGET_SIGSEGV, 0);
-}
-
-static void set_regval(CPUTLGState *env, uint8_t reg, uint64_t val)
-{
-if (unlikely(reg >= TILEGX_R_COUNT)) {
-switch (reg) {
-case TILEGX_R_SN:
-case TILEGX_R_ZERO:
-return;
-case TILEGX_R_IDN0:
-case TILEGX_R_IDN1:
-case TILEGX_R_UDN0:
-case TILEGX_R_UDN1:
-case TILEGX_R_UDN2:
-case TILEGX_R_UDN3:
-gen_sigill_reg(env);
-return;
-default:
-g_assert_not_reached();
-}
-}
-env->regs[reg] = val;
-}
-
-/*
- * Compare the 8-byte contents of the CmpValue SPR with the 8-byte value in
- * memory at the address held in the first source register. If the values are
- * not equal, then no memory operation is performed. If the values are equal,
- * the 8-byte quantity from the second source register is written into memory
- * at the address held in the first source register. In either case, the result
- * of the instruction is the value read from memory. The compare and write to
- * memory are atomic and thus can be used for synchronization purposes. This
- * instruction only operates for addresses aligned to a 8-byte boundary.
- * Unaligned memory access causes an Unaligned Data Reference interrupt.
- *
- * Functional Description (64-bit)
- *   uint64_t memVal = memoryReadDoubleWord (rf[SrcA]);
- *   rf[Dest] = memVal;
- *   if (memVal == SPR[CmpValueSPR])
- *   memoryWriteDoubleWord (rf[SrcA], rf[SrcB]);
- *
- * Functional Description (32-bit)
- *   uint64_t memVal = signExtend32 (memoryReadWord (rf[SrcA]));
- *   rf[Dest] = memVal;
- *   if (memVal == signExtend32 (SPR[CmpValueSPR]))
- *   memoryWriteWord (rf[SrcA], rf[SrcB]);
- *
- *
- * This function also processes exch and exch4 which need not process SPR.
- */
-static void do_exch(CPUTLGState *env, bool quad, bool cmp)
-{
-target_ulong addr;
-target_long val, sprval;
-
-start_exclusive();
-
-addr = env->atomic_srca;
-if (quad ? get_user_s64(val, addr) : get_user_s32(val, addr)) {
-goto sigsegv_maperr;
-}
-
-if (cmp) {
-if (quad) {
-sprval = env->spregs[TILEGX_SPR_CMPEXCH];
-} else {
-sprval = sextract64(env->spregs[TILEGX_SPR_CMPEXCH], 0, 32);
-}
-}
-
-if (!cmp || val == sprval) {
-target_long valb = env->atomic_srcb;
-if (quad ? put_user_u64(valb, addr) : put_user_u32(valb, addr)) {
-goto sigsegv_maperr;
-}
-}
-
-set_regval(env, env->atomic_dstr, val);
-end_exclusive();
-return;
-
- sigsegv_maperr:
-end_exclusive();
-gen_sigsegv_maperr(env, addr);
-}
-
-static void do_fetch(CPUTLGState *env, int trapnr, bool quad)
-{
-int8_t write = 1;
-target_ulong addr;
-target_long val, valb;
-
-start_exclusive();
-
-addr = env->atomic_srca;
-valb = env->atomic_srcb;
-if (quad ? get_user_s64(val, addr) : get_user_s32(val, addr)) {
-goto sigsegv_maperr;
-}
-
-switch (trapnr) {
-case TILEGX_EXCP_OPCODE_FETCHADD:
-case TILEGX_EXCP_OPCODE_FETCHADD4:
-valb += val;
-break;
-case TILEGX_EXCP_OPCODE_FETCHADDGEZ:
-valb += val;
-if 

[Qemu-devel] [PATCH for 2.13 v2 08/19] linux-user: move nios2 cpu loop to nios2 directory

2018-04-11 Thread Laurent Vivier
No code change, only move code from main.c to
nios2/cpu_loop.c.

Signed-off-by: Laurent Vivier 
Reviewed-by: Richard Henderson 
---
 linux-user/main.c   | 133 
 linux-user/nios2/cpu_loop.c | 126 +
 2 files changed, 126 insertions(+), 133 deletions(-)

diff --git a/linux-user/main.c b/linux-user/main.c
index a760c19379..88f807549f 100644
--- a/linux-user/main.c
+++ b/linux-user/main.c
@@ -149,109 +149,6 @@ void fork_end(int child)
 }
 }
 
-#ifdef TARGET_NIOS2
-
-void cpu_loop(CPUNios2State *env)
-{
-CPUState *cs = ENV_GET_CPU(env);
-Nios2CPU *cpu = NIOS2_CPU(cs);
-target_siginfo_t info;
-int trapnr, gdbsig, ret;
-
-for (;;) {
-cpu_exec_start(cs);
-trapnr = cpu_exec(cs);
-cpu_exec_end(cs);
-gdbsig = 0;
-
-switch (trapnr) {
-case EXCP_INTERRUPT:
-/* just indicate that signals should be handled asap */
-break;
-case EXCP_TRAP:
-if (env->regs[R_AT] == 0) {
-abi_long ret;
-qemu_log_mask(CPU_LOG_INT, "\nSyscall\n");
-
-ret = do_syscall(env, env->regs[2],
- env->regs[4], env->regs[5], env->regs[6],
- env->regs[7], env->regs[8], env->regs[9],
- 0, 0);
-
-if (env->regs[2] == 0) {/* FIXME: syscall 0 workaround */
-ret = 0;
-}
-
-env->regs[2] = abs(ret);
-/* Return value is 0..4096 */
-env->regs[7] = (ret > 0xf000ULL);
-env->regs[CR_ESTATUS] = env->regs[CR_STATUS];
-env->regs[CR_STATUS] &= ~0x3;
-env->regs[R_EA] = env->regs[R_PC] + 4;
-env->regs[R_PC] += 4;
-break;
-} else {
-qemu_log_mask(CPU_LOG_INT, "\nTrap\n");
-
-env->regs[CR_ESTATUS] = env->regs[CR_STATUS];
-env->regs[CR_STATUS] &= ~0x3;
-env->regs[R_EA] = env->regs[R_PC] + 4;
-env->regs[R_PC] = cpu->exception_addr;
-
-gdbsig = TARGET_SIGTRAP;
-break;
-}
-case 0xaa:
-switch (env->regs[R_PC]) {
-/*case 0x1000:*/  /* TODO:__kuser_helper_version */
-case 0x1004:  /* __kuser_cmpxchg */
-start_exclusive();
-if (env->regs[4] & 0x3) {
-goto kuser_fail;
-}
-ret = get_user_u32(env->regs[2], env->regs[4]);
-if (ret) {
-end_exclusive();
-goto kuser_fail;
-}
-env->regs[2] -= env->regs[5];
-if (env->regs[2] == 0) {
-put_user_u32(env->regs[6], env->regs[4]);
-}
-end_exclusive();
-env->regs[R_PC] = env->regs[R_RA];
-break;
-/*case 0x1040:*/  /* TODO:__kuser_sigtramp */
-default:
-;
-kuser_fail:
-info.si_signo = TARGET_SIGSEGV;
-info.si_errno = 0;
-/* TODO: check env->error_code */
-info.si_code = TARGET_SEGV_MAPERR;
-info._sifields._sigfault._addr = env->regs[R_PC];
-queue_signal(env, info.si_signo, QEMU_SI_FAULT, );
-}
-break;
-default:
-EXCP_DUMP(env, "\nqemu: unhandled CPU exception %#x - aborting\n",
- trapnr);
-gdbsig = TARGET_SIGILL;
-break;
-}
-if (gdbsig) {
-gdb_handlesig(cs, gdbsig);
-if (gdbsig != TARGET_SIGTRAP) {
-exit(EXIT_FAILURE);
-}
-}
-
-process_pending_signals(env);
-}
-}
-
-#endif /* TARGET_NIOS2 */
-
 #ifdef TARGET_OPENRISC
 
 void cpu_loop(CPUOpenRISCState *env)
@@ -2545,36 +2442,6 @@ int main(int argc, char **argv, char **envp)
 env->regs[31] = regs->r31; 
 env->sregs[SR_PC] = regs->pc;
 }
-#elif defined(TARGET_NIOS2)
-{
-env->regs[0] = 0;
-env->regs[1] = regs->r1;
-env->regs[2] = regs->r2;
-env->regs[3] = regs->r3;
-env->regs[4] = regs->r4;
-env->regs[5] = regs->r5;
-env->regs[6] = regs->r6;
-env->regs[7] = regs->r7;
-env->regs[8] = regs->r8;
-env->regs[9] = regs->r9;
-env->regs[10] = regs->r10;
-env->regs[11] = regs->r11;
-env->regs[12] = regs->r12;
-env->regs[13] = regs->r13;
-env->regs[14] = regs->r14;
-env->regs[15] = regs->r15;
-/* TODO: unsigned long  orig_r2; */
-env->regs[R_RA] = regs->ra;
-env->regs[R_FP] = regs->fp;
-

[Qemu-devel] [PATCH for 2.13 v2 07/19] linux-user: move mips/mips64 cpu loop to mips directory

2018-04-11 Thread Laurent Vivier
No code change, only move code from main.c to
mips/cpu_loop.c.

Include mips/cpu_loop.c in mips64/cpu_loop.c
to avoid duplicating code.

Signed-off-by: Laurent Vivier 
Reviewed-by: Richard Henderson 
Reviewed-by: Philippe Mathieu-Daudé 
---

Notes:
v2: expand tabs

 linux-user/main.c| 725 ---
 linux-user/mips/cpu_loop.c   | 723 ++
 linux-user/mips64/cpu_loop.c |   8 +-
 3 files changed, 724 insertions(+), 732 deletions(-)

diff --git a/linux-user/main.c b/linux-user/main.c
index 2340320818..a760c19379 100644
--- a/linux-user/main.c
+++ b/linux-user/main.c
@@ -149,705 +149,6 @@ void fork_end(int child)
 }
 }
 
-#ifdef TARGET_MIPS
-
-# ifdef TARGET_ABI_MIPSO32
-#  define MIPS_SYS(name, args) args,
-static const uint8_t mips_syscall_args[] = {
-   MIPS_SYS(sys_syscall, 8)/* 4000 */
-   MIPS_SYS(sys_exit   , 1)
-   MIPS_SYS(sys_fork   , 0)
-   MIPS_SYS(sys_read   , 3)
-   MIPS_SYS(sys_write  , 3)
-   MIPS_SYS(sys_open   , 3)/* 4005 */
-   MIPS_SYS(sys_close  , 1)
-   MIPS_SYS(sys_waitpid, 3)
-   MIPS_SYS(sys_creat  , 2)
-   MIPS_SYS(sys_link   , 2)
-   MIPS_SYS(sys_unlink , 1)/* 4010 */
-   MIPS_SYS(sys_execve , 0)
-   MIPS_SYS(sys_chdir  , 1)
-   MIPS_SYS(sys_time   , 1)
-   MIPS_SYS(sys_mknod  , 3)
-   MIPS_SYS(sys_chmod  , 2)/* 4015 */
-   MIPS_SYS(sys_lchown , 3)
-   MIPS_SYS(sys_ni_syscall , 0)
-   MIPS_SYS(sys_ni_syscall , 0)/* was sys_stat */
-   MIPS_SYS(sys_lseek  , 3)
-   MIPS_SYS(sys_getpid , 0)/* 4020 */
-   MIPS_SYS(sys_mount  , 5)
-   MIPS_SYS(sys_umount , 1)
-   MIPS_SYS(sys_setuid , 1)
-   MIPS_SYS(sys_getuid , 0)
-   MIPS_SYS(sys_stime  , 1)/* 4025 */
-   MIPS_SYS(sys_ptrace , 4)
-   MIPS_SYS(sys_alarm  , 1)
-   MIPS_SYS(sys_ni_syscall , 0)/* was sys_fstat */
-   MIPS_SYS(sys_pause  , 0)
-   MIPS_SYS(sys_utime  , 2)/* 4030 */
-   MIPS_SYS(sys_ni_syscall , 0)
-   MIPS_SYS(sys_ni_syscall , 0)
-   MIPS_SYS(sys_access , 2)
-   MIPS_SYS(sys_nice   , 1)
-   MIPS_SYS(sys_ni_syscall , 0)/* 4035 */
-   MIPS_SYS(sys_sync   , 0)
-   MIPS_SYS(sys_kill   , 2)
-   MIPS_SYS(sys_rename , 2)
-   MIPS_SYS(sys_mkdir  , 2)
-   MIPS_SYS(sys_rmdir  , 1)/* 4040 */
-   MIPS_SYS(sys_dup, 1)
-   MIPS_SYS(sys_pipe   , 0)
-   MIPS_SYS(sys_times  , 1)
-   MIPS_SYS(sys_ni_syscall , 0)
-   MIPS_SYS(sys_brk, 1)/* 4045 */
-   MIPS_SYS(sys_setgid , 1)
-   MIPS_SYS(sys_getgid , 0)
-   MIPS_SYS(sys_ni_syscall , 0)/* was signal(2) */
-   MIPS_SYS(sys_geteuid, 0)
-   MIPS_SYS(sys_getegid, 0)/* 4050 */
-   MIPS_SYS(sys_acct   , 0)
-   MIPS_SYS(sys_umount2, 2)
-   MIPS_SYS(sys_ni_syscall , 0)
-   MIPS_SYS(sys_ioctl  , 3)
-   MIPS_SYS(sys_fcntl  , 3)/* 4055 */
-   MIPS_SYS(sys_ni_syscall , 2)
-   MIPS_SYS(sys_setpgid, 2)
-   MIPS_SYS(sys_ni_syscall , 0)
-   MIPS_SYS(sys_olduname   , 1)
-   MIPS_SYS(sys_umask  , 1)/* 4060 */
-   MIPS_SYS(sys_chroot , 1)
-   MIPS_SYS(sys_ustat  , 2)
-   MIPS_SYS(sys_dup2   , 2)
-   MIPS_SYS(sys_getppid, 0)
-   MIPS_SYS(sys_getpgrp, 0)/* 4065 */
-   MIPS_SYS(sys_setsid , 0)
-   MIPS_SYS(sys_sigaction  , 3)
-   MIPS_SYS(sys_sgetmask   , 0)
-   MIPS_SYS(sys_ssetmask   , 1)
-   MIPS_SYS(sys_setreuid   , 2)/* 4070 */
-   MIPS_SYS(sys_setregid   , 2)
-   MIPS_SYS(sys_sigsuspend , 0)
-   MIPS_SYS(sys_sigpending , 1)
-   MIPS_SYS(sys_sethostname, 2)
-   MIPS_SYS(sys_setrlimit  , 2)/* 4075 */
-   MIPS_SYS(sys_getrlimit  , 2)
-   MIPS_SYS(sys_getrusage  , 2)
-   MIPS_SYS(sys_gettimeofday, 2)
-   MIPS_SYS(sys_settimeofday, 2)
-   MIPS_SYS(sys_getgroups  , 2)/* 4080 */
-   MIPS_SYS(sys_setgroups  , 2)
-   MIPS_SYS(sys_ni_syscall , 0)/* old_select */
-   MIPS_SYS(sys_symlink, 2)
-   MIPS_SYS(sys_ni_syscall , 0)/* was sys_lstat */
-   MIPS_SYS(sys_readlink   , 3)/* 4085 */
-   MIPS_SYS(sys_uselib , 1)
-   MIPS_SYS(sys_swapon , 2)
-   MIPS_SYS(sys_reboot , 3)
-   MIPS_SYS(old_readdir, 3)
-   MIPS_SYS(old_mmap   , 6)/* 4090 */
-   MIPS_SYS(sys_munmap , 2)
-   MIPS_SYS(sys_truncate   , 2)
-   MIPS_SYS(sys_ftruncate  , 2)
-   MIPS_SYS(sys_fchmod , 2)
-   MIPS_SYS(sys_fchown , 3)/* 4095 */
-   MIPS_SYS(sys_getpriority, 2)
-   MIPS_SYS(sys_setpriority, 3)
-   

[Qemu-devel] [PATCH for 2.13 v2 04/19] linux-user: move arm cpu loop to arm directory

2018-04-11 Thread Laurent Vivier
No code change, only move code from main.c to
arm/cpu_loop.c and duplicate some macros
defined for both arm and aarch64.

Signed-off-by: Laurent Vivier 
Reviewed-by: Richard Henderson 
---
 linux-user/arm/cpu_loop.c | 430 +
 linux-user/main.c | 433 +-
 2 files changed, 432 insertions(+), 431 deletions(-)

diff --git a/linux-user/arm/cpu_loop.c b/linux-user/arm/cpu_loop.c
index b7700a5561..d911929bf6 100644
--- a/linux-user/arm/cpu_loop.c
+++ b/linux-user/arm/cpu_loop.c
@@ -19,8 +19,438 @@
 
 #include "qemu/osdep.h"
 #include "qemu.h"
+#include "elf.h"
 #include "cpu_loop-common.h"
 
+#define get_user_code_u32(x, gaddr, env)\
+({ abi_long __r = get_user_u32((x), (gaddr));   \
+if (!__r && bswap_code(arm_sctlr_b(env))) { \
+(x) = bswap32(x);   \
+}   \
+__r;\
+})
+
+#define get_user_code_u16(x, gaddr, env)\
+({ abi_long __r = get_user_u16((x), (gaddr));   \
+if (!__r && bswap_code(arm_sctlr_b(env))) { \
+(x) = bswap16(x);   \
+}   \
+__r;\
+})
+
+#define get_user_data_u32(x, gaddr, env)\
+({ abi_long __r = get_user_u32((x), (gaddr));   \
+if (!__r && arm_cpu_bswap_data(env)) {  \
+(x) = bswap32(x);   \
+}   \
+__r;\
+})
+
+#define get_user_data_u16(x, gaddr, env)\
+({ abi_long __r = get_user_u16((x), (gaddr));   \
+if (!__r && arm_cpu_bswap_data(env)) {  \
+(x) = bswap16(x);   \
+}   \
+__r;\
+})
+
+#define put_user_data_u32(x, gaddr, env)\
+({ typeof(x) __x = (x); \
+if (arm_cpu_bswap_data(env)) {  \
+__x = bswap32(__x); \
+}   \
+put_user_u32(__x, (gaddr)); \
+})
+
+#define put_user_data_u16(x, gaddr, env)\
+({ typeof(x) __x = (x); \
+if (arm_cpu_bswap_data(env)) {  \
+__x = bswap16(__x); \
+}   \
+put_user_u16(__x, (gaddr)); \
+})
+
+/* Commpage handling -- there is no commpage for AArch64 */
+
+/*
+ * See the Linux kernel's Documentation/arm/kernel_user_helpers.txt
+ * Input:
+ * r0 = pointer to oldval
+ * r1 = pointer to newval
+ * r2 = pointer to target value
+ *
+ * Output:
+ * r0 = 0 if *ptr was changed, non-0 if no exchange happened
+ * C set if *ptr was changed, clear if no exchange happened
+ *
+ * Note segv's in kernel helpers are a bit tricky, we can set the
+ * data address sensibly but the PC address is just the entry point.
+ */
+static void arm_kernel_cmpxchg64_helper(CPUARMState *env)
+{
+uint64_t oldval, newval, val;
+uint32_t addr, cpsr;
+target_siginfo_t info;
+
+/* Based on the 32 bit code in do_kernel_trap */
+
+/* XXX: This only works between threads, not between processes.
+   It's probably possible to implement this with native host
+   operations. However things like ldrex/strex are much harder so
+   there's not much point trying.  */
+start_exclusive();
+cpsr = cpsr_read(env);
+addr = env->regs[2];
+
+if (get_user_u64(oldval, env->regs[0])) {
+env->exception.vaddress = env->regs[0];
+goto segv;
+};
+
+if (get_user_u64(newval, env->regs[1])) {
+env->exception.vaddress = env->regs[1];
+goto segv;
+};
+
+if (get_user_u64(val, addr)) {
+env->exception.vaddress = addr;
+goto segv;
+}
+
+if (val == oldval) {
+val = newval;
+
+if (put_user_u64(val, addr)) {
+env->exception.vaddress = addr;
+goto segv;
+};
+
+env->regs[0] = 0;
+cpsr |= CPSR_C;
+} else {
+env->regs[0] = -1;
+cpsr &= ~CPSR_C;
+}
+cpsr_write(env, cpsr, CPSR_C, CPSRWriteByInstr);
+end_exclusive();
+return;
+
+segv:
+end_exclusive();
+/* We get the PC of the entry address - which is as good as anything,
+   on a real kernel what you get depends on which mode it uses. */
+info.si_signo = TARGET_SIGSEGV;
+info.si_errno = 0;
+/* XXX: check env->error_code 

[Qemu-devel] [PATCH for 2.13 v2 11/19] linux-user: move cris cpu loop to cris directory

2018-04-11 Thread Laurent Vivier
No code change, only move code from main.c to
cris/cpu_loop.c.

Signed-off-by: Laurent Vivier 
Reviewed-by: Richard Henderson 
---

Notes:
v2: expand tabs

 linux-user/cris/cpu_loop.c | 89 +
 linux-user/main.c  | 90 --
 2 files changed, 89 insertions(+), 90 deletions(-)

diff --git a/linux-user/cris/cpu_loop.c b/linux-user/cris/cpu_loop.c
index b7700a5561..1c5eca9f83 100644
--- a/linux-user/cris/cpu_loop.c
+++ b/linux-user/cris/cpu_loop.c
@@ -21,6 +21,95 @@
 #include "qemu.h"
 #include "cpu_loop-common.h"
 
+void cpu_loop(CPUCRISState *env)
+{
+CPUState *cs = CPU(cris_env_get_cpu(env));
+int trapnr, ret;
+target_siginfo_t info;
+
+while (1) {
+cpu_exec_start(cs);
+trapnr = cpu_exec(cs);
+cpu_exec_end(cs);
+process_queued_cpu_work(cs);
+
+switch (trapnr) {
+case 0xaa:
+{
+info.si_signo = TARGET_SIGSEGV;
+info.si_errno = 0;
+/* XXX: check env->error_code */
+info.si_code = TARGET_SEGV_MAPERR;
+info._sifields._sigfault._addr = env->pregs[PR_EDA];
+queue_signal(env, info.si_signo, QEMU_SI_FAULT, );
+}
+break;
+case EXCP_INTERRUPT:
+  /* just indicate that signals should be handled asap */
+  break;
+case EXCP_BREAK:
+ret = do_syscall(env, 
+ env->regs[9], 
+ env->regs[10], 
+ env->regs[11], 
+ env->regs[12], 
+ env->regs[13], 
+ env->pregs[7], 
+ env->pregs[11],
+ 0, 0);
+if (ret == -TARGET_ERESTARTSYS) {
+env->pc -= 2;
+} else if (ret != -TARGET_QEMU_ESIGRETURN) {
+env->regs[10] = ret;
+}
+break;
+case EXCP_DEBUG:
+{
+int sig;
+
+sig = gdb_handlesig(cs, TARGET_SIGTRAP);
+if (sig)
+  {
+info.si_signo = sig;
+info.si_errno = 0;
+info.si_code = TARGET_TRAP_BRKPT;
+queue_signal(env, info.si_signo, QEMU_SI_FAULT, );
+  }
+}
+break;
+case EXCP_ATOMIC:
+cpu_exec_step_atomic(cs);
+break;
+default:
+printf ("Unhandled trap: 0x%x\n", trapnr);
+cpu_dump_state(cs, stderr, fprintf, 0);
+exit(EXIT_FAILURE);
+}
+process_pending_signals (env);
+}
+}
+
 void target_cpu_copy_regs(CPUArchState *env, struct target_pt_regs *regs)
 {
+CPUState *cpu = ENV_GET_CPU(env);
+TaskState *ts = cpu->opaque;
+struct image_info *info = ts->info;
+
+env->regs[0] = regs->r0;
+env->regs[1] = regs->r1;
+env->regs[2] = regs->r2;
+env->regs[3] = regs->r3;
+env->regs[4] = regs->r4;
+env->regs[5] = regs->r5;
+env->regs[6] = regs->r6;
+env->regs[7] = regs->r7;
+env->regs[8] = regs->r8;
+env->regs[9] = regs->r9;
+env->regs[10] = regs->r10;
+env->regs[11] = regs->r11;
+env->regs[12] = regs->r12;
+env->regs[13] = regs->r13;
+env->regs[14] = info->start_stack;
+env->regs[15] = regs->acr;
+env->pc = regs->erp;
 }
diff --git a/linux-user/main.c b/linux-user/main.c
index d7fee3e3db..9e01325d6a 100644
--- a/linux-user/main.c
+++ b/linux-user/main.c
@@ -149,76 +149,6 @@ void fork_end(int child)
 }
 }
 
-#ifdef TARGET_CRIS
-void cpu_loop(CPUCRISState *env)
-{
-CPUState *cs = CPU(cris_env_get_cpu(env));
-int trapnr, ret;
-target_siginfo_t info;
-
-while (1) {
-cpu_exec_start(cs);
-trapnr = cpu_exec(cs);
-cpu_exec_end(cs);
-process_queued_cpu_work(cs);
-
-switch (trapnr) {
-case 0xaa:
-{
-info.si_signo = TARGET_SIGSEGV;
-info.si_errno = 0;
-/* XXX: check env->error_code */
-info.si_code = TARGET_SEGV_MAPERR;
-info._sifields._sigfault._addr = env->pregs[PR_EDA];
-queue_signal(env, info.si_signo, QEMU_SI_FAULT, );
-}
-break;
-   case EXCP_INTERRUPT:
- /* just indicate that signals should be handled asap */
- break;
-case EXCP_BREAK:
-ret = do_syscall(env, 
- env->regs[9], 
- env->regs[10], 
- env->regs[11], 
- env->regs[12], 
- env->regs[13], 
- env->pregs[7], 
- 

[Qemu-devel] [PATCH for 2.13 v2 05/19] linux-user: move sparc/sparc64 cpu loop to sparc directory

2018-04-11 Thread Laurent Vivier
No code change, only move code from main.c to
sparc/cpu_loop.c.

Include sparc/cpu_loop.c in sparc64/cpu_loop.c
to avoid duplicating code.

Signed-off-by: Laurent Vivier 
Reviewed-by: Richard Henderson 
---
 linux-user/main.c | 288 +-
 linux-user/sparc/cpu_loop.c   | 280 
 linux-user/sparc64/cpu_loop.c |   8 +-
 3 files changed, 282 insertions(+), 294 deletions(-)

diff --git a/linux-user/main.c b/linux-user/main.c
index 45d1588958..4816ec54bb 100644
--- a/linux-user/main.c
+++ b/linux-user/main.c
@@ -149,281 +149,6 @@ void fork_end(int child)
 }
 }
 
-#ifdef TARGET_SPARC
-#define SPARC64_STACK_BIAS 2047
-
-//#define DEBUG_WIN
-
-/* WARNING: dealing with register windows _is_ complicated. More info
-   can be found at http://www.sics.se/~psm/sparcstack.html */
-static inline int get_reg_index(CPUSPARCState *env, int cwp, int index)
-{
-index = (index + cwp * 16) % (16 * env->nwindows);
-/* wrap handling : if cwp is on the last window, then we use the
-   registers 'after' the end */
-if (index < 8 && env->cwp == env->nwindows - 1)
-index += 16 * env->nwindows;
-return index;
-}
-
-/* save the register window 'cwp1' */
-static inline void save_window_offset(CPUSPARCState *env, int cwp1)
-{
-unsigned int i;
-abi_ulong sp_ptr;
-
-sp_ptr = env->regbase[get_reg_index(env, cwp1, 6)];
-#ifdef TARGET_SPARC64
-if (sp_ptr & 3)
-sp_ptr += SPARC64_STACK_BIAS;
-#endif
-#if defined(DEBUG_WIN)
-printf("win_overflow: sp_ptr=0x" TARGET_ABI_FMT_lx " save_cwp=%d\n",
-   sp_ptr, cwp1);
-#endif
-for(i = 0; i < 16; i++) {
-/* FIXME - what to do if put_user() fails? */
-put_user_ual(env->regbase[get_reg_index(env, cwp1, 8 + i)], sp_ptr);
-sp_ptr += sizeof(abi_ulong);
-}
-}
-
-static void save_window(CPUSPARCState *env)
-{
-#ifndef TARGET_SPARC64
-unsigned int new_wim;
-new_wim = ((env->wim >> 1) | (env->wim << (env->nwindows - 1))) &
-((1LL << env->nwindows) - 1);
-save_window_offset(env, cpu_cwp_dec(env, env->cwp - 2));
-env->wim = new_wim;
-#else
-save_window_offset(env, cpu_cwp_dec(env, env->cwp - 2));
-env->cansave++;
-env->canrestore--;
-#endif
-}
-
-static void restore_window(CPUSPARCState *env)
-{
-#ifndef TARGET_SPARC64
-unsigned int new_wim;
-#endif
-unsigned int i, cwp1;
-abi_ulong sp_ptr;
-
-#ifndef TARGET_SPARC64
-new_wim = ((env->wim << 1) | (env->wim >> (env->nwindows - 1))) &
-((1LL << env->nwindows) - 1);
-#endif
-
-/* restore the invalid window */
-cwp1 = cpu_cwp_inc(env, env->cwp + 1);
-sp_ptr = env->regbase[get_reg_index(env, cwp1, 6)];
-#ifdef TARGET_SPARC64
-if (sp_ptr & 3)
-sp_ptr += SPARC64_STACK_BIAS;
-#endif
-#if defined(DEBUG_WIN)
-printf("win_underflow: sp_ptr=0x" TARGET_ABI_FMT_lx " load_cwp=%d\n",
-   sp_ptr, cwp1);
-#endif
-for(i = 0; i < 16; i++) {
-/* FIXME - what to do if get_user() fails? */
-get_user_ual(env->regbase[get_reg_index(env, cwp1, 8 + i)], sp_ptr);
-sp_ptr += sizeof(abi_ulong);
-}
-#ifdef TARGET_SPARC64
-env->canrestore++;
-if (env->cleanwin < env->nwindows - 1)
-env->cleanwin++;
-env->cansave--;
-#else
-env->wim = new_wim;
-#endif
-}
-
-static void flush_windows(CPUSPARCState *env)
-{
-int offset, cwp1;
-
-offset = 1;
-for(;;) {
-/* if restore would invoke restore_window(), then we can stop */
-cwp1 = cpu_cwp_inc(env, env->cwp + offset);
-#ifndef TARGET_SPARC64
-if (env->wim & (1 << cwp1))
-break;
-#else
-if (env->canrestore == 0)
-break;
-env->cansave++;
-env->canrestore--;
-#endif
-save_window_offset(env, cwp1);
-offset++;
-}
-cwp1 = cpu_cwp_inc(env, env->cwp + 1);
-#ifndef TARGET_SPARC64
-/* set wim so that restore will reload the registers */
-env->wim = 1 << cwp1;
-#endif
-#if defined(DEBUG_WIN)
-printf("flush_windows: nb=%d\n", offset - 1);
-#endif
-}
-
-void cpu_loop (CPUSPARCState *env)
-{
-CPUState *cs = CPU(sparc_env_get_cpu(env));
-int trapnr;
-abi_long ret;
-target_siginfo_t info;
-
-while (1) {
-cpu_exec_start(cs);
-trapnr = cpu_exec(cs);
-cpu_exec_end(cs);
-process_queued_cpu_work(cs);
-
-/* Compute PSR before exposing state.  */
-if (env->cc_op != CC_OP_FLAGS) {
-cpu_get_psr(env);
-}
-
-switch (trapnr) {
-#ifndef TARGET_SPARC64
-case 0x88:
-case 0x90:
-#else
-case 0x110:
-case 0x16d:
-#endif
-ret = do_syscall (env, env->gregs[1],
-  env->regwptr[0], env->regwptr[1],
-  env->regwptr[2], env->regwptr[3],
-  env->regwptr[4], env->regwptr[5],
-

[Qemu-devel] [PATCH for 2.13 v2 19/19] linux-user: move xtensa cpu loop to xtensa directory

2018-04-11 Thread Laurent Vivier
No code change, only move code from main.c to
xtensa/cpu_loop.c.

Signed-off-by: Laurent Vivier 
Reviewed-by: Richard Henderson 
---

Notes:
v2: carry over changes introduced by
4a6bf7adb9 target/xtensa: linux-user: rewind pc for restarted syscall

 linux-user/main.c| 250 ---
 linux-user/xtensa/cpu_loop.c | 241 +
 2 files changed, 241 insertions(+), 250 deletions(-)

diff --git a/linux-user/main.c b/linux-user/main.c
index aa48b048a7..32347545c9 100644
--- a/linux-user/main.c
+++ b/linux-user/main.c
@@ -149,245 +149,6 @@ void fork_end(int child)
 }
 }
 
-#ifdef TARGET_XTENSA
-
-static void xtensa_rfw(CPUXtensaState *env)
-{
-xtensa_restore_owb(env);
-env->pc = env->sregs[EPC1];
-}
-
-static void xtensa_rfwu(CPUXtensaState *env)
-{
-env->sregs[WINDOW_START] |= (1 << env->sregs[WINDOW_BASE]);
-xtensa_rfw(env);
-}
-
-static void xtensa_rfwo(CPUXtensaState *env)
-{
-env->sregs[WINDOW_START] &= ~(1 << env->sregs[WINDOW_BASE]);
-xtensa_rfw(env);
-}
-
-static void xtensa_overflow4(CPUXtensaState *env)
-{
-put_user_ual(env->regs[0], env->regs[5] - 16);
-put_user_ual(env->regs[1], env->regs[5] - 12);
-put_user_ual(env->regs[2], env->regs[5] -  8);
-put_user_ual(env->regs[3], env->regs[5] -  4);
-xtensa_rfwo(env);
-}
-
-static void xtensa_underflow4(CPUXtensaState *env)
-{
-get_user_ual(env->regs[0], env->regs[5] - 16);
-get_user_ual(env->regs[1], env->regs[5] - 12);
-get_user_ual(env->regs[2], env->regs[5] -  8);
-get_user_ual(env->regs[3], env->regs[5] -  4);
-xtensa_rfwu(env);
-}
-
-static void xtensa_overflow8(CPUXtensaState *env)
-{
-put_user_ual(env->regs[0], env->regs[9] - 16);
-get_user_ual(env->regs[0], env->regs[1] - 12);
-put_user_ual(env->regs[1], env->regs[9] - 12);
-put_user_ual(env->regs[2], env->regs[9] -  8);
-put_user_ual(env->regs[3], env->regs[9] -  4);
-put_user_ual(env->regs[4], env->regs[0] - 32);
-put_user_ual(env->regs[5], env->regs[0] - 28);
-put_user_ual(env->regs[6], env->regs[0] - 24);
-put_user_ual(env->regs[7], env->regs[0] - 20);
-xtensa_rfwo(env);
-}
-
-static void xtensa_underflow8(CPUXtensaState *env)
-{
-get_user_ual(env->regs[0], env->regs[9] - 16);
-get_user_ual(env->regs[1], env->regs[9] - 12);
-get_user_ual(env->regs[2], env->regs[9] -  8);
-get_user_ual(env->regs[7], env->regs[1] - 12);
-get_user_ual(env->regs[3], env->regs[9] -  4);
-get_user_ual(env->regs[4], env->regs[7] - 32);
-get_user_ual(env->regs[5], env->regs[7] - 28);
-get_user_ual(env->regs[6], env->regs[7] - 24);
-get_user_ual(env->regs[7], env->regs[7] - 20);
-xtensa_rfwu(env);
-}
-
-static void xtensa_overflow12(CPUXtensaState *env)
-{
-put_user_ual(env->regs[0],  env->regs[13] - 16);
-get_user_ual(env->regs[0],  env->regs[1]  - 12);
-put_user_ual(env->regs[1],  env->regs[13] - 12);
-put_user_ual(env->regs[2],  env->regs[13] -  8);
-put_user_ual(env->regs[3],  env->regs[13] -  4);
-put_user_ual(env->regs[4],  env->regs[0]  - 48);
-put_user_ual(env->regs[5],  env->regs[0]  - 44);
-put_user_ual(env->regs[6],  env->regs[0]  - 40);
-put_user_ual(env->regs[7],  env->regs[0]  - 36);
-put_user_ual(env->regs[8],  env->regs[0]  - 32);
-put_user_ual(env->regs[9],  env->regs[0]  - 28);
-put_user_ual(env->regs[10], env->regs[0]  - 24);
-put_user_ual(env->regs[11], env->regs[0]  - 20);
-xtensa_rfwo(env);
-}
-
-static void xtensa_underflow12(CPUXtensaState *env)
-{
-get_user_ual(env->regs[0],  env->regs[13] - 16);
-get_user_ual(env->regs[1],  env->regs[13] - 12);
-get_user_ual(env->regs[2],  env->regs[13] -  8);
-get_user_ual(env->regs[11], env->regs[1]  - 12);
-get_user_ual(env->regs[3],  env->regs[13] -  4);
-get_user_ual(env->regs[4],  env->regs[11] - 48);
-get_user_ual(env->regs[5],  env->regs[11] - 44);
-get_user_ual(env->regs[6],  env->regs[11] - 40);
-get_user_ual(env->regs[7],  env->regs[11] - 36);
-get_user_ual(env->regs[8],  env->regs[11] - 32);
-get_user_ual(env->regs[9],  env->regs[11] - 28);
-get_user_ual(env->regs[10], env->regs[11] - 24);
-get_user_ual(env->regs[11], env->regs[11] - 20);
-xtensa_rfwu(env);
-}
-
-void cpu_loop(CPUXtensaState *env)
-{
-CPUState *cs = CPU(xtensa_env_get_cpu(env));
-target_siginfo_t info;
-abi_ulong ret;
-int trapnr;
-
-while (1) {
-cpu_exec_start(cs);
-trapnr = cpu_exec(cs);
-cpu_exec_end(cs);
-process_queued_cpu_work(cs);
-
-env->sregs[PS] &= ~PS_EXCM;
-switch (trapnr) {
-case EXCP_INTERRUPT:
-break;
-
-case EXC_WINDOW_OVERFLOW4:
-xtensa_overflow4(env);
-break;
-case EXC_WINDOW_UNDERFLOW4:
-xtensa_underflow4(env);
-break;
-  

[Qemu-devel] [PATCH for 2.13 v2 06/19] linux-user: move ppc/ppc64 cpu loop to ppc directory

2018-04-11 Thread Laurent Vivier
No code change, only move code from main.c to
ppc/cpu_loop.c.

Signed-off-by: Laurent Vivier 
Reviewed-by: Richard Henderson 
---
 linux-user/main.c | 560 +-
 linux-user/ppc/cpu_loop.c | 553 +
 2 files changed, 554 insertions(+), 559 deletions(-)

diff --git a/linux-user/main.c b/linux-user/main.c
index 4816ec54bb..2340320818 100644
--- a/linux-user/main.c
+++ b/linux-user/main.c
@@ -149,547 +149,6 @@ void fork_end(int child)
 }
 }
 
-#ifdef TARGET_PPC
-static inline uint64_t cpu_ppc_get_tb(CPUPPCState *env)
-{
-return cpu_get_host_ticks();
-}
-
-uint64_t cpu_ppc_load_tbl(CPUPPCState *env)
-{
-return cpu_ppc_get_tb(env);
-}
-
-uint32_t cpu_ppc_load_tbu(CPUPPCState *env)
-{
-return cpu_ppc_get_tb(env) >> 32;
-}
-
-uint64_t cpu_ppc_load_atbl(CPUPPCState *env)
-{
-return cpu_ppc_get_tb(env);
-}
-
-uint32_t cpu_ppc_load_atbu(CPUPPCState *env)
-{
-return cpu_ppc_get_tb(env) >> 32;
-}
-
-uint32_t cpu_ppc601_load_rtcu(CPUPPCState *env)
-__attribute__ (( alias ("cpu_ppc_load_tbu") ));
-
-uint32_t cpu_ppc601_load_rtcl(CPUPPCState *env)
-{
-return cpu_ppc_load_tbl(env) & 0x3F80;
-}
-
-/* XXX: to be fixed */
-int ppc_dcr_read (ppc_dcr_t *dcr_env, int dcrn, uint32_t *valp)
-{
-return -1;
-}
-
-int ppc_dcr_write (ppc_dcr_t *dcr_env, int dcrn, uint32_t val)
-{
-return -1;
-}
-
-static int do_store_exclusive(CPUPPCState *env)
-{
-target_ulong addr;
-target_ulong page_addr;
-target_ulong val, val2 __attribute__((unused)) = 0;
-int flags;
-int segv = 0;
-
-addr = env->reserve_ea;
-page_addr = addr & TARGET_PAGE_MASK;
-start_exclusive();
-mmap_lock();
-flags = page_get_flags(page_addr);
-if ((flags & PAGE_READ) == 0) {
-segv = 1;
-} else {
-int reg = env->reserve_info & 0x1f;
-int size = env->reserve_info >> 5;
-int stored = 0;
-
-if (addr == env->reserve_addr) {
-switch (size) {
-case 1: segv = get_user_u8(val, addr); break;
-case 2: segv = get_user_u16(val, addr); break;
-case 4: segv = get_user_u32(val, addr); break;
-#if defined(TARGET_PPC64)
-case 8: segv = get_user_u64(val, addr); break;
-case 16: {
-segv = get_user_u64(val, addr);
-if (!segv) {
-segv = get_user_u64(val2, addr + 8);
-}
-break;
-}
-#endif
-default: abort();
-}
-if (!segv && val == env->reserve_val) {
-val = env->gpr[reg];
-switch (size) {
-case 1: segv = put_user_u8(val, addr); break;
-case 2: segv = put_user_u16(val, addr); break;
-case 4: segv = put_user_u32(val, addr); break;
-#if defined(TARGET_PPC64)
-case 8: segv = put_user_u64(val, addr); break;
-case 16: {
-if (val2 == env->reserve_val2) {
-if (msr_le) {
-val2 = val;
-val = env->gpr[reg+1];
-} else {
-val2 = env->gpr[reg+1];
-}
-segv = put_user_u64(val, addr);
-if (!segv) {
-segv = put_user_u64(val2, addr + 8);
-}
-}
-break;
-}
-#endif
-default: abort();
-}
-if (!segv) {
-stored = 1;
-}
-}
-}
-env->crf[0] = (stored << 1) | xer_so;
-env->reserve_addr = (target_ulong)-1;
-}
-if (!segv) {
-env->nip += 4;
-}
-mmap_unlock();
-end_exclusive();
-return segv;
-}
-
-void cpu_loop(CPUPPCState *env)
-{
-CPUState *cs = CPU(ppc_env_get_cpu(env));
-target_siginfo_t info;
-int trapnr;
-target_ulong ret;
-
-for(;;) {
-cpu_exec_start(cs);
-trapnr = cpu_exec(cs);
-cpu_exec_end(cs);
-process_queued_cpu_work(cs);
-
-switch(trapnr) {
-case POWERPC_EXCP_NONE:
-/* Just go on */
-break;
-case POWERPC_EXCP_CRITICAL: /* Critical input*/
-cpu_abort(cs, "Critical interrupt while in user mode. "
-  "Aborting\n");
-break;
-case POWERPC_EXCP_MCHECK:   /* Machine check exception   */
-cpu_abort(cs, "Machine check exception while in user mode. "
-  "Aborting\n");
-break;
-case POWERPC_EXCP_DSI:  /* Data storage exception*/
-/* XXX: check this. Seems bugged */
-switch (env->error_code & 

[Qemu-devel] [PATCH for 2.13 v2 13/19] linux-user: move m68k cpu loop to m68k directory

2018-04-11 Thread Laurent Vivier
No code change, only move code from main.c to
m68k/cpu_loop.c.

Signed-off-by: Laurent Vivier 
Reviewed-by: Richard Henderson 
---
 linux-user/m68k/cpu_loop.c | 144 +++
 linux-user/main.c  | 150 +
 2 files changed, 145 insertions(+), 149 deletions(-)

diff --git a/linux-user/m68k/cpu_loop.c b/linux-user/m68k/cpu_loop.c
index b7700a5561..b4d3d8af3d 100644
--- a/linux-user/m68k/cpu_loop.c
+++ b/linux-user/m68k/cpu_loop.c
@@ -21,6 +21,150 @@
 #include "qemu.h"
 #include "cpu_loop-common.h"
 
+void cpu_loop(CPUM68KState *env)
+{
+CPUState *cs = CPU(m68k_env_get_cpu(env));
+int trapnr;
+unsigned int n;
+target_siginfo_t info;
+TaskState *ts = cs->opaque;
+
+for(;;) {
+cpu_exec_start(cs);
+trapnr = cpu_exec(cs);
+cpu_exec_end(cs);
+process_queued_cpu_work(cs);
+
+switch(trapnr) {
+case EXCP_ILLEGAL:
+{
+if (ts->sim_syscalls) {
+uint16_t nr;
+get_user_u16(nr, env->pc + 2);
+env->pc += 4;
+do_m68k_simcall(env, nr);
+} else {
+goto do_sigill;
+}
+}
+break;
+case EXCP_HALT_INSN:
+/* Semihosing syscall.  */
+env->pc += 4;
+do_m68k_semihosting(env, env->dregs[0]);
+break;
+case EXCP_LINEA:
+case EXCP_LINEF:
+case EXCP_UNSUPPORTED:
+do_sigill:
+info.si_signo = TARGET_SIGILL;
+info.si_errno = 0;
+info.si_code = TARGET_ILL_ILLOPN;
+info._sifields._sigfault._addr = env->pc;
+queue_signal(env, info.si_signo, QEMU_SI_FAULT, );
+break;
+case EXCP_CHK:
+info.si_signo = TARGET_SIGFPE;
+info.si_errno = 0;
+info.si_code = TARGET_FPE_INTOVF;
+info._sifields._sigfault._addr = env->pc;
+queue_signal(env, info.si_signo, QEMU_SI_FAULT, );
+break;
+case EXCP_DIV0:
+info.si_signo = TARGET_SIGFPE;
+info.si_errno = 0;
+info.si_code = TARGET_FPE_INTDIV;
+info._sifields._sigfault._addr = env->pc;
+queue_signal(env, info.si_signo, QEMU_SI_FAULT, );
+break;
+case EXCP_TRAP0:
+{
+abi_long ret;
+ts->sim_syscalls = 0;
+n = env->dregs[0];
+env->pc += 2;
+ret = do_syscall(env,
+ n,
+ env->dregs[1],
+ env->dregs[2],
+ env->dregs[3],
+ env->dregs[4],
+ env->dregs[5],
+ env->aregs[0],
+ 0, 0);
+if (ret == -TARGET_ERESTARTSYS) {
+env->pc -= 2;
+} else if (ret != -TARGET_QEMU_ESIGRETURN) {
+env->dregs[0] = ret;
+}
+}
+break;
+case EXCP_INTERRUPT:
+/* just indicate that signals should be handled asap */
+break;
+case EXCP_ACCESS:
+{
+info.si_signo = TARGET_SIGSEGV;
+info.si_errno = 0;
+/* XXX: check env->error_code */
+info.si_code = TARGET_SEGV_MAPERR;
+info._sifields._sigfault._addr = env->mmu.ar;
+queue_signal(env, info.si_signo, QEMU_SI_FAULT, );
+}
+break;
+case EXCP_DEBUG:
+{
+int sig;
+
+sig = gdb_handlesig(cs, TARGET_SIGTRAP);
+if (sig)
+  {
+info.si_signo = sig;
+info.si_errno = 0;
+info.si_code = TARGET_TRAP_BRKPT;
+queue_signal(env, info.si_signo, QEMU_SI_FAULT, );
+  }
+}
+break;
+case EXCP_ATOMIC:
+cpu_exec_step_atomic(cs);
+break;
+default:
+EXCP_DUMP(env, "qemu: unhandled CPU exception 0x%x - aborting\n", 
trapnr);
+abort();
+}
+process_pending_signals(env);
+}
+}
+
 void target_cpu_copy_regs(CPUArchState *env, struct target_pt_regs *regs)
 {
+CPUState *cpu = ENV_GET_CPU(env);
+TaskState *ts = cpu->opaque;
+struct image_info *info = ts->info;
+
+env->pc = regs->pc;
+env->dregs[0] = regs->d0;
+env->dregs[1] = regs->d1;
+env->dregs[2] = regs->d2;
+env->dregs[3] = regs->d3;
+env->dregs[4] = regs->d4;
+env->dregs[5] = regs->d5;
+env->dregs[6] = regs->d6;
+env->dregs[7] = regs->d7;

[Qemu-devel] [PATCH for 2.13 v2 18/19] linux-user: move hppa cpu loop to hppa directory

2018-04-11 Thread Laurent Vivier
No code change, only move code from main.c to
hppa/cpu_loop.c.

Signed-off-by: Laurent Vivier 
Reviewed-by: Richard Henderson 
Reviewed-by: Philippe Mathieu-Daudé 
---
 linux-user/hppa/cpu_loop.c | 185 ++
 linux-user/main.c  | 194 +
 2 files changed, 186 insertions(+), 193 deletions(-)

diff --git a/linux-user/hppa/cpu_loop.c b/linux-user/hppa/cpu_loop.c
index b7700a5561..0301c766c6 100644
--- a/linux-user/hppa/cpu_loop.c
+++ b/linux-user/hppa/cpu_loop.c
@@ -21,6 +21,191 @@
 #include "qemu.h"
 #include "cpu_loop-common.h"
 
+static abi_ulong hppa_lws(CPUHPPAState *env)
+{
+uint32_t which = env->gr[20];
+abi_ulong addr = env->gr[26];
+abi_ulong old = env->gr[25];
+abi_ulong new = env->gr[24];
+abi_ulong size, ret;
+
+switch (which) {
+default:
+return -TARGET_ENOSYS;
+
+case 0: /* elf32 atomic 32bit cmpxchg */
+if ((addr & 3) || !access_ok(VERIFY_WRITE, addr, 4)) {
+return -TARGET_EFAULT;
+}
+old = tswap32(old);
+new = tswap32(new);
+ret = atomic_cmpxchg((uint32_t *)g2h(addr), old, new);
+ret = tswap32(ret);
+break;
+
+case 2: /* elf32 atomic "new" cmpxchg */
+size = env->gr[23];
+if (size >= 4) {
+return -TARGET_ENOSYS;
+}
+if (((addr | old | new) & ((1 << size) - 1))
+|| !access_ok(VERIFY_WRITE, addr, 1 << size)
+|| !access_ok(VERIFY_READ, old, 1 << size)
+|| !access_ok(VERIFY_READ, new, 1 << size)) {
+return -TARGET_EFAULT;
+}
+/* Note that below we use host-endian loads so that the cmpxchg
+   can be host-endian as well.  */
+switch (size) {
+case 0:
+old = *(uint8_t *)g2h(old);
+new = *(uint8_t *)g2h(new);
+ret = atomic_cmpxchg((uint8_t *)g2h(addr), old, new);
+ret = ret != old;
+break;
+case 1:
+old = *(uint16_t *)g2h(old);
+new = *(uint16_t *)g2h(new);
+ret = atomic_cmpxchg((uint16_t *)g2h(addr), old, new);
+ret = ret != old;
+break;
+case 2:
+old = *(uint32_t *)g2h(old);
+new = *(uint32_t *)g2h(new);
+ret = atomic_cmpxchg((uint32_t *)g2h(addr), old, new);
+ret = ret != old;
+break;
+case 3:
+{
+uint64_t o64, n64, r64;
+o64 = *(uint64_t *)g2h(old);
+n64 = *(uint64_t *)g2h(new);
+#ifdef CONFIG_ATOMIC64
+r64 = atomic_cmpxchg__nocheck((uint64_t *)g2h(addr), o64, n64);
+ret = r64 != o64;
+#else
+start_exclusive();
+r64 = *(uint64_t *)g2h(addr);
+ret = 1;
+if (r64 == o64) {
+*(uint64_t *)g2h(addr) = n64;
+ret = 0;
+}
+end_exclusive();
+#endif
+}
+break;
+}
+break;
+}
+
+env->gr[28] = ret;
+return 0;
+}
+
+void cpu_loop(CPUHPPAState *env)
+{
+CPUState *cs = CPU(hppa_env_get_cpu(env));
+target_siginfo_t info;
+abi_ulong ret;
+int trapnr;
+
+while (1) {
+cpu_exec_start(cs);
+trapnr = cpu_exec(cs);
+cpu_exec_end(cs);
+process_queued_cpu_work(cs);
+
+switch (trapnr) {
+case EXCP_SYSCALL:
+ret = do_syscall(env, env->gr[20],
+ env->gr[26], env->gr[25],
+ env->gr[24], env->gr[23],
+ env->gr[22], env->gr[21], 0, 0);
+switch (ret) {
+default:
+env->gr[28] = ret;
+/* We arrived here by faking the gateway page.  Return.  */
+env->iaoq_f = env->gr[31];
+env->iaoq_b = env->gr[31] + 4;
+break;
+case -TARGET_ERESTARTSYS:
+case -TARGET_QEMU_ESIGRETURN:
+break;
+}
+break;
+case EXCP_SYSCALL_LWS:
+env->gr[21] = hppa_lws(env);
+/* We arrived here by faking the gateway page.  Return.  */
+env->iaoq_f = env->gr[31];
+env->iaoq_b = env->gr[31] + 4;
+break;
+case EXCP_ITLB_MISS:
+case EXCP_DTLB_MISS:
+case EXCP_NA_ITLB_MISS:
+case EXCP_NA_DTLB_MISS:
+case EXCP_IMP:
+case EXCP_DMP:
+case EXCP_DMB:
+case EXCP_PAGE_REF:
+case EXCP_DMAR:
+case EXCP_DMPI:
+info.si_signo = TARGET_SIGSEGV;
+info.si_errno = 0;
+info.si_code = TARGET_SEGV_ACCERR;
+info._sifields._sigfault._addr = env->cr[CR_IOR];
+queue_signal(env, info.si_signo, 

[Qemu-devel] [PATCH for 2.13 v2 03/19] linux-user: move aarch64 cpu loop to aarch64 directory

2018-04-11 Thread Laurent Vivier
No code change, only move code from main.c to
aarch64/cpu_loop.c and duplicate some macro
defined for both arm and aarch64.

Signed-off-by: Laurent Vivier 
Reviewed-by: Richard Henderson 
---
 linux-user/aarch64/cpu_loop.c | 156 ++
 linux-user/main.c | 109 +
 2 files changed, 158 insertions(+), 107 deletions(-)

diff --git a/linux-user/aarch64/cpu_loop.c b/linux-user/aarch64/cpu_loop.c
index b7700a5561..c97a646546 100644
--- a/linux-user/aarch64/cpu_loop.c
+++ b/linux-user/aarch64/cpu_loop.c
@@ -21,6 +21,162 @@
 #include "qemu.h"
 #include "cpu_loop-common.h"
 
+#define get_user_code_u32(x, gaddr, env)\
+({ abi_long __r = get_user_u32((x), (gaddr));   \
+if (!__r && bswap_code(arm_sctlr_b(env))) { \
+(x) = bswap32(x);   \
+}   \
+__r;\
+})
+
+#define get_user_code_u16(x, gaddr, env)\
+({ abi_long __r = get_user_u16((x), (gaddr));   \
+if (!__r && bswap_code(arm_sctlr_b(env))) { \
+(x) = bswap16(x);   \
+}   \
+__r;\
+})
+
+#define get_user_data_u32(x, gaddr, env)\
+({ abi_long __r = get_user_u32((x), (gaddr));   \
+if (!__r && arm_cpu_bswap_data(env)) {  \
+(x) = bswap32(x);   \
+}   \
+__r;\
+})
+
+#define get_user_data_u16(x, gaddr, env)\
+({ abi_long __r = get_user_u16((x), (gaddr));   \
+if (!__r && arm_cpu_bswap_data(env)) {  \
+(x) = bswap16(x);   \
+}   \
+__r;\
+})
+
+#define put_user_data_u32(x, gaddr, env)\
+({ typeof(x) __x = (x); \
+if (arm_cpu_bswap_data(env)) {  \
+__x = bswap32(__x); \
+}   \
+put_user_u32(__x, (gaddr)); \
+})
+
+#define put_user_data_u16(x, gaddr, env)\
+({ typeof(x) __x = (x); \
+if (arm_cpu_bswap_data(env)) {  \
+__x = bswap16(__x); \
+}   \
+put_user_u16(__x, (gaddr)); \
+})
+
+/* AArch64 main loop */
+void cpu_loop(CPUARMState *env)
+{
+CPUState *cs = CPU(arm_env_get_cpu(env));
+int trapnr, sig;
+abi_long ret;
+target_siginfo_t info;
+
+for (;;) {
+cpu_exec_start(cs);
+trapnr = cpu_exec(cs);
+cpu_exec_end(cs);
+process_queued_cpu_work(cs);
+
+switch (trapnr) {
+case EXCP_SWI:
+ret = do_syscall(env,
+ env->xregs[8],
+ env->xregs[0],
+ env->xregs[1],
+ env->xregs[2],
+ env->xregs[3],
+ env->xregs[4],
+ env->xregs[5],
+ 0, 0);
+if (ret == -TARGET_ERESTARTSYS) {
+env->pc -= 4;
+} else if (ret != -TARGET_QEMU_ESIGRETURN) {
+env->xregs[0] = ret;
+}
+break;
+case EXCP_INTERRUPT:
+/* just indicate that signals should be handled asap */
+break;
+case EXCP_UDEF:
+info.si_signo = TARGET_SIGILL;
+info.si_errno = 0;
+info.si_code = TARGET_ILL_ILLOPN;
+info._sifields._sigfault._addr = env->pc;
+queue_signal(env, info.si_signo, QEMU_SI_FAULT, &info);
+break;
+case EXCP_PREFETCH_ABORT:
+case EXCP_DATA_ABORT:
+info.si_signo = TARGET_SIGSEGV;
+info.si_errno = 0;
+/* XXX: check env->error_code */
+info.si_code = TARGET_SEGV_MAPERR;
+info._sifields._sigfault._addr = env->exception.vaddress;
+queue_signal(env, info.si_signo, QEMU_SI_FAULT, &info);
+break;
+case EXCP_DEBUG:
+case EXCP_BKPT:
+sig = gdb_handlesig(cs, TARGET_SIGTRAP);
+if (sig) {
+info.si_signo = sig;
+info.si_errno = 0;
+info.si_code = TARGET_TRAP_BRKPT;
+queue_signal(env, info.si_signo, QEMU_SI_FAULT, &info);
+}
+

Re: [Qemu-devel] [RFC PATCH] migration: discard RAMBlocks of type ram_device

2018-04-11 Thread Kirti Wankhede


On 4/11/2018 11:25 PM, Alex Williamson wrote:
> [cc +folks working on vfio-mdev migration]
> 
> On Wed, 11 Apr 2018 19:20:14 +0200
> Cédric Le Goater  wrote:
> 
>> Here is some context for this strange change request.
>>
>> On the POWER9 processor, the XIVE interrupt controller can control
>> interrupt sources using MMIO to trigger events, to EOI or to turn off
>> the sources. Priority management and interrupt acknowledgment is also
>> controlled by MMIO in the presenter subengine.
>>
>> These MMIO regions are exposed to guests in QEMU with a set of 'ram
>> device' memory mappings, similarly to VFIO, and the VMAs are populated
>> dynamically with the appropriate pages using a fault handler.
>>
>> But, these regions are an issue for migration. We need to discard the
>> associated RAMBlocks from the RAM state on the source VM and let the
>> destination VM rebuild the memory mappings on the new host in the
>> post_load() operation just before resuming the system.
>>
>> This is the goal of the following proposal. Does it make sense ? It
>> seems to be working enough to migrate a running guest but there might
>> be a better, more subtle, approach.
> 
> Yulei, is this something you've run into with GVT-g migration?  I don't
> see how we can read from or write to ram_device regions in a useful way
> during migration anyway, so the change initially looks correct to me.
> Thanks,
> 

I ran into this problem with vGPU migration. I have very similar patch
in my local branch to test vGPU migration. This patch looks good to me.

Thanks,
Kirti


> Alex
> 
>> Signed-off-by: Cédric Le Goater 
>> ---
>>  migration/ram.c | 42 --
>>  1 file changed, 40 insertions(+), 2 deletions(-)
>>
>> diff --git a/migration/ram.c b/migration/ram.c
>> index 0e90efa09236..6404ccd046d8 100644
>> --- a/migration/ram.c
>> +++ b/migration/ram.c
>> @@ -780,6 +780,10 @@ unsigned long migration_bitmap_find_dirty(RAMState *rs, 
>> RAMBlock *rb,
>>  unsigned long *bitmap = rb->bmap;
>>  unsigned long next;
>>  
>> +if (memory_region_is_ram_device(rb->mr)) {
>> +return size;
>> +}
>> +
>>  if (rs->ram_bulk_stage && start > 0) {
>>  next = start + 1;
>>  } else {
>> @@ -826,6 +830,9 @@ uint64_t ram_pagesize_summary(void)
>>  uint64_t summary = 0;
>>  
>>  RAMBLOCK_FOREACH(block) {
>> +if (memory_region_is_ram_device(block->mr)) {
>> +continue;
>> +}
>>  summary |= block->page_size;
>>  }
>>  
>> @@ -850,6 +857,9 @@ static void migration_bitmap_sync(RAMState *rs)
>>  qemu_mutex_lock(&rs->bitmap_mutex);
>>  rcu_read_lock();
>>  RAMBLOCK_FOREACH(block) {
>> +if (memory_region_is_ram_device(block->mr)) {
>> +continue;
>> +}
>>  migration_bitmap_sync_range(rs, block, 0, block->used_length);
>>  }
>>  rcu_read_unlock();
>> @@ -1499,6 +1509,10 @@ static int ram_save_host_page(RAMState *rs, 
>> PageSearchStatus *pss,
>>  size_t pagesize_bits =
>>  qemu_ram_pagesize(pss->block) >> TARGET_PAGE_BITS;
>>  
>> +if (memory_region_is_ram_device(pss->block->mr)) {
>> +return 0;
>> +}
>> +
>>  do {
>>  tmppages = ram_save_target_page(rs, pss, last_stage);
>>  if (tmppages < 0) {
>> @@ -1588,6 +1602,9 @@ uint64_t ram_bytes_total(void)
>>  
>>  rcu_read_lock();
>>  RAMBLOCK_FOREACH(block) {
>> +if (memory_region_is_ram_device(block->mr)) {
>> +continue;
>> +}
>>  total += block->used_length;
>>  }
>>  rcu_read_unlock();
>> @@ -1643,6 +1660,9 @@ static void ram_save_cleanup(void *opaque)
>>  memory_global_dirty_log_stop();
>>  
>>  QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
>> +if (memory_region_is_ram_device(block->mr)) {
>> +continue;
>> +}
>>  g_free(block->bmap);
>>  block->bmap = NULL;
>>  g_free(block->unsentmap);
>> @@ -1710,6 +1730,9 @@ void 
>> ram_postcopy_migrated_memory_release(MigrationState *ms)
>>  unsigned long range = block->used_length >> TARGET_PAGE_BITS;
>>  unsigned long run_start = find_next_zero_bit(bitmap, range, 0);
>>  
>> +if (memory_region_is_ram_device(block->mr)) {
>> +continue;
>> +}
>>  while (run_start < range) {
>>  unsigned long run_end = find_next_bit(bitmap, range, run_start 
>> + 1);
>>  ram_discard_range(block->idstr, run_start << TARGET_PAGE_BITS,
>> @@ -1784,8 +1807,13 @@ static int 
>> postcopy_each_ram_send_discard(MigrationState *ms)
>>  int ret;
>>  
>>  RAMBLOCK_FOREACH(block) {
>> -PostcopyDiscardState *pds =
>> -postcopy_discard_send_init(ms, block->idstr);
>> +PostcopyDiscardState *pds;
>> +
>> +if (memory_region_is_ram_device(block->mr)) {
>> +continue;
>> +}
>> +
>> +pds = postcopy_discard_send_init(ms, 

[Qemu-devel] [PATCH v4 09/13] block/dirty-bitmap: Add bdrv_dirty_iter_next_area

2018-04-11 Thread Max Reitz
This new function allows to look for a consecutively dirty area in a
dirty bitmap.

Signed-off-by: Max Reitz 
Reviewed-by: Fam Zheng 
Reviewed-by: John Snow 
---
 include/block/dirty-bitmap.h |  2 ++
 block/dirty-bitmap.c | 55 
 2 files changed, 57 insertions(+)

diff --git a/include/block/dirty-bitmap.h b/include/block/dirty-bitmap.h
index 1ff8949b1b..0d52cdaf3b 100644
--- a/include/block/dirty-bitmap.h
+++ b/include/block/dirty-bitmap.h
@@ -82,6 +82,8 @@ void bdrv_set_dirty_bitmap_locked(BdrvDirtyBitmap *bitmap,
 void bdrv_reset_dirty_bitmap_locked(BdrvDirtyBitmap *bitmap,
 int64_t offset, int64_t bytes);
 int64_t bdrv_dirty_iter_next(BdrvDirtyBitmapIter *iter);
+bool bdrv_dirty_iter_next_area(BdrvDirtyBitmapIter *iter, uint64_t max_offset,
+   uint64_t *offset, int *bytes);
 void bdrv_set_dirty_iter(BdrvDirtyBitmapIter *hbi, int64_t offset);
 int64_t bdrv_get_dirty_count(BdrvDirtyBitmap *bitmap);
 int64_t bdrv_get_meta_dirty_count(BdrvDirtyBitmap *bitmap);
diff --git a/block/dirty-bitmap.c b/block/dirty-bitmap.c
index df7b711610..8758edb261 100644
--- a/block/dirty-bitmap.c
+++ b/block/dirty-bitmap.c
@@ -549,6 +549,61 @@ int64_t bdrv_dirty_iter_next(BdrvDirtyBitmapIter *iter)
 return hbitmap_iter_next(&iter->hbi, true);
 }
 
+/**
+ * Return the next consecutively dirty area in the dirty bitmap
+ * belonging to the given iterator @iter.
+ *
+ * @max_offset: Maximum value that may be returned for
+ *  *offset + *bytes
+ * @offset: Will contain the start offset of the next dirty area
+ * @bytes:  Will contain the length of the next dirty area
+ *
+ * Returns: True if a dirty area could be found before max_offset
+ *  (which means that *offset and *bytes then contain valid
+ *  values), false otherwise.
+ *
+ * Note that @iter is never advanced if false is returned.  If an area
+ * is found (which means that true is returned), it will be advanced
+ * past that area.
+ */
+bool bdrv_dirty_iter_next_area(BdrvDirtyBitmapIter *iter, uint64_t max_offset,
+   uint64_t *offset, int *bytes)
+{
+uint32_t granularity = bdrv_dirty_bitmap_granularity(iter->bitmap);
+uint64_t gran_max_offset;
+int64_t ret;
+int size;
+
+if (max_offset == iter->bitmap->size) {
+/* If max_offset points to the image end, round it up by the
+ * bitmap granularity */
+gran_max_offset = ROUND_UP(max_offset, granularity);
+} else {
+gran_max_offset = max_offset;
+}
+
+ret = hbitmap_iter_next(&iter->hbi, false);
+if (ret < 0 || ret + granularity > gran_max_offset) {
+return false;
+}
+
+*offset = ret;
+size = 0;
+
+assert(granularity <= INT_MAX);
+
+do {
+/* Advance iterator */
+ret = hbitmap_iter_next(&iter->hbi, true);
+size += granularity;
+} while (ret + granularity <= gran_max_offset &&
+ hbitmap_iter_next(&iter->hbi, false) == ret + granularity &&
+ size <= INT_MAX - granularity);
+
+*bytes = MIN(size, max_offset - *offset);
+return true;
+}
+
 /* Called within bdrv_dirty_bitmap_lock..unlock */
 void bdrv_set_dirty_bitmap_locked(BdrvDirtyBitmap *bitmap,
   int64_t offset, int64_t bytes)
-- 
2.14.3




[Qemu-devel] [PATCH for 2.13 v2 10/19] linux-user: move sh4 cpu loop to sh4 directory

2018-04-11 Thread Laurent Vivier
No code change, only move code from main.c to
sh4/cpu_loop.c.

Signed-off-by: Laurent Vivier 
Reviewed-by: Richard Henderson 
---

Notes:
v2: expand tabs

 linux-user/main.c | 90 ---
 linux-user/sh4/cpu_loop.c | 85 
 2 files changed, 85 insertions(+), 90 deletions(-)

diff --git a/linux-user/main.c b/linux-user/main.c
index af26a17c46..d7fee3e3db 100644
--- a/linux-user/main.c
+++ b/linux-user/main.c
@@ -149,87 +149,6 @@ void fork_end(int child)
 }
 }
 
-#ifdef TARGET_SH4
-void cpu_loop(CPUSH4State *env)
-{
-CPUState *cs = CPU(sh_env_get_cpu(env));
-int trapnr, ret;
-target_siginfo_t info;
-
-while (1) {
-bool arch_interrupt = true;
-
-cpu_exec_start(cs);
-trapnr = cpu_exec(cs);
-cpu_exec_end(cs);
-process_queued_cpu_work(cs);
-
-switch (trapnr) {
-case 0x160:
-env->pc += 2;
-ret = do_syscall(env,
- env->gregs[3],
- env->gregs[4],
- env->gregs[5],
- env->gregs[6],
- env->gregs[7],
- env->gregs[0],
- env->gregs[1],
- 0, 0);
-if (ret == -TARGET_ERESTARTSYS) {
-env->pc -= 2;
-} else if (ret != -TARGET_QEMU_ESIGRETURN) {
-env->gregs[0] = ret;
-}
-break;
-case EXCP_INTERRUPT:
-/* just indicate that signals should be handled asap */
-break;
-case EXCP_DEBUG:
-{
-int sig;
-
-sig = gdb_handlesig(cs, TARGET_SIGTRAP);
-if (sig) {
-info.si_signo = sig;
-info.si_errno = 0;
-info.si_code = TARGET_TRAP_BRKPT;
-queue_signal(env, info.si_signo, QEMU_SI_FAULT, &info);
-} else {
-arch_interrupt = false;
-}
-}
-break;
-   case 0xa0:
-   case 0xc0:
-info.si_signo = TARGET_SIGSEGV;
-info.si_errno = 0;
-info.si_code = TARGET_SEGV_MAPERR;
-info._sifields._sigfault._addr = env->tea;
-queue_signal(env, info.si_signo, QEMU_SI_FAULT, &info);
-   break;
-case EXCP_ATOMIC:
-cpu_exec_step_atomic(cs);
-arch_interrupt = false;
-break;
-default:
-printf ("Unhandled trap: 0x%x\n", trapnr);
-cpu_dump_state(cs, stderr, fprintf, 0);
-exit(EXIT_FAILURE);
-}
-process_pending_signals (env);
-
-/* Most of the traps imply an exception or interrupt, which
-   implies an REI instruction has been executed.  Which means
-   that LDST (aka LOK_ADDR) should be cleared.  But there are
-   a few exceptions for traps internal to QEMU.  */
-if (arch_interrupt) {
-env->lock_addr = -1;
-}
-}
-}
-#endif
-
 #ifdef TARGET_CRIS
 void cpu_loop(CPUCRISState *env)
 {
@@ -2361,15 +2280,6 @@ int main(int argc, char **argv, char **envp)
 env->pc = regs->sepc;
 env->gpr[xSP] = regs->sp;
 }
-#elif defined(TARGET_SH4)
-{
-int i;
-
-for(i = 0; i < 16; i++) {
-env->gregs[i] = regs->regs[i];
-}
-env->pc = regs->pc;
-}
 #elif defined(TARGET_ALPHA)
 {
 int i;
diff --git a/linux-user/sh4/cpu_loop.c b/linux-user/sh4/cpu_loop.c
index b7700a5561..418833ea25 100644
--- a/linux-user/sh4/cpu_loop.c
+++ b/linux-user/sh4/cpu_loop.c
@@ -21,6 +21,91 @@
 #include "qemu.h"
 #include "cpu_loop-common.h"
 
+void cpu_loop(CPUSH4State *env)
+{
+CPUState *cs = CPU(sh_env_get_cpu(env));
+int trapnr, ret;
+target_siginfo_t info;
+
+while (1) {
+bool arch_interrupt = true;
+
+cpu_exec_start(cs);
+trapnr = cpu_exec(cs);
+cpu_exec_end(cs);
+process_queued_cpu_work(cs);
+
+switch (trapnr) {
+case 0x160:
+env->pc += 2;
+ret = do_syscall(env,
+ env->gregs[3],
+ env->gregs[4],
+ env->gregs[5],
+ env->gregs[6],
+ env->gregs[7],
+ env->gregs[0],
+ env->gregs[1],
+ 0, 0);
+if (ret == -TARGET_ERESTARTSYS) {
+env->pc -= 2;
+} else if (ret != -TARGET_QEMU_ESIGRETURN) {
+env->gregs[0] = ret;
+}
+break;
+case EXCP_INTERRUPT:
+/* just indicate that signals should be handled asap */
+

[Qemu-devel] [PATCH for 2.13 v2 02/19] linux-user: move i386/x86_64 cpu loop to i386 directory

2018-04-11 Thread Laurent Vivier
No code change, only move code from main.c to
i386/cpu_loop.c.

Include i386/cpu_loop.c in x86_64/cpu_loop.c
to avoid to duplicate code.

Signed-off-by: Laurent Vivier 
Reviewed-by: Richard Henderson 
---
 linux-user/i386/cpu_loop.c   | 343 ++
 linux-user/main.c| 348 +--
 linux-user/x86_64/cpu_loop.c |   8 +-
 3 files changed, 345 insertions(+), 354 deletions(-)

diff --git a/linux-user/i386/cpu_loop.c b/linux-user/i386/cpu_loop.c
index b7700a5561..2374abfd0b 100644
--- a/linux-user/i386/cpu_loop.c
+++ b/linux-user/i386/cpu_loop.c
@@ -21,6 +21,349 @@
 #include "qemu.h"
 #include "cpu_loop-common.h"
 
+/***/
+/* CPUX86 core interface */
+
+uint64_t cpu_get_tsc(CPUX86State *env)
+{
+return cpu_get_host_ticks();
+}
+
+static void write_dt(void *ptr, unsigned long addr, unsigned long limit,
+  int flags)
+{
+unsigned int e1, e2;
+uint32_t *p;
+e1 = (addr << 16) | (limit & 0xffff);
+e2 = ((addr >> 16) & 0xff) | (addr & 0xff000000) | (limit & 0x000f0000);
+e2 |= flags;
+p = ptr;
+p[0] = tswap32(e1);
+p[1] = tswap32(e2);
+}
+
+static uint64_t *idt_table;
+#ifdef TARGET_X86_64
+static void set_gate64(void *ptr, unsigned int type, unsigned int dpl,
+   uint64_t addr, unsigned int sel)
+{
+uint32_t *p, e1, e2;
+e1 = (addr & 0xffff) | (sel << 16);
+e2 = (addr & 0xffff0000) | 0x8000 | (dpl << 13) | (type << 8);
+p = ptr;
+p[0] = tswap32(e1);
+p[1] = tswap32(e2);
+p[2] = tswap32(addr >> 32);
+p[3] = 0;
+}
+/* only dpl matters as we do only user space emulation */
+static void set_idt(int n, unsigned int dpl)
+{
+set_gate64(idt_table + n * 2, 0, dpl, 0, 0);
+}
+#else
+static void set_gate(void *ptr, unsigned int type, unsigned int dpl,
+ uint32_t addr, unsigned int sel)
+{
+uint32_t *p, e1, e2;
+e1 = (addr & 0xffff) | (sel << 16);
+e2 = (addr & 0xffff0000) | 0x8000 | (dpl << 13) | (type << 8);
+p = ptr;
+p[0] = tswap32(e1);
+p[1] = tswap32(e2);
+}
+
+/* only dpl matters as we do only user space emulation */
+static void set_idt(int n, unsigned int dpl)
+{
+set_gate(idt_table + n, 0, dpl, 0, 0);
+}
+#endif
+
+void cpu_loop(CPUX86State *env)
+{
+CPUState *cs = CPU(x86_env_get_cpu(env));
+int trapnr;
+abi_ulong pc;
+abi_ulong ret;
+target_siginfo_t info;
+
+for(;;) {
+cpu_exec_start(cs);
+trapnr = cpu_exec(cs);
+cpu_exec_end(cs);
+process_queued_cpu_work(cs);
+
+switch(trapnr) {
+case 0x80:
+/* linux syscall from int $0x80 */
+ret = do_syscall(env,
+ env->regs[R_EAX],
+ env->regs[R_EBX],
+ env->regs[R_ECX],
+ env->regs[R_EDX],
+ env->regs[R_ESI],
+ env->regs[R_EDI],
+ env->regs[R_EBP],
+ 0, 0);
+if (ret == -TARGET_ERESTARTSYS) {
+env->eip -= 2;
+} else if (ret != -TARGET_QEMU_ESIGRETURN) {
+env->regs[R_EAX] = ret;
+}
+break;
+#ifndef TARGET_ABI32
+case EXCP_SYSCALL:
+/* linux syscall from syscall instruction */
+ret = do_syscall(env,
+ env->regs[R_EAX],
+ env->regs[R_EDI],
+ env->regs[R_ESI],
+ env->regs[R_EDX],
+ env->regs[10],
+ env->regs[8],
+ env->regs[9],
+ 0, 0);
+if (ret == -TARGET_ERESTARTSYS) {
+env->eip -= 2;
+} else if (ret != -TARGET_QEMU_ESIGRETURN) {
+env->regs[R_EAX] = ret;
+}
+break;
+#endif
+case EXCP0B_NOSEG:
+case EXCP0C_STACK:
+info.si_signo = TARGET_SIGBUS;
+info.si_errno = 0;
+info.si_code = TARGET_SI_KERNEL;
+info._sifields._sigfault._addr = 0;
+queue_signal(env, info.si_signo, QEMU_SI_FAULT, &info);
+break;
+case EXCP0D_GPF:
+/* XXX: potential problem if ABI32 */
+#ifndef TARGET_X86_64
+if (env->eflags & VM_MASK) {
+handle_vm86_fault(env);
+} else
+#endif
+{
+info.si_signo = TARGET_SIGSEGV;
+info.si_errno = 0;
+info.si_code = TARGET_SI_KERNEL;
+info._sifields._sigfault._addr = 0;
+queue_signal(env, info.si_signo, QEMU_SI_FAULT, &info);
+}
+break;
+case EXCP0E_PAGE:
+

[Qemu-devel] [PATCH v4 05/13] block/mirror: Use source as a BdrvChild

2018-04-11 Thread Max Reitz
With this, the mirror_top_bs is no longer just a technically required
node in the BDS graph but actually represents the block job operation.

Also, drop MirrorBlockJob.source, as we can reach it through
mirror_top_bs->backing.

Signed-off-by: Max Reitz 
Reviewed-by: Fam Zheng 
---
 block/mirror.c | 14 ++
 1 file changed, 6 insertions(+), 8 deletions(-)

diff --git a/block/mirror.c b/block/mirror.c
index 964ffbe682..40c7c55f07 100644
--- a/block/mirror.c
+++ b/block/mirror.c
@@ -43,7 +43,6 @@ typedef struct MirrorBlockJob {
 RateLimit limit;
 BlockBackend *target;
 BlockDriverState *mirror_top_bs;
-BlockDriverState *source;
 BlockDriverState *base;
 
 /* The name of the graph node to replace */
@@ -301,7 +300,6 @@ static void coroutine_fn mirror_co_read(void *opaque)
 {
 MirrorOp *op = opaque;
 MirrorBlockJob *s = op->s;
-BlockBackend *source = s->common.blk;
 int nb_chunks;
 uint64_t ret;
 uint64_t max_bytes;
@@ -351,7 +349,8 @@ static void coroutine_fn mirror_co_read(void *opaque)
 s->bytes_in_flight += op->bytes;
 trace_mirror_one_iteration(s, op->offset, op->bytes);
 
-ret = blk_co_preadv(source, op->offset, op->bytes, &op->qiov, 0);
+ret = bdrv_co_preadv(s->mirror_top_bs->backing, op->offset, op->bytes,
+ &op->qiov, 0);
 mirror_read_complete(op, ret);
 }
 
@@ -429,7 +428,7 @@ static unsigned mirror_perform(MirrorBlockJob *s, int64_t 
offset,
 
 static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s)
 {
-BlockDriverState *source = s->source;
+BlockDriverState *source = s->mirror_top_bs->backing->bs;
 MirrorOp *pseudo_op;
 int64_t offset;
 uint64_t delay_ns = 0, ret = 0;
@@ -604,7 +603,7 @@ static void mirror_exit(BlockJob *job, void *opaque)
 MirrorBlockJob *s = container_of(job, MirrorBlockJob, common);
 MirrorExitData *data = opaque;
 AioContext *replace_aio_context = NULL;
-BlockDriverState *src = s->source;
+BlockDriverState *src = s->mirror_top_bs->backing->bs;
 BlockDriverState *target_bs = blk_bs(s->target);
 BlockDriverState *mirror_top_bs = s->mirror_top_bs;
 Error *local_err = NULL;
@@ -719,7 +718,7 @@ static int coroutine_fn mirror_dirty_init(MirrorBlockJob *s)
 {
 int64_t offset;
 BlockDriverState *base = s->base;
-BlockDriverState *bs = s->source;
+BlockDriverState *bs = s->mirror_top_bs->backing->bs;
 BlockDriverState *target_bs = blk_bs(s->target);
 int ret;
 int64_t count;
@@ -801,7 +800,7 @@ static void coroutine_fn mirror_run(void *opaque)
 {
 MirrorBlockJob *s = opaque;
 MirrorExitData *data;
-BlockDriverState *bs = s->source;
+BlockDriverState *bs = s->mirror_top_bs->backing->bs;
 BlockDriverState *target_bs = blk_bs(s->target);
 bool need_drain = true;
 int64_t length;
@@ -1286,7 +1285,6 @@ static void mirror_start_job(const char *job_id, 
BlockDriverState *bs,
 /* The block job now has a reference to this node */
 bdrv_unref(mirror_top_bs);
 
-s->source = bs;
 s->mirror_top_bs = mirror_top_bs;
 
 /* No resize for the target either; while the mirror is still running, a
-- 
2.14.3




[Qemu-devel] [PATCH for 2.13 v2 09/19] linux-user: move openrisc cpu loop to openrisc directory

2018-04-11 Thread Laurent Vivier
No code change, only move code from main.c to
openrisc/cpu_loop.c.

Signed-off-by: Laurent Vivier 
Reviewed-by: Richard Henderson 
---
 linux-user/main.c  | 96 --
 linux-user/openrisc/cpu_loop.c | 89 +++
 2 files changed, 89 insertions(+), 96 deletions(-)

diff --git a/linux-user/main.c b/linux-user/main.c
index 88f807549f..af26a17c46 100644
--- a/linux-user/main.c
+++ b/linux-user/main.c
@@ -149,92 +149,6 @@ void fork_end(int child)
 }
 }
 
-#ifdef TARGET_OPENRISC
-
-void cpu_loop(CPUOpenRISCState *env)
-{
-CPUState *cs = CPU(openrisc_env_get_cpu(env));
-int trapnr;
-abi_long ret;
-target_siginfo_t info;
-
-for (;;) {
-cpu_exec_start(cs);
-trapnr = cpu_exec(cs);
-cpu_exec_end(cs);
-process_queued_cpu_work(cs);
-
-switch (trapnr) {
-case EXCP_SYSCALL:
-env->pc += 4;   /* 0xc00; */
-ret = do_syscall(env,
- cpu_get_gpr(env, 11), /* return value   */
- cpu_get_gpr(env, 3),  /* r3 - r7 are params */
- cpu_get_gpr(env, 4),
- cpu_get_gpr(env, 5),
- cpu_get_gpr(env, 6),
- cpu_get_gpr(env, 7),
- cpu_get_gpr(env, 8), 0, 0);
-if (ret == -TARGET_ERESTARTSYS) {
-env->pc -= 4;
-} else if (ret != -TARGET_QEMU_ESIGRETURN) {
-cpu_set_gpr(env, 11, ret);
-}
-break;
-case EXCP_DPF:
-case EXCP_IPF:
-case EXCP_RANGE:
-info.si_signo = TARGET_SIGSEGV;
-info.si_errno = 0;
-info.si_code = TARGET_SEGV_MAPERR;
-info._sifields._sigfault._addr = env->pc;
-queue_signal(env, info.si_signo, QEMU_SI_FAULT, &info);
-break;
-case EXCP_ALIGN:
-info.si_signo = TARGET_SIGBUS;
-info.si_errno = 0;
-info.si_code = TARGET_BUS_ADRALN;
-info._sifields._sigfault._addr = env->pc;
-queue_signal(env, info.si_signo, QEMU_SI_FAULT, &info);
-break;
-case EXCP_ILLEGAL:
-info.si_signo = TARGET_SIGILL;
-info.si_errno = 0;
-info.si_code = TARGET_ILL_ILLOPC;
-info._sifields._sigfault._addr = env->pc;
-queue_signal(env, info.si_signo, QEMU_SI_FAULT, &info);
-break;
-case EXCP_FPE:
-info.si_signo = TARGET_SIGFPE;
-info.si_errno = 0;
-info.si_code = 0;
-info._sifields._sigfault._addr = env->pc;
-queue_signal(env, info.si_signo, QEMU_SI_FAULT, &info);
-break;
-case EXCP_INTERRUPT:
-/* We processed the pending cpu work above.  */
-break;
-case EXCP_DEBUG:
-trapnr = gdb_handlesig(cs, TARGET_SIGTRAP);
-if (trapnr) {
-info.si_signo = trapnr;
-info.si_errno = 0;
-info.si_code = TARGET_TRAP_BRKPT;
-queue_signal(env, info.si_signo, QEMU_SI_FAULT, &info);
-}
-break;
-case EXCP_ATOMIC:
-cpu_exec_step_atomic(cs);
-break;
-default:
-g_assert_not_reached();
-}
-process_pending_signals(env);
-}
-}
-
-#endif /* TARGET_OPENRISC */
-
 #ifdef TARGET_SH4
 void cpu_loop(CPUSH4State *env)
 {
@@ -2442,16 +2356,6 @@ int main(int argc, char **argv, char **envp)
 env->regs[31] = regs->r31; 
 env->sregs[SR_PC] = regs->pc;
 }
-#elif defined(TARGET_OPENRISC)
-{
-int i;
-
-for (i = 0; i < 32; i++) {
-cpu_set_gpr(env, i, regs->gpr[i]);
-}
-env->pc = regs->pc;
-cpu_set_sr(env, regs->sr);
-}
 #elif defined(TARGET_RISCV)
 {
 env->pc = regs->sepc;
diff --git a/linux-user/openrisc/cpu_loop.c b/linux-user/openrisc/cpu_loop.c
index b7700a5561..6c6ea871e1 100644
--- a/linux-user/openrisc/cpu_loop.c
+++ b/linux-user/openrisc/cpu_loop.c
@@ -21,6 +21,95 @@
 #include "qemu.h"
 #include "cpu_loop-common.h"
 
+void cpu_loop(CPUOpenRISCState *env)
+{
+CPUState *cs = CPU(openrisc_env_get_cpu(env));
+int trapnr;
+abi_long ret;
+target_siginfo_t info;
+
+for (;;) {
+cpu_exec_start(cs);
+trapnr = cpu_exec(cs);
+cpu_exec_end(cs);
+process_queued_cpu_work(cs);
+
+switch (trapnr) {
+case EXCP_SYSCALL:
+env->pc += 4;   /* 0xc00; */
+ret = do_syscall(env,
+ cpu_get_gpr(env, 11), /* return value   */
+ cpu_get_gpr(env, 3),  /* r3 - r7 are params */
+ cpu_get_gpr(env, 4),
+ 

[Qemu-devel] [PATCH v4 13/13] iotests: Add test for active mirroring

2018-04-11 Thread Max Reitz
Signed-off-by: Max Reitz 
Reviewed-by: Fam Zheng 
---
 tests/qemu-iotests/151 | 120 +
 tests/qemu-iotests/151.out |   5 ++
 tests/qemu-iotests/group   |   1 +
 3 files changed, 126 insertions(+)
 create mode 100755 tests/qemu-iotests/151
 create mode 100644 tests/qemu-iotests/151.out

diff --git a/tests/qemu-iotests/151 b/tests/qemu-iotests/151
new file mode 100755
index 00..8d8e050f98
--- /dev/null
+++ b/tests/qemu-iotests/151
@@ -0,0 +1,120 @@
+#!/usr/bin/env python
+#
+# Tests for active mirroring
+#
+# Copyright (C) 2017 Red Hat, Inc.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+
+import os
+import iotests
+from iotests import qemu_img
+
+source_img = os.path.join(iotests.test_dir, 'source.' + iotests.imgfmt)
+target_img = os.path.join(iotests.test_dir, 'target.' + iotests.imgfmt)
+
+class TestActiveMirror(iotests.QMPTestCase):
+image_len = 128 * 1024 * 1024 # MB
+potential_writes_in_flight = True
+
+def setUp(self):
+qemu_img('create', '-f', iotests.imgfmt, source_img, '128M')
+qemu_img('create', '-f', iotests.imgfmt, target_img, '128M')
+
+blk_source = {'id': 'source',
+  'if': 'none',
+  'node-name': 'source-node',
+  'driver': iotests.imgfmt,
+  'file': {'driver': 'file',
+   'filename': source_img}}
+
+blk_target = {'node-name': 'target-node',
+  'driver': iotests.imgfmt,
+  'file': {'driver': 'file',
+   'filename': target_img}}
+
+self.vm = iotests.VM()
+self.vm.add_drive_raw(self.qmp_to_opts(blk_source))
+self.vm.add_blockdev(self.qmp_to_opts(blk_target))
+self.vm.add_device('virtio-blk,drive=source')
+self.vm.launch()
+
+def tearDown(self):
+self.vm.shutdown()
+
+if not self.potential_writes_in_flight:
+self.assertTrue(iotests.compare_images(source_img, target_img),
+'mirror target does not match source')
+
+os.remove(source_img)
+os.remove(target_img)
+
+def doActiveIO(self, sync_source_and_target):
+# Fill the source image
+self.vm.hmp_qemu_io('source',
+'write -P 1 0 %i' % self.image_len);
+
+# Start some background requests
+for offset in range(1 * self.image_len / 8, 3 * self.image_len / 8, 
1024 * 1024):
+self.vm.hmp_qemu_io('source', 'aio_write -P 2 %i 1M' % offset)
+for offset in range(2 * self.image_len / 8, 3 * self.image_len / 8, 
1024 * 1024):
+self.vm.hmp_qemu_io('source', 'aio_write -z %i 1M' % offset)
+
+# Start the block job
+result = self.vm.qmp('blockdev-mirror',
+ job_id='mirror',
+ filter_node_name='mirror-node',
+ device='source-node',
+ target='target-node',
+ sync='full',
+ copy_mode='write-blocking')
+self.assert_qmp(result, 'return', {})
+
+# Start some more requests
+for offset in range(3 * self.image_len / 8, 5 * self.image_len / 8, 
1024 * 1024):
+self.vm.hmp_qemu_io('source', 'aio_write -P 3 %i 1M' % offset)
+for offset in range(4 * self.image_len / 8, 5 * self.image_len / 8, 
1024 * 1024):
+self.vm.hmp_qemu_io('source', 'aio_write -z %i 1M' % offset)
+
+# Wait for the READY event
+self.wait_ready(drive='mirror')
+
+# Now start some final requests; all of these (which land on
+# the source) should be settled using the active mechanism.
+# The mirror code itself asserts that the source BDS's dirty
+# bitmap will stay clean between READY and COMPLETED.
+for offset in range(5 * self.image_len / 8, 7 * self.image_len / 8, 
1024 * 1024):
+self.vm.hmp_qemu_io('source', 'aio_write -P 3 %i 1M' % offset)
+for offset in range(6 * self.image_len / 8, 7 * self.image_len / 8, 
1024 * 1024):
+self.vm.hmp_qemu_io('source', 'aio_write -z %i 1M' % offset)
+
+if sync_source_and_target:
+# If source and target should be in sync 

[Qemu-devel] [PATCH for 2.13 v2 15/19] linux-user: move s390x cpu loop to s390x directory

2018-04-11 Thread Laurent Vivier
No code change, only move code from main.c to
s390x/cpu_loop.c.

Signed-off-by: Laurent Vivier 
Reviewed-by: Richard Henderson 
Acked-by: Cornelia Huck 
---
 linux-user/main.c   | 146 
 linux-user/s390x/cpu_loop.c | 139 +
 2 files changed, 139 insertions(+), 146 deletions(-)

diff --git a/linux-user/main.c b/linux-user/main.c
index 37cf3a7d6f..7f6cfa5548 100644
--- a/linux-user/main.c
+++ b/linux-user/main.c
@@ -149,143 +149,6 @@ void fork_end(int child)
 }
 }
 
-#ifdef TARGET_S390X
-
-/* s390x masks the fault address it reports in si_addr for SIGSEGV and SIGBUS 
*/
-#define S390X_FAIL_ADDR_MASK -4096LL
-
-void cpu_loop(CPUS390XState *env)
-{
-CPUState *cs = CPU(s390_env_get_cpu(env));
-int trapnr, n, sig;
-target_siginfo_t info;
-target_ulong addr;
-abi_long ret;
-
-while (1) {
-cpu_exec_start(cs);
-trapnr = cpu_exec(cs);
-cpu_exec_end(cs);
-process_queued_cpu_work(cs);
-
-switch (trapnr) {
-case EXCP_INTERRUPT:
-/* Just indicate that signals should be handled asap.  */
-break;
-
-case EXCP_SVC:
-n = env->int_svc_code;
-if (!n) {
-/* syscalls > 255 */
-n = env->regs[1];
-}
-env->psw.addr += env->int_svc_ilen;
-ret = do_syscall(env, n, env->regs[2], env->regs[3],
- env->regs[4], env->regs[5],
- env->regs[6], env->regs[7], 0, 0);
-if (ret == -TARGET_ERESTARTSYS) {
-env->psw.addr -= env->int_svc_ilen;
-} else if (ret != -TARGET_QEMU_ESIGRETURN) {
-env->regs[2] = ret;
-}
-break;
-
-case EXCP_DEBUG:
-sig = gdb_handlesig(cs, TARGET_SIGTRAP);
-if (sig) {
-n = TARGET_TRAP_BRKPT;
-goto do_signal_pc;
-}
-break;
-case EXCP_PGM:
-n = env->int_pgm_code;
-switch (n) {
-case PGM_OPERATION:
-case PGM_PRIVILEGED:
-sig = TARGET_SIGILL;
-n = TARGET_ILL_ILLOPC;
-goto do_signal_pc;
-case PGM_PROTECTION:
-case PGM_ADDRESSING:
-sig = TARGET_SIGSEGV;
-/* XXX: check env->error_code */
-n = TARGET_SEGV_MAPERR;
-addr = env->__excp_addr & S390X_FAIL_ADDR_MASK;
-goto do_signal;
-case PGM_EXECUTE:
-case PGM_SPECIFICATION:
-case PGM_SPECIAL_OP:
-case PGM_OPERAND:
-do_sigill_opn:
-sig = TARGET_SIGILL;
-n = TARGET_ILL_ILLOPN;
-goto do_signal_pc;
-
-case PGM_FIXPT_OVERFLOW:
-sig = TARGET_SIGFPE;
-n = TARGET_FPE_INTOVF;
-goto do_signal_pc;
-case PGM_FIXPT_DIVIDE:
-sig = TARGET_SIGFPE;
-n = TARGET_FPE_INTDIV;
-goto do_signal_pc;
-
-case PGM_DATA:
-n = (env->fpc >> 8) & 0xff;
-if (n == 0xff) {
-/* compare-and-trap */
-goto do_sigill_opn;
-} else {
-/* An IEEE exception, simulated or otherwise.  */
-if (n & 0x80) {
-n = TARGET_FPE_FLTINV;
-} else if (n & 0x40) {
-n = TARGET_FPE_FLTDIV;
-} else if (n & 0x20) {
-n = TARGET_FPE_FLTOVF;
-} else if (n & 0x10) {
-n = TARGET_FPE_FLTUND;
-} else if (n & 0x08) {
-n = TARGET_FPE_FLTRES;
-} else {
-/* ??? Quantum exception; BFP, DFP error.  */
-goto do_sigill_opn;
-}
-sig = TARGET_SIGFPE;
-goto do_signal_pc;
-}
-
-default:
-fprintf(stderr, "Unhandled program exception: %#x\n", n);
-cpu_dump_state(cs, stderr, fprintf, 0);
-exit(EXIT_FAILURE);
-}
-break;
-
-do_signal_pc:
-addr = env->psw.addr;
-do_signal:
-info.si_signo = sig;
-info.si_errno = 0;
-info.si_code = n;
-info._sifields._sigfault._addr = addr;
-queue_signal(env, info.si_signo, QEMU_SI_FAULT, );
-break;
-
-case EXCP_ATOMIC:
-cpu_exec_step_atomic(cs);
-break;
-default:
-fprintf(stderr, "Unhandled trap: 0x%x\n", trapnr);
-

[Qemu-devel] [PATCH for 2.13 v3 04/20] linux-user: move sh4 signal.c parts to sh4 directory

2018-04-11 Thread Laurent Vivier
No code change, only move code from signal.c to
sh4/signal.c, except adding includes and
exporting setup_frame() and setup_rt_frame().

Signed-off-by: Laurent Vivier 
Reviewed-by: Alex Bennée 
Reviewed-by: Richard Henderson 
---

Notes:
v3: expand tabs

 linux-user/sh4/signal.c| 332 +
 linux-user/sh4/target_signal.h |   5 +
 linux-user/signal.c| 328 
 3 files changed, 337 insertions(+), 328 deletions(-)

diff --git a/linux-user/sh4/signal.c b/linux-user/sh4/signal.c
index 02ca338b6c..5ce182aff7 100644
--- a/linux-user/sh4/signal.c
+++ b/linux-user/sh4/signal.c
@@ -16,3 +16,335 @@
  *  You should have received a copy of the GNU General Public License
  *  along with this program; if not, see <http://www.gnu.org/licenses/>.
  */
+#include "qemu/osdep.h"
+#include "qemu.h"
+#include "target_signal.h"
+#include "signal-common.h"
+#include "linux-user/trace.h"
+
+/*
+ * code and data structures from linux kernel:
+ * include/asm-sh/sigcontext.h
+ * arch/sh/kernel/signal.c
+ */
+
+struct target_sigcontext {
+target_ulong  oldmask;
+
+/* CPU registers */
+target_ulong  sc_gregs[16];
+target_ulong  sc_pc;
+target_ulong  sc_pr;
+target_ulong  sc_sr;
+target_ulong  sc_gbr;
+target_ulong  sc_mach;
+target_ulong  sc_macl;
+
+/* FPU registers */
+target_ulong  sc_fpregs[16];
+target_ulong  sc_xfpregs[16];
+unsigned int sc_fpscr;
+unsigned int sc_fpul;
+unsigned int sc_ownedfp;
+};
+
+struct target_sigframe
+{
+struct target_sigcontext sc;
+target_ulong extramask[TARGET_NSIG_WORDS-1];
+uint16_t retcode[3];
+};
+
+
+struct target_ucontext {
+target_ulong tuc_flags;
+struct target_ucontext *tuc_link;
+target_stack_t tuc_stack;
+struct target_sigcontext tuc_mcontext;
+target_sigset_t tuc_sigmask;/* mask last for extensibility */
+};
+
+struct target_rt_sigframe
+{
+struct target_siginfo info;
+struct target_ucontext uc;
+uint16_t retcode[3];
+};
+
+
+#define MOVW(n)  (0x9300|((n)-2)) /* Move mem word at PC+n to R3 */
+#define TRAP_NOARG 0xc310 /* Syscall w/no args (NR in R3) SH3/4 */
+
+static abi_ulong get_sigframe(struct target_sigaction *ka,
+  unsigned long sp, size_t frame_size)
+{
+if ((ka->sa_flags & TARGET_SA_ONSTACK) && (sas_ss_flags(sp) == 0)) {
+sp = target_sigaltstack_used.ss_sp + target_sigaltstack_used.ss_size;
+}
+
+return (sp - frame_size) & -8ul;
+}
+
+/* Notice when we're in the middle of a gUSA region and reset.
+   Note that this will only occur for !parallel_cpus, as we will
+   translate such sequences differently in a parallel context.  */
+static void unwind_gusa(CPUSH4State *regs)
+{
+/* If the stack pointer is sufficiently negative, and we haven't
+   completed the sequence, then reset to the entry to the region.  */
+/* ??? The SH4 kernel checks for an address above 0xC0000000.
+   However, the page mappings in qemu linux-user aren't as restricted
+   and we wind up with the normal stack mapped above 0xF0000000.
+   That said, there is no reason why the kernel should be allowing
+   a gUSA region that spans 1GB.  Use a tighter check here, for what
+   can actually be enabled by the immediate move.  */
+if (regs->gregs[15] >= -128u && regs->pc < regs->gregs[0]) {
+/* Reset the PC to before the gUSA region, as computed from
+   R0 = region end, SP = -(region size), plus one more for the
+   insn that actually initializes SP to the region size.  */
+regs->pc = regs->gregs[0] + regs->gregs[15] - 2;
+
+/* Reset the SP to the saved version in R1.  */
+regs->gregs[15] = regs->gregs[1];
+}
+}
+
+static void setup_sigcontext(struct target_sigcontext *sc,
+ CPUSH4State *regs, unsigned long mask)
+{
+int i;
+
+#define COPY(x) __put_user(regs->x, >sc_##x)
+COPY(gregs[0]); COPY(gregs[1]);
+COPY(gregs[2]); COPY(gregs[3]);
+COPY(gregs[4]); COPY(gregs[5]);
+COPY(gregs[6]); COPY(gregs[7]);
+COPY(gregs[8]); COPY(gregs[9]);
+COPY(gregs[10]); COPY(gregs[11]);
+COPY(gregs[12]); COPY(gregs[13]);
+COPY(gregs[14]); COPY(gregs[15]);
+COPY(gbr); COPY(mach);
+COPY(macl); COPY(pr);
+COPY(sr); COPY(pc);
+#undef COPY
+
+for (i=0; i<16; i++) {
+__put_user(regs->fregs[i], >sc_fpregs[i]);
+}
+__put_user(regs->fpscr, >sc_fpscr);
+__put_user(regs->fpul, >sc_fpul);
+
+/* non-iBCS2 extensions.. */
+__put_user(mask, >oldmask);
+}
+
+static void restore_sigcontext(CPUSH4State *regs, struct target_sigcontext *sc)
+{
+int i;
+
+#define COPY(x) __get_user(regs->x, >sc_##x)
+COPY(gregs[0]); COPY(gregs[1]);
+COPY(gregs[2]); COPY(gregs[3]);
+COPY(gregs[4]); COPY(gregs[5]);
+   

[Qemu-devel] [PATCH v4 12/13] block/mirror: Add copy mode QAPI interface

2018-04-11 Thread Max Reitz
This patch allows the user to specify whether to use active or only
background mode for mirror block jobs.  Currently, this setting will
remain constant for the duration of the entire block job.

Signed-off-by: Max Reitz 
---
 qapi/block-core.json  | 11 +--
 include/block/block_int.h |  4 +++-
 block/mirror.c| 12 +++-
 blockdev.c|  9 -
 4 files changed, 27 insertions(+), 9 deletions(-)

diff --git a/qapi/block-core.json b/qapi/block-core.json
index 8210d601f4..1653f4ce93 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -1791,6 +1791,9 @@
 # written. Both will result in identical contents.
 # Default is true. (Since 2.4)
 #
+# @copy-mode: when to copy data to the destination; defaults to 'background'
+# (Since: 2.13)
+#
 # Since: 1.3
 ##
 { 'struct': 'DriveMirror',
@@ -1800,7 +1803,7 @@
 '*speed': 'int', '*granularity': 'uint32',
 '*buf-size': 'int', '*on-source-error': 'BlockdevOnError',
 '*on-target-error': 'BlockdevOnError',
-'*unmap': 'bool' } }
+'*unmap': 'bool', '*copy-mode': 'MirrorCopyMode' } }
 
 ##
 # @BlockDirtyBitmap:
@@ -1979,6 +1982,9 @@
 #above @device. If this option is not given, a node name is
 #autogenerated. (Since: 2.9)
 #
+# @copy-mode: when to copy data to the destination; defaults to 'background'
+# (Since: 2.13)
+#
 # Returns: nothing on success.
 #
 # Since: 2.6
@@ -1999,7 +2005,8 @@
 '*speed': 'int', '*granularity': 'uint32',
 '*buf-size': 'int', '*on-source-error': 'BlockdevOnError',
 '*on-target-error': 'BlockdevOnError',
-'*filter-node-name': 'str' } }
+'*filter-node-name': 'str',
+'*copy-mode': 'MirrorCopyMode' } }
 
 ##
 # @block_set_io_throttle:
diff --git a/include/block/block_int.h b/include/block/block_int.h
index 8d63a1b0c1..9a307cc7ad 100644
--- a/include/block/block_int.h
+++ b/include/block/block_int.h
@@ -975,6 +975,7 @@ void commit_active_start(const char *job_id, 
BlockDriverState *bs,
  * @filter_node_name: The node name that should be assigned to the filter
  * driver that the mirror job inserts into the graph above @bs. NULL means that
  * a node name should be autogenerated.
+ * @copy_mode: When to trigger writes to the target.
  * @errp: Error object.
  *
  * Start a mirroring operation on @bs.  Clusters that are allocated
@@ -988,7 +989,8 @@ void mirror_start(const char *job_id, BlockDriverState *bs,
   MirrorSyncMode mode, BlockMirrorBackingMode backing_mode,
   BlockdevOnError on_source_error,
   BlockdevOnError on_target_error,
-  bool unmap, const char *filter_node_name, Error **errp);
+  bool unmap, const char *filter_node_name,
+  MirrorCopyMode copy_mode, Error **errp);
 
 /*
  * backup_job_create:
diff --git a/block/mirror.c b/block/mirror.c
index a700862029..6144fc25b0 100644
--- a/block/mirror.c
+++ b/block/mirror.c
@@ -1468,7 +1468,7 @@ static void mirror_start_job(const char *job_id, 
BlockDriverState *bs,
  const BlockJobDriver *driver,
  bool is_none_mode, BlockDriverState *base,
  bool auto_complete, const char *filter_node_name,
- bool is_mirror,
+ bool is_mirror, MirrorCopyMode copy_mode,
  Error **errp)
 {
 MirrorBlockJob *s;
@@ -1574,7 +1574,7 @@ static void mirror_start_job(const char *job_id, 
BlockDriverState *bs,
 s->on_target_error = on_target_error;
 s->is_none_mode = is_none_mode;
 s->backing_mode = backing_mode;
-s->copy_mode = MIRROR_COPY_MODE_BACKGROUND;
+s->copy_mode = copy_mode;
 s->base = base;
 s->granularity = granularity;
 s->buf_size = ROUND_UP(buf_size, granularity);
@@ -1641,7 +1641,8 @@ void mirror_start(const char *job_id, BlockDriverState 
*bs,
   MirrorSyncMode mode, BlockMirrorBackingMode backing_mode,
   BlockdevOnError on_source_error,
   BlockdevOnError on_target_error,
-  bool unmap, const char *filter_node_name, Error **errp)
+  bool unmap, const char *filter_node_name,
+  MirrorCopyMode copy_mode, Error **errp)
 {
 bool is_none_mode;
 BlockDriverState *base;
@@ -1656,7 +1657,7 @@ void mirror_start(const char *job_id, BlockDriverState 
*bs,
  speed, granularity, buf_size, backing_mode,
  on_source_error, on_target_error, unmap, NULL, NULL,
  _job_driver, is_none_mode, base, false,
- filter_node_name, true, errp);
+ filter_node_name, true, copy_mode, errp);
 }
 
 void commit_active_start(const char *job_id, 

[Qemu-devel] [PATCH v4 11/13] block/mirror: Add active mirroring

2018-04-11 Thread Max Reitz
This patch implements active synchronous mirroring.  In active mode, the
passive mechanism will still be in place and is used to copy all
initially dirty clusters off the source disk; but every write request
will write data both to the source and the target disk, so the source
cannot be dirtied faster than data is mirrored to the target.  Also,
once the block job has converged (BLOCK_JOB_READY sent), source and
target are guaranteed to stay in sync (unless an error occurs).

Active mode is completely optional and currently disabled at runtime.  A
later patch will add a way for users to enable it.

Signed-off-by: Max Reitz 
Reviewed-by: Fam Zheng 
---
 qapi/block-core.json |  18 
 block/mirror.c   | 252 ++-
 2 files changed, 265 insertions(+), 5 deletions(-)

diff --git a/qapi/block-core.json b/qapi/block-core.json
index c50517bff3..8210d601f4 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -1049,6 +1049,24 @@
 { 'enum': 'MirrorSyncMode',
   'data': ['top', 'full', 'none', 'incremental'] }
 
+##
+# @MirrorCopyMode:
+#
+# An enumeration whose values tell the mirror block job when to
+# trigger writes to the target.
+#
+# @background: copy data in background only.
+#
+# @write-blocking: when data is written to the source, write it
+#  (synchronously) to the target as well.  In
+#  addition, data is copied in background just like in
+#  @background mode.
+#
+# Since: 2.13
+##
+{ 'enum': 'MirrorCopyMode',
+  'data': ['background', 'write-blocking'] }
+
 ##
 # @BlockJobType:
 #
diff --git a/block/mirror.c b/block/mirror.c
index abaf2a83c7..a700862029 100644
--- a/block/mirror.c
+++ b/block/mirror.c
@@ -53,8 +53,12 @@ typedef struct MirrorBlockJob {
 Error *replace_blocker;
 bool is_none_mode;
 BlockMirrorBackingMode backing_mode;
+MirrorCopyMode copy_mode;
 BlockdevOnError on_source_error, on_target_error;
 bool synced;
+/* Set when the target is synced (dirty bitmap is clean, nothing
+ * in flight) and the job is running in active mode */
+bool actively_synced;
 bool should_complete;
 int64_t granularity;
 size_t buf_size;
@@ -76,6 +80,7 @@ typedef struct MirrorBlockJob {
 int target_cluster_size;
 int max_iov;
 bool initial_zeroing_ongoing;
+int in_active_write_counter;
 } MirrorBlockJob;
 
 typedef struct MirrorBDSOpaque {
@@ -93,6 +98,7 @@ struct MirrorOp {
 int64_t *bytes_handled;
 
 bool is_pseudo_op;
+bool is_active_write;
 CoQueue waiting_requests;
 
 QTAILQ_ENTRY(MirrorOp) next;
@@ -108,6 +114,7 @@ static BlockErrorAction mirror_error_action(MirrorBlockJob 
*s, bool read,
 int error)
 {
 s->synced = false;
+s->actively_synced = false;
 if (read) {
 return block_job_error_action(>common, s->on_source_error,
   true, error);
@@ -274,7 +281,7 @@ static int mirror_cow_align(MirrorBlockJob *s, int64_t 
*offset,
 return ret;
 }
 
-static inline void mirror_wait_for_free_in_flight_slot(MirrorBlockJob *s)
+static inline void mirror_wait_for_any_operation(MirrorBlockJob *s, bool 
active)
 {
 MirrorOp *op;
 
@@ -284,7 +291,7 @@ static inline void 
mirror_wait_for_free_in_flight_slot(MirrorBlockJob *s)
  * caller of this function.  Since there is only one pseudo op
  * at any given time, we will always find some real operation
  * to wait on. */
-if (!op->is_pseudo_op) {
+if (!op->is_pseudo_op && op->is_active_write == active) {
 qemu_co_queue_wait(>waiting_requests, NULL);
 return;
 }
@@ -292,6 +299,12 @@ static inline void 
mirror_wait_for_free_in_flight_slot(MirrorBlockJob *s)
 abort();
 }
 
+static inline void mirror_wait_for_free_in_flight_slot(MirrorBlockJob *s)
+{
+/* Only non-active operations use up in-flight slots */
+mirror_wait_for_any_operation(s, false);
+}
+
 /* Perform a mirror copy operation.
  *
  * *op->bytes_handled is set to the number of bytes copied after and
@@ -849,6 +862,7 @@ static void coroutine_fn mirror_run(void *opaque)
 /* Report BLOCK_JOB_READY and wait for complete. */
 block_job_event_ready(>common);
 s->synced = true;
+s->actively_synced = true;
 while (!block_job_is_cancelled(>common) && !s->should_complete) {
 block_job_yield(>common);
 }
@@ -900,6 +914,12 @@ static void coroutine_fn mirror_run(void *opaque)
 int64_t cnt, delta;
 bool should_complete;
 
+/* Do not start passive operations while there are active
+ * writes in progress */
+while (s->in_active_write_counter) {
+mirror_wait_for_any_operation(s, true);
+}
+
 if (s->ret < 0) {
 ret = s->ret;
 goto immediate_exit;
@@ -947,6 +967,9 @@ static 

[Qemu-devel] [PATCH for 2.13 v2 12/19] linux-user: move microblaze cpu loop to microblaze directory

2018-04-11 Thread Laurent Vivier
No code change, only move code from main.c to
microblaze/cpu_loop.c.

Signed-off-by: Laurent Vivier 
Reviewed-by: Richard Henderson 
---

Notes:
v2: expand tabs

 linux-user/main.c| 155 ---
 linux-user/microblaze/cpu_loop.c | 150 +
 2 files changed, 150 insertions(+), 155 deletions(-)

diff --git a/linux-user/main.c b/linux-user/main.c
index 9e01325d6a..9e49c8a30c 100644
--- a/linux-user/main.c
+++ b/linux-user/main.c
@@ -149,125 +149,6 @@ void fork_end(int child)
 }
 }
 
-#ifdef TARGET_MICROBLAZE
-void cpu_loop(CPUMBState *env)
-{
-CPUState *cs = CPU(mb_env_get_cpu(env));
-int trapnr, ret;
-target_siginfo_t info;
-
-while (1) {
-cpu_exec_start(cs);
-trapnr = cpu_exec(cs);
-cpu_exec_end(cs);
-process_queued_cpu_work(cs);
-
-switch (trapnr) {
-case 0xaa:
-{
-info.si_signo = TARGET_SIGSEGV;
-info.si_errno = 0;
-/* XXX: check env->error_code */
-info.si_code = TARGET_SEGV_MAPERR;
-info._sifields._sigfault._addr = 0;
-queue_signal(env, info.si_signo, QEMU_SI_FAULT, );
-}
-break;
-   case EXCP_INTERRUPT:
- /* just indicate that signals should be handled asap */
- break;
-case EXCP_BREAK:
-/* Return address is 4 bytes after the call.  */
-env->regs[14] += 4;
-env->sregs[SR_PC] = env->regs[14];
-ret = do_syscall(env, 
- env->regs[12], 
- env->regs[5], 
- env->regs[6], 
- env->regs[7], 
- env->regs[8], 
- env->regs[9], 
- env->regs[10],
- 0, 0);
-if (ret == -TARGET_ERESTARTSYS) {
-/* Wind back to before the syscall. */
-env->sregs[SR_PC] -= 4;
-} else if (ret != -TARGET_QEMU_ESIGRETURN) {
-env->regs[3] = ret;
-}
-/* All syscall exits result in guest r14 being equal to the
- * PC we return to, because the kernel syscall exit "rtbd" does
- * this. (This is true even for sigreturn(); note that r14 is
- * not a userspace-usable register, as the kernel may clobber it
- * at any point.)
- */
-env->regs[14] = env->sregs[SR_PC];
-break;
-case EXCP_HW_EXCP:
-env->regs[17] = env->sregs[SR_PC] + 4;
-if (env->iflags & D_FLAG) {
-env->sregs[SR_ESR] |= 1 << 12;
-env->sregs[SR_PC] -= 4;
-/* FIXME: if branch was immed, replay the imm as well.  */
-}
-
-env->iflags &= ~(IMM_FLAG | D_FLAG);
-
-switch (env->sregs[SR_ESR] & 31) {
-case ESR_EC_DIVZERO:
-info.si_signo = TARGET_SIGFPE;
-info.si_errno = 0;
-info.si_code = TARGET_FPE_FLTDIV;
-info._sifields._sigfault._addr = 0;
-queue_signal(env, info.si_signo, QEMU_SI_FAULT, );
-break;
-case ESR_EC_FPU:
-info.si_signo = TARGET_SIGFPE;
-info.si_errno = 0;
-if (env->sregs[SR_FSR] & FSR_IO) {
-info.si_code = TARGET_FPE_FLTINV;
-}
-if (env->sregs[SR_FSR] & FSR_DZ) {
-info.si_code = TARGET_FPE_FLTDIV;
-}
-info._sifields._sigfault._addr = 0;
-queue_signal(env, info.si_signo, QEMU_SI_FAULT, );
-break;
-default:
-printf ("Unhandled hw-exception: 0x%x\n",
-env->sregs[SR_ESR] & ESR_EC_MASK);
-cpu_dump_state(cs, stderr, fprintf, 0);
-exit(EXIT_FAILURE);
-break;
-}
-break;
-case EXCP_DEBUG:
-{
-int sig;
-
-sig = gdb_handlesig(cs, TARGET_SIGTRAP);
-if (sig)
-  {
-info.si_signo = sig;
-info.si_errno = 0;
-info.si_code = TARGET_TRAP_BRKPT;
-queue_signal(env, info.si_signo, QEMU_SI_FAULT, );
-  }
-}
-break;
-case EXCP_ATOMIC:
-cpu_exec_step_atomic(cs);
-break;
-default:
-printf ("Unhandled trap: 0x%x\n", trapnr);
-cpu_dump_state(cs, stderr, fprintf, 0);
-exit(EXIT_FAILURE);
-}
-  

[Qemu-devel] [PATCH v4 10/13] block/mirror: Add MirrorBDSOpaque

2018-04-11 Thread Max Reitz
This will allow us to access the block job data when the mirror block
driver becomes more complex.

Signed-off-by: Max Reitz 
Reviewed-by: Fam Zheng 
---
 block/mirror.c | 12 
 1 file changed, 12 insertions(+)

diff --git a/block/mirror.c b/block/mirror.c
index 40c7c55f07..abaf2a83c7 100644
--- a/block/mirror.c
+++ b/block/mirror.c
@@ -78,6 +78,10 @@ typedef struct MirrorBlockJob {
 bool initial_zeroing_ongoing;
 } MirrorBlockJob;
 
+typedef struct MirrorBDSOpaque {
+MirrorBlockJob *job;
+} MirrorBDSOpaque;
+
 struct MirrorOp {
 MirrorBlockJob *s;
 QEMUIOVector qiov;
@@ -602,6 +606,7 @@ static void mirror_exit(BlockJob *job, void *opaque)
 {
 MirrorBlockJob *s = container_of(job, MirrorBlockJob, common);
 MirrorExitData *data = opaque;
+MirrorBDSOpaque *bs_opaque = s->mirror_top_bs->opaque;
 AioContext *replace_aio_context = NULL;
 BlockDriverState *src = s->mirror_top_bs->backing->bs;
 BlockDriverState *target_bs = blk_bs(s->target);
@@ -694,6 +699,7 @@ static void mirror_exit(BlockJob *job, void *opaque)
 blk_set_perm(job->blk, 0, BLK_PERM_ALL, _abort);
 blk_insert_bs(job->blk, mirror_top_bs, _abort);
 
+bs_opaque->job = NULL;
 block_job_completed(>common, data->ret);
 
 g_free(data);
@@ -1225,6 +1231,7 @@ static void mirror_start_job(const char *job_id, 
BlockDriverState *bs,
  Error **errp)
 {
 MirrorBlockJob *s;
+MirrorBDSOpaque *bs_opaque;
 BlockDriverState *mirror_top_bs;
 bool target_graph_mod;
 bool target_is_backing;
@@ -1258,6 +1265,8 @@ static void mirror_start_job(const char *job_id, 
BlockDriverState *bs,
 mirror_top_bs->implicit = true;
 }
 mirror_top_bs->total_sectors = bs->total_sectors;
+bs_opaque = g_new0(MirrorBDSOpaque, 1);
+mirror_top_bs->opaque = bs_opaque;
 bdrv_set_aio_context(mirror_top_bs, bdrv_get_aio_context(bs));
 
 /* bdrv_append takes ownership of the mirror_top_bs reference, need to keep
@@ -1282,6 +1291,8 @@ static void mirror_start_job(const char *job_id, 
BlockDriverState *bs,
 if (!s) {
 goto fail;
 }
+bs_opaque->job = s;
+
 /* The block job now has a reference to this node */
 bdrv_unref(mirror_top_bs);
 
@@ -1371,6 +1382,7 @@ fail:
 
 g_free(s->replaces);
 blk_unref(s->target);
+bs_opaque->job = NULL;
 block_job_early_fail(>common);
 }
 
-- 
2.14.3




[Qemu-devel] [PATCH v4 08/13] test-hbitmap: Add non-advancing iter_next tests

2018-04-11 Thread Max Reitz
Add a function that wraps hbitmap_iter_next() and always calls it in
non-advancing mode first, and in advancing mode next.  The result should
always be the same.

By using this function everywhere we called hbitmap_iter_next() before,
we should get good test coverage for non-advancing hbitmap_iter_next().

Signed-off-by: Max Reitz 
Reviewed-by: Fam Zheng 
Reviewed-by: John Snow 
---
 tests/test-hbitmap.c | 36 
 1 file changed, 24 insertions(+), 12 deletions(-)

diff --git a/tests/test-hbitmap.c b/tests/test-hbitmap.c
index f2158f767d..5e67ac1d3a 100644
--- a/tests/test-hbitmap.c
+++ b/tests/test-hbitmap.c
@@ -30,6 +30,18 @@ typedef struct TestHBitmapData {
 } TestHBitmapData;
 
 
+static int64_t check_hbitmap_iter_next(HBitmapIter *hbi)
+{
+int next0, next1;
+
+next0 = hbitmap_iter_next(hbi, false);
+next1 = hbitmap_iter_next(hbi, true);
+
+g_assert_cmpint(next0, ==, next1);
+
+return next0;
+}
+
 /* Check that the HBitmap and the shadow bitmap contain the same data,
  * ignoring the same "first" bits.
  */
@@ -46,7 +58,7 @@ static void hbitmap_test_check(TestHBitmapData *data,
 
 i = first;
 for (;;) {
-next = hbitmap_iter_next(, true);
+next = check_hbitmap_iter_next();
 if (next < 0) {
 next = data->size;
 }
@@ -435,25 +447,25 @@ static void test_hbitmap_iter_granularity(TestHBitmapData 
*data,
 /* Note that hbitmap_test_check has to be invoked manually in this test.  
*/
 hbitmap_test_init(data, 131072 << 7, 7);
 hbitmap_iter_init(, data->hb, 0);
-g_assert_cmpint(hbitmap_iter_next(, true), <, 0);
+g_assert_cmpint(check_hbitmap_iter_next(), <, 0);
 
 hbitmap_test_set(data, ((L2 + L1 + 1) << 7) + 8, 8);
 hbitmap_iter_init(, data->hb, 0);
-g_assert_cmpint(hbitmap_iter_next(, true), ==, (L2 + L1 + 1) << 7);
-g_assert_cmpint(hbitmap_iter_next(, true), <, 0);
+g_assert_cmpint(check_hbitmap_iter_next(), ==, (L2 + L1 + 1) << 7);
+g_assert_cmpint(check_hbitmap_iter_next(), <, 0);
 
 hbitmap_iter_init(, data->hb, (L2 + L1 + 2) << 7);
 g_assert_cmpint(hbitmap_iter_next(, true), <, 0);
 
 hbitmap_test_set(data, (131072 << 7) - 8, 8);
 hbitmap_iter_init(, data->hb, 0);
-g_assert_cmpint(hbitmap_iter_next(, true), ==, (L2 + L1 + 1) << 7);
-g_assert_cmpint(hbitmap_iter_next(, true), ==, 131071 << 7);
-g_assert_cmpint(hbitmap_iter_next(, true), <, 0);
+g_assert_cmpint(check_hbitmap_iter_next(), ==, (L2 + L1 + 1) << 7);
+g_assert_cmpint(check_hbitmap_iter_next(), ==, 131071 << 7);
+g_assert_cmpint(check_hbitmap_iter_next(), <, 0);
 
 hbitmap_iter_init(, data->hb, (L2 + L1 + 2) << 7);
-g_assert_cmpint(hbitmap_iter_next(, true), ==, 131071 << 7);
-g_assert_cmpint(hbitmap_iter_next(, true), <, 0);
+g_assert_cmpint(check_hbitmap_iter_next(), ==, 131071 << 7);
+g_assert_cmpint(check_hbitmap_iter_next(), <, 0);
 }
 
 static void hbitmap_test_set_boundary_bits(TestHBitmapData *data, ssize_t diff)
@@ -893,7 +905,7 @@ static void test_hbitmap_serialize_zeroes(TestHBitmapData 
*data,
 for (i = 0; i < num_positions; i++) {
 hbitmap_deserialize_zeroes(data->hb, positions[i], min_l1, true);
 hbitmap_iter_init(, data->hb, 0);
-next = hbitmap_iter_next(, true);
+next = check_hbitmap_iter_next();
 if (i == num_positions - 1) {
 g_assert_cmpint(next, ==, -1);
 } else {
@@ -919,10 +931,10 @@ static void test_hbitmap_iter_and_reset(TestHBitmapData 
*data,
 
 hbitmap_iter_init(, data->hb, BITS_PER_LONG - 1);
 
-hbitmap_iter_next(, true);
+check_hbitmap_iter_next();
 
 hbitmap_reset_all(data->hb);
-hbitmap_iter_next(, true);
+check_hbitmap_iter_next();
 }
 
 static void test_hbitmap_next_zero_check(TestHBitmapData *data, int64_t start)
-- 
2.14.3




[Qemu-devel] [PATCH for 2.13 v3 03/20] linux-user: move arm signal.c parts to arm directory

2018-04-11 Thread Laurent Vivier
No code change, only move code from signal.c to
arm/signal.c, except adding includes and
exporting setup_frame() and setup_rt_frame().

Signed-off-by: Laurent Vivier 
Reviewed-by: Alex Bennée 
Reviewed-by: Richard Henderson 
---

Notes:
v3: expand tabs

 linux-user/arm/signal.c| 754 +
 linux-user/arm/target_signal.h |   6 +-
 linux-user/signal.c| 751 
 3 files changed, 759 insertions(+), 752 deletions(-)

diff --git a/linux-user/arm/signal.c b/linux-user/arm/signal.c
index 02ca338b6c..0c1ec53025 100644
--- a/linux-user/arm/signal.c
+++ b/linux-user/arm/signal.c
@@ -16,3 +16,757 @@
  *  You should have received a copy of the GNU General Public License
  *  along with this program; if not, see .
  */
+#include "qemu/osdep.h"
+#include "qemu.h"
+#include "target_signal.h"
+#include "signal-common.h"
+#include "linux-user/trace.h"
+
+struct target_sigcontext {
+abi_ulong trap_no;
+abi_ulong error_code;
+abi_ulong oldmask;
+abi_ulong arm_r0;
+abi_ulong arm_r1;
+abi_ulong arm_r2;
+abi_ulong arm_r3;
+abi_ulong arm_r4;
+abi_ulong arm_r5;
+abi_ulong arm_r6;
+abi_ulong arm_r7;
+abi_ulong arm_r8;
+abi_ulong arm_r9;
+abi_ulong arm_r10;
+abi_ulong arm_fp;
+abi_ulong arm_ip;
+abi_ulong arm_sp;
+abi_ulong arm_lr;
+abi_ulong arm_pc;
+abi_ulong arm_cpsr;
+abi_ulong fault_address;
+};
+
+struct target_ucontext_v1 {
+abi_ulong tuc_flags;
+abi_ulong tuc_link;
+target_stack_t tuc_stack;
+struct target_sigcontext tuc_mcontext;
+target_sigset_t  tuc_sigmask;   /* mask last for extensibility */
+};
+
+struct target_ucontext_v2 {
+abi_ulong tuc_flags;
+abi_ulong tuc_link;
+target_stack_t tuc_stack;
+struct target_sigcontext tuc_mcontext;
+target_sigset_t  tuc_sigmask;   /* mask last for extensibility */
+char __unused[128 - sizeof(target_sigset_t)];
+abi_ulong tuc_regspace[128] __attribute__((__aligned__(8)));
+};
+
+struct target_user_vfp {
+uint64_t fpregs[32];
+abi_ulong fpscr;
+};
+
+struct target_user_vfp_exc {
+abi_ulong fpexc;
+abi_ulong fpinst;
+abi_ulong fpinst2;
+};
+
+struct target_vfp_sigframe {
+abi_ulong magic;
+abi_ulong size;
+struct target_user_vfp ufp;
+struct target_user_vfp_exc ufp_exc;
+} __attribute__((__aligned__(8)));
+
+struct target_iwmmxt_sigframe {
+abi_ulong magic;
+abi_ulong size;
+uint64_t regs[16];
+/* Note that not all the coprocessor control registers are stored here */
+uint32_t wcssf;
+uint32_t wcasf;
+uint32_t wcgr0;
+uint32_t wcgr1;
+uint32_t wcgr2;
+uint32_t wcgr3;
+} __attribute__((__aligned__(8)));
+
+#define TARGET_VFP_MAGIC 0x56465001
+#define TARGET_IWMMXT_MAGIC 0x12ef842a
+
+struct sigframe_v1
+{
+struct target_sigcontext sc;
+abi_ulong extramask[TARGET_NSIG_WORDS-1];
+abi_ulong retcode;
+};
+
+struct sigframe_v2
+{
+struct target_ucontext_v2 uc;
+abi_ulong retcode;
+};
+
+struct rt_sigframe_v1
+{
+abi_ulong pinfo;
+abi_ulong puc;
+struct target_siginfo info;
+struct target_ucontext_v1 uc;
+abi_ulong retcode;
+};
+
+struct rt_sigframe_v2
+{
+struct target_siginfo info;
+struct target_ucontext_v2 uc;
+abi_ulong retcode;
+};
+
+#define TARGET_CONFIG_CPU_32 1
+
+/*
+ * For ARM syscalls, we encode the syscall number into the instruction.
+ */
+#define SWI_SYS_SIGRETURN   (0xef00|(TARGET_NR_sigreturn + 
ARM_SYSCALL_BASE))
+#define SWI_SYS_RT_SIGRETURN(0xef00|(TARGET_NR_rt_sigreturn + 
ARM_SYSCALL_BASE))
+
+/*
+ * For Thumb syscalls, we pass the syscall number via r7.  We therefore
+ * need two 16-bit instructions.
+ */
+#define SWI_THUMB_SIGRETURN (0xdf00 << 16 | 0x2700 | (TARGET_NR_sigreturn))
+#define SWI_THUMB_RT_SIGRETURN  (0xdf00 << 16 | 0x2700 | 
(TARGET_NR_rt_sigreturn))
+
+static const abi_ulong retcodes[4] = {
+SWI_SYS_SIGRETURN,  SWI_THUMB_SIGRETURN,
+SWI_SYS_RT_SIGRETURN,   SWI_THUMB_RT_SIGRETURN
+};
+
+
+static inline int valid_user_regs(CPUARMState *regs)
+{
+return 1;
+}
+
+static void
+setup_sigcontext(struct target_sigcontext *sc, /*struct _fpstate *fpstate,*/
+ CPUARMState *env, abi_ulong mask)
+{
+__put_user(env->regs[0], >arm_r0);
+__put_user(env->regs[1], >arm_r1);
+__put_user(env->regs[2], >arm_r2);
+__put_user(env->regs[3], >arm_r3);
+__put_user(env->regs[4], >arm_r4);
+__put_user(env->regs[5], >arm_r5);
+__put_user(env->regs[6], >arm_r6);
+__put_user(env->regs[7], >arm_r7);
+__put_user(env->regs[8], >arm_r8);
+__put_user(env->regs[9], >arm_r9);
+__put_user(env->regs[10], >arm_r10);
+__put_user(env->regs[11], >arm_fp);
+__put_user(env->regs[12], >arm_ip);
+__put_user(env->regs[13], 

[Qemu-devel] [PATCH for 2.13 v2 01/19] linux-user: create a dummy per arch cpu_loop.c

2018-04-11 Thread Laurent Vivier
Create a cpu_loop-common.h for future use by
the new per-architecture cpu_loop.c files and
use it in the existing main.c.

Introduce target_cpu_copy_regs():
declare the function in cpu_loop-common.h
and add an empty implementation for each target,
so that all the cpu_loop prologues can be moved to this function.

Signed-off-by: Laurent Vivier 
Reviewed-by: Richard Henderson 
---
 linux-user/Makefile.objs |  3 ++-
 linux-user/aarch64/cpu_loop.c| 26 ++
 linux-user/alpha/cpu_loop.c  | 26 ++
 linux-user/arm/cpu_loop.c| 26 ++
 linux-user/cpu_loop-common.h | 37 +
 linux-user/cris/cpu_loop.c   | 26 ++
 linux-user/hppa/cpu_loop.c   | 26 ++
 linux-user/i386/cpu_loop.c   | 26 ++
 linux-user/m68k/cpu_loop.c   | 26 ++
 linux-user/main.c| 17 +++--
 linux-user/microblaze/cpu_loop.c | 26 ++
 linux-user/mips/cpu_loop.c   | 26 ++
 linux-user/mips64/cpu_loop.c | 26 ++
 linux-user/nios2/cpu_loop.c  | 26 ++
 linux-user/openrisc/cpu_loop.c   | 26 ++
 linux-user/ppc/cpu_loop.c| 26 ++
 linux-user/riscv/cpu_loop.c  | 26 ++
 linux-user/s390x/cpu_loop.c  | 26 ++
 linux-user/sh4/cpu_loop.c| 26 ++
 linux-user/sparc/cpu_loop.c  | 26 ++
 linux-user/sparc64/cpu_loop.c| 26 ++
 linux-user/tilegx/cpu_loop.c | 26 ++
 linux-user/x86_64/cpu_loop.c | 26 ++
 linux-user/xtensa/cpu_loop.c | 26 ++
 24 files changed, 588 insertions(+), 15 deletions(-)
 create mode 100644 linux-user/aarch64/cpu_loop.c
 create mode 100644 linux-user/alpha/cpu_loop.c
 create mode 100644 linux-user/arm/cpu_loop.c
 create mode 100644 linux-user/cpu_loop-common.h
 create mode 100644 linux-user/cris/cpu_loop.c
 create mode 100644 linux-user/hppa/cpu_loop.c
 create mode 100644 linux-user/i386/cpu_loop.c
 create mode 100644 linux-user/m68k/cpu_loop.c
 create mode 100644 linux-user/microblaze/cpu_loop.c
 create mode 100644 linux-user/mips/cpu_loop.c
 create mode 100644 linux-user/mips64/cpu_loop.c
 create mode 100644 linux-user/nios2/cpu_loop.c
 create mode 100644 linux-user/openrisc/cpu_loop.c
 create mode 100644 linux-user/ppc/cpu_loop.c
 create mode 100644 linux-user/riscv/cpu_loop.c
 create mode 100644 linux-user/s390x/cpu_loop.c
 create mode 100644 linux-user/sh4/cpu_loop.c
 create mode 100644 linux-user/sparc/cpu_loop.c
 create mode 100644 linux-user/sparc64/cpu_loop.c
 create mode 100644 linux-user/tilegx/cpu_loop.c
 create mode 100644 linux-user/x86_64/cpu_loop.c
 create mode 100644 linux-user/xtensa/cpu_loop.c

diff --git a/linux-user/Makefile.objs b/linux-user/Makefile.objs
index 811a7f5ce5..59a5c17354 100644
--- a/linux-user/Makefile.objs
+++ b/linux-user/Makefile.objs
@@ -1,6 +1,7 @@
 obj-y = main.o syscall.o strace.o mmap.o signal.o \
elfload.o linuxload.o uaccess.o uname.o \
-   safe-syscall.o $(TARGET_ABI_DIR)/signal.o
+   safe-syscall.o $(TARGET_ABI_DIR)/signal.o \
+$(TARGET_ABI_DIR)/cpu_loop.o
 
 obj-$(TARGET_HAS_BFLT) += flatload.o
 obj-$(TARGET_I386) += vm86.o
diff --git a/linux-user/aarch64/cpu_loop.c b/linux-user/aarch64/cpu_loop.c
new file mode 100644
index 00..b7700a5561
--- /dev/null
+++ b/linux-user/aarch64/cpu_loop.c
@@ -0,0 +1,26 @@
+/*
+ *  qemu user cpu loop
+ *
+ *  Copyright (c) 2003-2008 Fabrice Bellard
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "qemu/osdep.h"
+#include "qemu.h"
+#include "cpu_loop-common.h"
+
+void target_cpu_copy_regs(CPUArchState *env, struct target_pt_regs *regs)
+{
+}
diff --git a/linux-user/alpha/cpu_loop.c b/linux-user/alpha/cpu_loop.c
new file mode 100644
index 00..b7700a5561
--- /dev/null
+++ b/linux-user/alpha/cpu_loop.c
@@ -0,0 +1,26 @@
+/*
+ *  qemu user cpu loop
+ *
+ *  Copyright (c) 2003-2008 Fabrice Bellard
+ *
+ *  This program is free software; you can redistribute it and/or modify

[Qemu-devel] [PATCH for 2.13 v3 01/20] linux-user: create a dummy per arch signal.c

2018-04-11 Thread Laurent Vivier
Create a signal-common.h for future use by these new files
and use it in the existing signal.c

Signed-off-by: Laurent Vivier 
Reviewed-by: Alex Bennée 
Reviewed-by: Richard Henderson 
---
 linux-user/Makefile.objs   |  2 +-
 linux-user/aarch64/signal.c| 18 +++
 linux-user/alpha/signal.c  | 18 +++
 linux-user/arm/signal.c| 18 +++
 linux-user/cris/signal.c   | 18 +++
 linux-user/hppa/signal.c   | 18 +++
 linux-user/i386/signal.c   | 18 +++
 linux-user/m68k/signal.c   | 18 +++
 linux-user/microblaze/signal.c | 18 +++
 linux-user/mips/signal.c   | 18 +++
 linux-user/mips64/signal.c | 18 +++
 linux-user/nios2/signal.c  | 18 +++
 linux-user/openrisc/signal.c   | 18 +++
 linux-user/ppc/signal.c| 18 +++
 linux-user/riscv/signal.c  | 18 +++
 linux-user/s390x/signal.c  | 18 +++
 linux-user/sh4/signal.c| 18 +++
 linux-user/signal-common.h | 50 ++
 linux-user/signal.c| 41 ++
 linux-user/sparc/signal.c  | 18 +++
 linux-user/sparc64/signal.c| 18 +++
 linux-user/tilegx/signal.c | 18 +++
 linux-user/x86_64/signal.c | 18 +++
 linux-user/xtensa/signal.c | 18 +++
 24 files changed, 440 insertions(+), 31 deletions(-)
 create mode 100644 linux-user/aarch64/signal.c
 create mode 100644 linux-user/alpha/signal.c
 create mode 100644 linux-user/arm/signal.c
 create mode 100644 linux-user/cris/signal.c
 create mode 100644 linux-user/hppa/signal.c
 create mode 100644 linux-user/i386/signal.c
 create mode 100644 linux-user/m68k/signal.c
 create mode 100644 linux-user/microblaze/signal.c
 create mode 100644 linux-user/mips/signal.c
 create mode 100644 linux-user/mips64/signal.c
 create mode 100644 linux-user/nios2/signal.c
 create mode 100644 linux-user/openrisc/signal.c
 create mode 100644 linux-user/ppc/signal.c
 create mode 100644 linux-user/riscv/signal.c
 create mode 100644 linux-user/s390x/signal.c
 create mode 100644 linux-user/sh4/signal.c
 create mode 100644 linux-user/signal-common.h
 create mode 100644 linux-user/sparc/signal.c
 create mode 100644 linux-user/sparc64/signal.c
 create mode 100644 linux-user/tilegx/signal.c
 create mode 100644 linux-user/x86_64/signal.c
 create mode 100644 linux-user/xtensa/signal.c

diff --git a/linux-user/Makefile.objs b/linux-user/Makefile.objs
index 8c93058100..811a7f5ce5 100644
--- a/linux-user/Makefile.objs
+++ b/linux-user/Makefile.objs
@@ -1,6 +1,6 @@
 obj-y = main.o syscall.o strace.o mmap.o signal.o \
elfload.o linuxload.o uaccess.o uname.o \
-   safe-syscall.o
+   safe-syscall.o $(TARGET_ABI_DIR)/signal.o
 
 obj-$(TARGET_HAS_BFLT) += flatload.o
 obj-$(TARGET_I386) += vm86.o
diff --git a/linux-user/aarch64/signal.c b/linux-user/aarch64/signal.c
new file mode 100644
index 00..02ca338b6c
--- /dev/null
+++ b/linux-user/aarch64/signal.c
@@ -0,0 +1,18 @@
+/*
+ *  Emulation of Linux signals
+ *
+ *  Copyright (c) 2003 Fabrice Bellard
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
diff --git a/linux-user/alpha/signal.c b/linux-user/alpha/signal.c
new file mode 100644
index 00..02ca338b6c
--- /dev/null
+++ b/linux-user/alpha/signal.c
@@ -0,0 +1,18 @@
+/*
+ *  Emulation of Linux signals
+ *
+ *  Copyright (c) 2003 Fabrice Bellard
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
diff --git a/linux-user/arm/signal.c 

[Qemu-devel] [PATCH v4 07/13] hbitmap: Add @advance param to hbitmap_iter_next()

2018-04-11 Thread Max Reitz
This new parameter allows the caller to just query the next dirty
position without moving the iterator.

Signed-off-by: Max Reitz 
Reviewed-by: Fam Zheng 
Reviewed-by: John Snow 
---
 include/qemu/hbitmap.h |  5 -
 block/backup.c |  2 +-
 block/dirty-bitmap.c   |  2 +-
 tests/test-hbitmap.c   | 26 +-
 util/hbitmap.c | 10 +++---
 5 files changed, 26 insertions(+), 19 deletions(-)

diff --git a/include/qemu/hbitmap.h b/include/qemu/hbitmap.h
index 6b6490ecad..ddca52c48e 100644
--- a/include/qemu/hbitmap.h
+++ b/include/qemu/hbitmap.h
@@ -324,11 +324,14 @@ void hbitmap_free_meta(HBitmap *hb);
 /**
  * hbitmap_iter_next:
  * @hbi: HBitmapIter to operate on.
+ * @advance: If true, advance the iterator.  Otherwise, the next call
+ *   of this function will return the same result (if that
+ *   position is still dirty).
  *
  * Return the next bit that is set in @hbi's associated HBitmap,
  * or -1 if all remaining bits are zero.
  */
-int64_t hbitmap_iter_next(HBitmapIter *hbi);
+int64_t hbitmap_iter_next(HBitmapIter *hbi, bool advance);
 
 /**
  * hbitmap_iter_next_word:
diff --git a/block/backup.c b/block/backup.c
index 453cd62c24..60374fe866 100644
--- a/block/backup.c
+++ b/block/backup.c
@@ -368,7 +368,7 @@ static int coroutine_fn 
backup_run_incremental(BackupBlockJob *job)
 HBitmapIter hbi;
 
 hbitmap_iter_init(&hbi, job->copy_bitmap, 0);
-while ((cluster = hbitmap_iter_next(&hbi)) != -1) {
+while ((cluster = hbitmap_iter_next(&hbi, true)) != -1) {
 do {
 if (yield_and_check(job)) {
 return 0;
diff --git a/block/dirty-bitmap.c b/block/dirty-bitmap.c
index 967159479d..df7b711610 100644
--- a/block/dirty-bitmap.c
+++ b/block/dirty-bitmap.c
@@ -546,7 +546,7 @@ void bdrv_dirty_iter_free(BdrvDirtyBitmapIter *iter)
 
 int64_t bdrv_dirty_iter_next(BdrvDirtyBitmapIter *iter)
 {
-return hbitmap_iter_next(&iter->hbi);
+return hbitmap_iter_next(&iter->hbi, true);
 }
 
 /* Called within bdrv_dirty_bitmap_lock..unlock */
diff --git a/tests/test-hbitmap.c b/tests/test-hbitmap.c
index f29631f939..f2158f767d 100644
--- a/tests/test-hbitmap.c
+++ b/tests/test-hbitmap.c
@@ -46,7 +46,7 @@ static void hbitmap_test_check(TestHBitmapData *data,
 
 i = first;
 for (;;) {
-next = hbitmap_iter_next();
+next = hbitmap_iter_next(, true);
 if (next < 0) {
 next = data->size;
 }
@@ -435,25 +435,25 @@ static void test_hbitmap_iter_granularity(TestHBitmapData 
*data,
 /* Note that hbitmap_test_check has to be invoked manually in this test.  
*/
 hbitmap_test_init(data, 131072 << 7, 7);
 hbitmap_iter_init(, data->hb, 0);
-g_assert_cmpint(hbitmap_iter_next(), <, 0);
+g_assert_cmpint(hbitmap_iter_next(, true), <, 0);
 
 hbitmap_test_set(data, ((L2 + L1 + 1) << 7) + 8, 8);
 hbitmap_iter_init(, data->hb, 0);
-g_assert_cmpint(hbitmap_iter_next(), ==, (L2 + L1 + 1) << 7);
-g_assert_cmpint(hbitmap_iter_next(), <, 0);
+g_assert_cmpint(hbitmap_iter_next(, true), ==, (L2 + L1 + 1) << 7);
+g_assert_cmpint(hbitmap_iter_next(, true), <, 0);
 
 hbitmap_iter_init(, data->hb, (L2 + L1 + 2) << 7);
-g_assert_cmpint(hbitmap_iter_next(), <, 0);
+g_assert_cmpint(hbitmap_iter_next(, true), <, 0);
 
 hbitmap_test_set(data, (131072 << 7) - 8, 8);
 hbitmap_iter_init(, data->hb, 0);
-g_assert_cmpint(hbitmap_iter_next(), ==, (L2 + L1 + 1) << 7);
-g_assert_cmpint(hbitmap_iter_next(), ==, 131071 << 7);
-g_assert_cmpint(hbitmap_iter_next(), <, 0);
+g_assert_cmpint(hbitmap_iter_next(, true), ==, (L2 + L1 + 1) << 7);
+g_assert_cmpint(hbitmap_iter_next(, true), ==, 131071 << 7);
+g_assert_cmpint(hbitmap_iter_next(, true), <, 0);
 
 hbitmap_iter_init(, data->hb, (L2 + L1 + 2) << 7);
-g_assert_cmpint(hbitmap_iter_next(), ==, 131071 << 7);
-g_assert_cmpint(hbitmap_iter_next(), <, 0);
+g_assert_cmpint(hbitmap_iter_next(, true), ==, 131071 << 7);
+g_assert_cmpint(hbitmap_iter_next(, true), <, 0);
 }
 
 static void hbitmap_test_set_boundary_bits(TestHBitmapData *data, ssize_t diff)
@@ -893,7 +893,7 @@ static void test_hbitmap_serialize_zeroes(TestHBitmapData 
*data,
 for (i = 0; i < num_positions; i++) {
 hbitmap_deserialize_zeroes(data->hb, positions[i], min_l1, true);
 hbitmap_iter_init(, data->hb, 0);
-next = hbitmap_iter_next();
+next = hbitmap_iter_next(, true);
 if (i == num_positions - 1) {
 g_assert_cmpint(next, ==, -1);
 } else {
@@ -919,10 +919,10 @@ static void test_hbitmap_iter_and_reset(TestHBitmapData 
*data,
 
 hbitmap_iter_init(, data->hb, BITS_PER_LONG - 1);
 
-hbitmap_iter_next();
+hbitmap_iter_next(, true);
 
 hbitmap_reset_all(data->hb);
-hbitmap_iter_next();
+hbitmap_iter_next(, true);
 }
 
 static void test_hbitmap_next_zero_check(TestHBitmapData *data, 

[Qemu-devel] [PATCH v4 04/13] block/mirror: Wait for in-flight op conflicts

2018-04-11 Thread Max Reitz
This patch makes the mirror code differentiate between simply waiting
for any operation to complete (mirror_wait_for_free_in_flight_slot())
and specifically waiting for all operations touching a certain range of
the virtual disk to complete (mirror_wait_on_conflicts()).

Signed-off-by: Max Reitz 
Reviewed-by: Fam Zheng 
---
 block/mirror.c | 102 +++--
 1 file changed, 84 insertions(+), 18 deletions(-)

diff --git a/block/mirror.c b/block/mirror.c
index 1af6481876..964ffbe682 100644
--- a/block/mirror.c
+++ b/block/mirror.c
@@ -14,6 +14,7 @@
 #include "qemu/osdep.h"
 #include "qemu/cutils.h"
 #include "qemu/coroutine.h"
+#include "qemu/range.h"
 #include "trace.h"
 #include "block/blockjob_int.h"
 #include "block/block_int.h"
@@ -88,6 +89,7 @@ struct MirrorOp {
  * mirror_co_discard() before yielding for the first time */
 int64_t *bytes_handled;
 
+bool is_pseudo_op;
 CoQueue waiting_requests;
 
 QTAILQ_ENTRY(MirrorOp) next;
@@ -112,6 +114,41 @@ static BlockErrorAction mirror_error_action(MirrorBlockJob 
*s, bool read,
 }
 }
 
+static void coroutine_fn mirror_wait_on_conflicts(MirrorOp *self,
+  MirrorBlockJob *s,
+  uint64_t offset,
+  uint64_t bytes)
+{
+uint64_t self_start_chunk = offset / s->granularity;
+uint64_t self_end_chunk = DIV_ROUND_UP(offset + bytes, s->granularity);
+uint64_t self_nb_chunks = self_end_chunk - self_start_chunk;
+
+while (find_next_bit(s->in_flight_bitmap, self_end_chunk,
+ self_start_chunk) < self_end_chunk &&
+   s->ret >= 0)
+{
+MirrorOp *op;
+
+QTAILQ_FOREACH(op, &s->ops_in_flight, next) {
+uint64_t op_start_chunk = op->offset / s->granularity;
+uint64_t op_nb_chunks = DIV_ROUND_UP(op->offset + op->bytes,
+ s->granularity) -
+op_start_chunk;
+
+if (op == self) {
+continue;
+}
+
+if (ranges_overlap(self_start_chunk, self_nb_chunks,
+   op_start_chunk, op_nb_chunks))
+{
+qemu_co_queue_wait(&op->waiting_requests, NULL);
+break;
+}
+}
+}
+}
+
 static void coroutine_fn mirror_iteration_done(MirrorOp *op, int ret)
 {
 MirrorBlockJob *s = op->s;
@@ -234,13 +271,22 @@ static int mirror_cow_align(MirrorBlockJob *s, int64_t 
*offset,
 return ret;
 }
 
-static inline void mirror_wait_for_io(MirrorBlockJob *s)
+static inline void mirror_wait_for_free_in_flight_slot(MirrorBlockJob *s)
 {
 MirrorOp *op;
 
-op = QTAILQ_FIRST(&s->ops_in_flight);
-assert(op);
-qemu_co_queue_wait(&op->waiting_requests, NULL);
+QTAILQ_FOREACH(op, &s->ops_in_flight, next) {
+/* Do not wait on pseudo ops, because it may in turn wait on
+ * some other operation to start, which may in fact be the
+ * caller of this function.  Since there is only one pseudo op
+ * at any given time, we will always find some real operation
+ * to wait on. */
+if (!op->is_pseudo_op) {
+qemu_co_queue_wait(&op->waiting_requests, NULL);
+return;
+}
+}
+abort();
 }
 
 /* Perform a mirror copy operation.
@@ -284,7 +330,7 @@ static void coroutine_fn mirror_co_read(void *opaque)
 
 while (s->buf_free_count < nb_chunks) {
 trace_mirror_yield_in_flight(s, op->offset, s->in_flight);
-mirror_wait_for_io(s);
+mirror_wait_for_free_in_flight_slot(s);
 }
 
 /* Now make a QEMUIOVector taking enough granularity-sized chunks
@@ -384,8 +430,9 @@ static unsigned mirror_perform(MirrorBlockJob *s, int64_t 
offset,
 static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s)
 {
 BlockDriverState *source = s->source;
-int64_t offset, first_chunk;
-uint64_t delay_ns = 0;
+MirrorOp *pseudo_op;
+int64_t offset;
+uint64_t delay_ns = 0, ret = 0;
 /* At least the first dirty chunk is mirrored in one iteration. */
 int nb_chunks = 1;
 bool write_zeroes_ok = bdrv_can_write_zeroes_with_unmap(blk_bs(s->target));
@@ -401,11 +448,7 @@ static uint64_t coroutine_fn 
mirror_iteration(MirrorBlockJob *s)
 }
 bdrv_dirty_bitmap_unlock(s->dirty_bitmap);
 
-first_chunk = offset / s->granularity;
-while (test_bit(first_chunk, s->in_flight_bitmap)) {
-trace_mirror_yield_in_flight(s, offset, s->in_flight);
-mirror_wait_for_io(s);
-}
+mirror_wait_on_conflicts(NULL, s, offset, 1);
 
 block_job_pause_point(&s->common);
 
@@ -442,6 +485,21 @@ static uint64_t coroutine_fn 
mirror_iteration(MirrorBlockJob *s)
nb_chunks * s->granularity);
 

[Qemu-devel] [PATCH for 2.13 v2 00/19] linux-user: move arch specific parts from main.c to arch directories

2018-04-11 Thread Laurent Vivier
This series moves from main.c the architecture specific parts
to the architecture directory.

This is the continuation of my series
  "linux-user: move arch specific parts to arch directories"
which, since its v2, includes only the signal.c parts.

For each architecture, there are two parts:

  - cpu_loop(), and the function with its
    dependencies is moved to <arch>/cpu_loop.c

  - the prologue of the cpu_loop(), that was inlined
    in main(). We move it to a new function in
    <arch>/cpu_loop.c, target_cpu_copy_regs().

The first patch adds the skeleton to move the
parts to the architecture directories, a cpu_loop.c
file with an empty target_cpu_copy_regs() function,
called from main().

There is no change in the code.

Based-on: <20180411184556.17200-1-laur...@vivier.eu>
"[PATCH for 2.13 v3 00/20] move arch specific parts to arch directories"

v2:
  - rebase and add R-b
  - expand tabs
  - report changes introduced by
  4a6bf7adb9 target/xtensa: linux-user: rewind pc for restarted syscall

Laurent Vivier (19):
  linux-user: create a dummy per arch cpu_loop.c
  linux-user: move i386/x86_64 cpu loop to i386 directory
  linux-user: move aarch64 cpu loop to aarch64 directory
  linux-user: move arm cpu loop to arm directory
  linux-user: move sparc/sparc64 cpu loop to sparc directory
  linux-user: move ppc/ppc64 cpu loop to ppc directory
  linux-user: move mips/mips64 cpu loop to mips directory
  linux-user: move nios2 cpu loop to nios2 directory
  linux-user: move openrisc cpu loop to openrisc directory
  linux-user: move sh4 cpu loop to sh4 directory
  linux-user: move cris cpu loop to cris directory
  linux-user: move microblaze cpu loop to microblaze directory
  linux-user: move m68k cpu loop to m68k directory
  linux-user: move alpha cpu loop to alpha directory
  linux-user: move s390x cpu loop to s390x directory
  linux-user: move tilegx cpu loop to tilegx directory
  linux-user: move riscv cpu loop to riscv directory
  linux-user: move hppa cpu loop to hppa directory
  linux-user: move xtensa cpu loop to xtensa directory

 linux-user/Makefile.objs |3 +-
 linux-user/aarch64/cpu_loop.c|  182 ++
 linux-user/alpha/cpu_loop.c  |  225 ++
 linux-user/arm/cpu_loop.c|  456 
 linux-user/cpu_loop-common.h |   37 +
 linux-user/cris/cpu_loop.c   |  115 +
 linux-user/hppa/cpu_loop.c   |  211 ++
 linux-user/i386/cpu_loop.c   |  369 
 linux-user/m68k/cpu_loop.c   |  170 ++
 linux-user/main.c| 4440 +-
 linux-user/microblaze/cpu_loop.c |  176 ++
 linux-user/mips/cpu_loop.c   |  749 +++
 linux-user/mips64/cpu_loop.c |   20 +
 linux-user/nios2/cpu_loop.c  |  152 ++
 linux-user/openrisc/cpu_loop.c   |  115 +
 linux-user/ppc/cpu_loop.c|  579 +
 linux-user/riscv/cpu_loop.c  |  118 +
 linux-user/s390x/cpu_loop.c  |  165 ++
 linux-user/sh4/cpu_loop.c|  111 +
 linux-user/sparc/cpu_loop.c  |  306 +++
 linux-user/sparc64/cpu_loop.c|   20 +
 linux-user/tilegx/cpu_loop.c |  286 +++
 linux-user/x86_64/cpu_loop.c |   20 +
 linux-user/xtensa/cpu_loop.c |  267 +++
 24 files changed, 4906 insertions(+), 4386 deletions(-)
 create mode 100644 linux-user/aarch64/cpu_loop.c
 create mode 100644 linux-user/alpha/cpu_loop.c
 create mode 100644 linux-user/arm/cpu_loop.c
 create mode 100644 linux-user/cpu_loop-common.h
 create mode 100644 linux-user/cris/cpu_loop.c
 create mode 100644 linux-user/hppa/cpu_loop.c
 create mode 100644 linux-user/i386/cpu_loop.c
 create mode 100644 linux-user/m68k/cpu_loop.c
 create mode 100644 linux-user/microblaze/cpu_loop.c
 create mode 100644 linux-user/mips/cpu_loop.c
 create mode 100644 linux-user/mips64/cpu_loop.c
 create mode 100644 linux-user/nios2/cpu_loop.c
 create mode 100644 linux-user/openrisc/cpu_loop.c
 create mode 100644 linux-user/ppc/cpu_loop.c
 create mode 100644 linux-user/riscv/cpu_loop.c
 create mode 100644 linux-user/s390x/cpu_loop.c
 create mode 100644 linux-user/sh4/cpu_loop.c
 create mode 100644 linux-user/sparc/cpu_loop.c
 create mode 100644 linux-user/sparc64/cpu_loop.c
 create mode 100644 linux-user/tilegx/cpu_loop.c
 create mode 100644 linux-user/x86_64/cpu_loop.c
 create mode 100644 linux-user/xtensa/cpu_loop.c

-- 
2.14.3




[Qemu-devel] [PATCH v4 for-2.13 00/13] block/mirror: Add active-sync mirroring

2018-04-11 Thread Max Reitz
This series implements an active and synchronous mirroring mode.

Currently, the mirror block job is passive and asynchronous: Depending on
your start conditions, some part of the source disk starts as "dirty".
Then, the block job will (as a background operation) continuously copy
dirty parts to the target disk until all of the source disk is clean.
In the meantime, any write to the source disk dirties the affected area.

One effect of this operational mode is that the job may never converge:
If the writes to the source happen faster than the block job copies data
to the target, the job can never finish.

When the active mode implemented in this series is enabled, every write
request to the source will automatically trigger a synchronous write to
the target right afterwards.  Therefore, the source can never get dirty
faster than data is copied to the target.  Most importantly, once source
and target are in sync (BLOCK_JOB_READY is emitted), they will not
diverge (unless e.g. an I/O error occurs).

Active mirroring also improves on a second issue of the passive mode: We
do not have to read data from the source in order to write it to the
target.  When new data is written to the source in active mode, it is
automatically mirrored to the target, which saves us the superfluous
read from the source.


Things to do on top of this series:
- Allow switching between active and passive mode at runtime: Mainly
  hinges on the question of how to expose it to the user (ideally
  through a generic block-job-set-option command)

- Implement an asynchronous active mode (launch both write operations to
  the source and the target at the same time, and do not wait for the
  target operation to finish)

- Integrate the mirror BDS more tightly into the BDS graph:  Both source
  and target should be BdrvChildren (and the source should not be the
  "backing" child).  I'm working on this in a follow-up.

- Improve the mirror job coroutine use: Currently more of a hack, a
  follow-up will make this nicer.

- Add read-write-blocking mode: This series adds the write-blocking
  mode, where every write blocks until the data has been mirrored to the
  target.  read-write-blocking would also mirror data on reads from the
  source, which saves some performance (because that data does not have
  to be read twice) at the cost of latency on mirroring read operations.
  (Will be in the same follow-up.)


v4:
- Dropped patches 1 through 3.  Kevin has taken the old patch 3 (a drain
  test case) into his latest drain series ("Drain fixes and cleanups,
  part 3"), which to me implies that my preceding patches (the old 1 and
  2) may not have been enough.  As I explained in some older cover
  letter (it might have been v1...), all of those patches actually would
  have been only necessary for the follow-up (that is going to come
  along at some point...) where I plan to make the mirror target an
  immediate BdrvChild of the mirror node.
  This series does not make the mirror target such an immediate child,
  thus the mirror node continues to only have a single child, which
  means that those patches are actually not required for this series.  I
  only included them because they still made sense.
  However, now I am no longer convinced it makes sense to include them
  (because that would create a dependency on Kevin's series), so I'll
  push them off to the follow-up.

- Patch 12 (was: 15): Replaced "2.12" by "2.13" [Eric]

- Added Rb-s, rebased (with no effect, judging from
  git-backport-diff...)


git-backport-diff to v3:

Key:
[----] : patches are identical
[####] : number of functional differences between upstream/downstream patch
[down] : patch is downstream-only
The flags [FC] indicate (F)unctional and (C)ontextual differences, respectively

001/13:[----] [--] 'block/mirror: Pull out mirror_perform()'
002/13:[----] [--] 'block/mirror: Convert to coroutines'
003/13:[----] [--] 'block/mirror: Use CoQueue to wait on in-flight ops'
004/13:[----] [--] 'block/mirror: Wait for in-flight op conflicts'
005/13:[----] [--] 'block/mirror: Use source as a BdrvChild'
006/13:[----] [--] 'block: Generalize should_update_child() rule'
007/13:[----] [--] 'hbitmap: Add @advance param to hbitmap_iter_next()'
008/13:[----] [--] 'test-hbitmap: Add non-advancing iter_next tests'
009/13:[----] [--] 'block/dirty-bitmap: Add bdrv_dirty_iter_next_area'
010/13:[----] [--] 'block/mirror: Add MirrorBDSOpaque'
011/13:[----] [--] 'block/mirror: Add active mirroring'
012/13:[0004] [FC] 'block/mirror: Add copy mode QAPI interface'
013/13:[----] [--] 'iotests: Add test for active mirroring'


Max Reitz (13):
  block/mirror: Pull out mirror_perform()
  block/mirror: Convert to coroutines
  block/mirror: Use CoQueue to wait on in-flight ops
  block/mirror: Wait for in-flight op conflicts
  block/mirror: Use source as a BdrvChild
  block: Generalize should_update_child() rule
  hbitmap: Add @advance param to hbitmap_iter_next()
  test-hbitmap: Add non-advancing iter_next 

[Qemu-devel] [PATCH for 2.13 v3 09/20] linux-user: move s390x signal.c parts to s390x directory

2018-04-11 Thread Laurent Vivier
No code change, only move code from signal.c to
s390x/signal.c, except adding includes and
exporting setup_frame() and setup_rt_frame().

Signed-off-by: Laurent Vivier 
Acked-by: Cornelia Huck 
Reviewed-by: Alex Bennée 
Reviewed-by: Richard Henderson 
---
 linux-user/s390x/signal.c| 309 +++
 linux-user/s390x/target_signal.h |   6 +-
 linux-user/signal.c  | 306 --
 3 files changed, 314 insertions(+), 307 deletions(-)

diff --git a/linux-user/s390x/signal.c b/linux-user/s390x/signal.c
index 02ca338b6c..a204a85e4a 100644
--- a/linux-user/s390x/signal.c
+++ b/linux-user/s390x/signal.c
@@ -16,3 +16,312 @@
  *  You should have received a copy of the GNU General Public License
 *  along with this program; if not, see <http://www.gnu.org/licenses/>.
  */
+#include "qemu/osdep.h"
+#include "qemu.h"
+#include "target_signal.h"
+#include "signal-common.h"
+#include "linux-user/trace.h"
+
+#define __NUM_GPRS 16
+#define __NUM_FPRS 16
+#define __NUM_ACRS 16
+
+#define S390_SYSCALL_SIZE   2
+#define __SIGNAL_FRAMESIZE  160 /* FIXME: 31-bit mode -> 96 */
+
+#define _SIGCONTEXT_NSIG        64
+#define _SIGCONTEXT_NSIG_BPW    64 /* FIXME: 31-bit mode -> 32 */
+#define _SIGCONTEXT_NSIG_WORDS  (_SIGCONTEXT_NSIG / _SIGCONTEXT_NSIG_BPW)
+#define _SIGMASK_COPY_SIZE    (sizeof(unsigned long)*_SIGCONTEXT_NSIG_WORDS)
+#define PSW_ADDR_AMODE            0x0000000000000000UL /* 0x80000000UL for 
31-bit */
+#define S390_SYSCALL_OPCODE ((uint16_t)0x0a00)
+
+typedef struct {
+target_psw_t psw;
+target_ulong gprs[__NUM_GPRS];
+unsigned int acrs[__NUM_ACRS];
+} target_s390_regs_common;
+
+typedef struct {
+unsigned int fpc;
+double   fprs[__NUM_FPRS];
+} target_s390_fp_regs;
+
+typedef struct {
+target_s390_regs_common regs;
+target_s390_fp_regs fpregs;
+} target_sigregs;
+
+struct target_sigcontext {
+target_ulong   oldmask[_SIGCONTEXT_NSIG_WORDS];
+target_sigregs *sregs;
+};
+
+typedef struct {
+uint8_t callee_used_stack[__SIGNAL_FRAMESIZE];
+struct target_sigcontext sc;
+target_sigregs sregs;
+int signo;
+uint8_t retcode[S390_SYSCALL_SIZE];
+} sigframe;
+
+struct target_ucontext {
+target_ulong tuc_flags;
+struct target_ucontext *tuc_link;
+target_stack_t tuc_stack;
+target_sigregs tuc_mcontext;
+target_sigset_t tuc_sigmask;   /* mask last for extensibility */
+};
+
+typedef struct {
+uint8_t callee_used_stack[__SIGNAL_FRAMESIZE];
+uint8_t retcode[S390_SYSCALL_SIZE];
+struct target_siginfo info;
+struct target_ucontext uc;
+} rt_sigframe;
+
+static inline abi_ulong
+get_sigframe(struct target_sigaction *ka, CPUS390XState *env, size_t 
frame_size)
+{
+abi_ulong sp;
+
+/* Default to using normal stack */
+sp = env->regs[15];
+
+/* This is the X/Open sanctioned signal stack switching.  */
+if (ka->sa_flags & TARGET_SA_ONSTACK) {
+if (!sas_ss_flags(sp)) {
+sp = target_sigaltstack_used.ss_sp +
+ target_sigaltstack_used.ss_size;
+}
+}
+
+/* This is the legacy signal stack switching. */
+else if (/* FIXME !user_mode(regs) */ 0 &&
+ !(ka->sa_flags & TARGET_SA_RESTORER) &&
+ ka->sa_restorer) {
+sp = (abi_ulong) ka->sa_restorer;
+}
+
+return (sp - frame_size) & -8ul;
+}
+
+static void save_sigregs(CPUS390XState *env, target_sigregs *sregs)
+{
+int i;
+//save_access_regs(current->thread.acrs); FIXME
+
+/* Copy a 'clean' PSW mask to the user to avoid leaking
+   information about whether PER is currently on.  */
+__put_user(env->psw.mask, &sregs->regs.psw.mask);
+__put_user(env->psw.addr, &sregs->regs.psw.addr);
+for (i = 0; i < 16; i++) {
+__put_user(env->regs[i], &sregs->regs.gprs[i]);
+}
+for (i = 0; i < 16; i++) {
+__put_user(env->aregs[i], &sregs->regs.acrs[i]);
+}
+/*
+ * We have to store the fp registers to current->thread.fp_regs
+ * to merge them with the emulated registers.
+ */
+//save_fp_regs(&current->thread.fp_regs); FIXME
+for (i = 0; i < 16; i++) {
+__put_user(get_freg(env, i)->ll, &sregs->fpregs.fprs[i]);
+}
+}
+
+void setup_frame(int sig, struct target_sigaction *ka,
+ target_sigset_t *set, CPUS390XState *env)
+{
+sigframe *frame;
+abi_ulong frame_addr;
+
+frame_addr = get_sigframe(ka, env, sizeof(*frame));
+trace_user_setup_frame(env, frame_addr);
+if (!lock_user_struct(VERIFY_WRITE, frame, frame_addr, 0)) {
+goto give_sigsegv;
+}
+
+__put_user(set->sig[0], &frame->sc.oldmask[0]);
+
+save_sigregs(env, &frame->sregs);
+
+__put_user((abi_ulong)(unsigned long)&frame->sregs,
+   (abi_ulong *)&frame->sc.sregs);
+
+/* Set up to return from userspace.  If provided, use a stub
+   already in userspace.  */
+if (ka->sa_flags & 

[Qemu-devel] [PATCH v4 06/13] block: Generalize should_update_child() rule

2018-04-11 Thread Max Reitz
Currently, bdrv_replace_node() refuses to create loops from one BDS to
itself if the BDS to be replaced is the backing node of the BDS to
replace it: Say there is a node A and a node B.  Replacing B by A means
making all references to B point to A.  If B is a child of A (i.e. A has
a reference to B), that would mean we would have to make this reference
point to A itself -- so we'd create a loop.

bdrv_replace_node() (through should_update_child()) refuses to do so if
B is the backing node of A.  There is no reason why we should create
loops if B is not the backing node of A, though.  The BDS graph should
never contain loops, so we should always refuse to create them.

If B is a child of A and B is to be replaced by A, we should simply
leave B in place there because it is the most sensible choice.

A more specific argument would be: Putting filter drivers into the BDS
graph is basically the same as appending an overlay to a backing chain.
But the main child BDS of a filter driver is not "backing" but "file",
so restricting the no-loop rule to backing nodes would fail here.

Signed-off-by: Max Reitz 
Reviewed-by: Fam Zheng 
---
 include/block/block_int.h |  2 ++
 block.c   | 44 ++--
 2 files changed, 36 insertions(+), 10 deletions(-)

diff --git a/include/block/block_int.h b/include/block/block_int.h
index c4dd1d4bb8..8d63a1b0c1 100644
--- a/include/block/block_int.h
+++ b/include/block/block_int.h
@@ -616,6 +616,8 @@ struct BdrvChild {
 QLIST_ENTRY(BdrvChild) next_parent;
 };
 
+typedef QLIST_HEAD(BdrvChildList, BdrvChild) BdrvChildList;
+
 /*
  * Note: the function bdrv_append() copies and swaps contents of
  * BlockDriverStates, so if you add new fields to this struct, please
diff --git a/block.c b/block.c
index a2caadf0a0..9294b89eb7 100644
--- a/block.c
+++ b/block.c
@@ -3383,16 +3383,39 @@ static bool should_update_child(BdrvChild *c, 
BlockDriverState *to)
 return false;
 }
 
-if (c->role == &child_backing) {
-/* If @from is a backing file of @to, ignore the child to avoid
- * creating a loop. We only want to change the pointer of other
- * parents. */
-QLIST_FOREACH(to_c, &to->children, next) {
-if (to_c == c) {
-break;
-}
-}
-if (to_c) {
+/* If the child @c belongs to the BDS @to, replacing the current
+ * c->bs by @to would mean to create a loop.
+ *
+ * Such a case occurs when appending a BDS to a backing chain.
+ * For instance, imagine the following chain:
+ *
+ *   guest device -> node A -> further backing chain...
+ *
+ * Now we create a new BDS B which we want to put on top of this
+ * chain, so we first attach A as its backing node:
+ *
+ *   node B
+ * |
+ * v
+ *   guest device -> node A -> further backing chain...
+ *
+ * Finally we want to replace A by B.  When doing that, we want to
+ * replace all pointers to A by pointers to B -- except for the
+ * pointer from B because (1) that would create a loop, and (2)
+ * that pointer should simply stay intact:
+ *
+ *   guest device -> node B
+ * |
+ * v
+ *   node A -> further backing chain...
+ *
+ * In general, when replacing a node A (c->bs) by a node B (@to),
+ * if A is a child of B, that means we cannot replace A by B there
+ * because that would create a loop.  Silently detaching A from B
+ * is also not really an option.  So overall just leaving A in
+ * place there is the most sensible choice. */
+QLIST_FOREACH(to_c, &to->children, next) {
+if (to_c == c) {
 return false;
 }
 }
@@ -3418,6 +3441,7 @@ void bdrv_replace_node(BlockDriverState *from, 
BlockDriverState *to,
 
 /* Put all parents into @list and calculate their cumulative permissions */
 QLIST_FOREACH_SAFE(c, >parents, next_parent, next) {
+assert(c->bs == from);
 if (!should_update_child(c, to)) {
 continue;
 }
-- 
2.14.3




[Qemu-devel] [PATCH v4 01/13] block/mirror: Pull out mirror_perform()

2018-04-11 Thread Max Reitz
When converting mirror's I/O to coroutines, we are going to need a point
where these coroutines are created.  mirror_perform() is going to be
that point.

Signed-off-by: Max Reitz 
Reviewed-by: Fam Zheng 
Reviewed-by: Vladimir Sementsov-Ogievskiy 
Reviewed-by: Jeff Cody 
---
 block/mirror.c | 51 +--
 1 file changed, 29 insertions(+), 22 deletions(-)

diff --git a/block/mirror.c b/block/mirror.c
index 820f512c7b..1718571766 100644
--- a/block/mirror.c
+++ b/block/mirror.c
@@ -82,6 +82,12 @@ typedef struct MirrorOp {
 uint64_t bytes;
 } MirrorOp;
 
+typedef enum MirrorMethod {
+MIRROR_METHOD_COPY,
+MIRROR_METHOD_ZERO,
+MIRROR_METHOD_DISCARD,
+} MirrorMethod;
+
 static BlockErrorAction mirror_error_action(MirrorBlockJob *s, bool read,
 int error)
 {
@@ -321,6 +327,22 @@ static void mirror_do_zero_or_discard(MirrorBlockJob *s,
 }
 }
 
+static unsigned mirror_perform(MirrorBlockJob *s, int64_t offset,
+   unsigned bytes, MirrorMethod mirror_method)
+{
+switch (mirror_method) {
+case MIRROR_METHOD_COPY:
+return mirror_do_read(s, offset, bytes);
+case MIRROR_METHOD_ZERO:
+case MIRROR_METHOD_DISCARD:
+mirror_do_zero_or_discard(s, offset, bytes,
+  mirror_method == MIRROR_METHOD_DISCARD);
+return bytes;
+default:
+abort();
+}
+}
+
 static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s)
 {
 BlockDriverState *source = s->source;
@@ -387,11 +409,7 @@ static uint64_t coroutine_fn 
mirror_iteration(MirrorBlockJob *s)
 int ret;
 int64_t io_bytes;
 int64_t io_bytes_acct;
-enum MirrorMethod {
-MIRROR_METHOD_COPY,
-MIRROR_METHOD_ZERO,
-MIRROR_METHOD_DISCARD
-} mirror_method = MIRROR_METHOD_COPY;
+MirrorMethod mirror_method = MIRROR_METHOD_COPY;
 
 assert(!(offset % s->granularity));
 ret = bdrv_block_status_above(source, NULL, offset,
@@ -429,22 +447,11 @@ static uint64_t coroutine_fn 
mirror_iteration(MirrorBlockJob *s)
 }
 
 io_bytes = mirror_clip_bytes(s, offset, io_bytes);
-switch (mirror_method) {
-case MIRROR_METHOD_COPY:
-io_bytes = io_bytes_acct = mirror_do_read(s, offset, io_bytes);
-break;
-case MIRROR_METHOD_ZERO:
-case MIRROR_METHOD_DISCARD:
-mirror_do_zero_or_discard(s, offset, io_bytes,
-  mirror_method == MIRROR_METHOD_DISCARD);
-if (write_zeroes_ok) {
-io_bytes_acct = 0;
-} else {
-io_bytes_acct = io_bytes;
-}
-break;
-default:
-abort();
+io_bytes = mirror_perform(s, offset, io_bytes, mirror_method);
+if (mirror_method != MIRROR_METHOD_COPY && write_zeroes_ok) {
+io_bytes_acct = 0;
+} else {
+io_bytes_acct = io_bytes;
 }
 assert(io_bytes);
 offset += io_bytes;
@@ -638,7 +645,7 @@ static int coroutine_fn mirror_dirty_init(MirrorBlockJob *s)
 continue;
 }
 
-mirror_do_zero_or_discard(s, offset, bytes, false);
+mirror_perform(s, offset, bytes, MIRROR_METHOD_ZERO);
 offset += bytes;
 }
 
-- 
2.14.3




[Qemu-devel] [PATCH for 2.13 v3 11/20] linux-user: move alpha signal.c parts to alpha directory

2018-04-11 Thread Laurent Vivier
No code change, only move code from signal.c to
alpha/signal.c, except adding includes and
exporting setup_frame() and setup_rt_frame().

Signed-off-by: Laurent Vivier 
Reviewed-by: Philippe Mathieu-Daudé 
Reviewed-by: Alex Bennée 
Reviewed-by: Richard Henderson 
---

Notes:
v3: report changes introduced by
95a29a4e3e linux-user: fix alpha signal emulation

 linux-user/alpha/signal.c| 262 +++
 linux-user/alpha/target_signal.h |   5 +
 linux-user/signal.c  | 259 --
 3 files changed, 267 insertions(+), 259 deletions(-)

diff --git a/linux-user/alpha/signal.c b/linux-user/alpha/signal.c
index 02ca338b6c..a8c718f2c6 100644
--- a/linux-user/alpha/signal.c
+++ b/linux-user/alpha/signal.c
@@ -16,3 +16,265 @@
  *  You should have received a copy of the GNU General Public License
  *  along with this program; if not, see <http://www.gnu.org/licenses/>.
  */
+#include "qemu/osdep.h"
+#include "qemu.h"
+#include "target_signal.h"
+#include "signal-common.h"
+#include "linux-user/trace.h"
+
+struct target_sigcontext {
+abi_long sc_onstack;
+abi_long sc_mask;
+abi_long sc_pc;
+abi_long sc_ps;
+abi_long sc_regs[32];
+abi_long sc_ownedfp;
+abi_long sc_fpregs[32];
+abi_ulong sc_fpcr;
+abi_ulong sc_fp_control;
+abi_ulong sc_reserved1;
+abi_ulong sc_reserved2;
+abi_ulong sc_ssize;
+abi_ulong sc_sbase;
+abi_ulong sc_traparg_a0;
+abi_ulong sc_traparg_a1;
+abi_ulong sc_traparg_a2;
+abi_ulong sc_fp_trap_pc;
+abi_ulong sc_fp_trigger_sum;
+abi_ulong sc_fp_trigger_inst;
+};
+
+struct target_ucontext {
+abi_ulong tuc_flags;
+abi_ulong tuc_link;
+abi_ulong tuc_osf_sigmask;
+target_stack_t tuc_stack;
+struct target_sigcontext tuc_mcontext;
+target_sigset_t tuc_sigmask;
+};
+
+struct target_sigframe {
+struct target_sigcontext sc;
+unsigned int retcode[3];
+};
+
+struct target_rt_sigframe {
+target_siginfo_t info;
+struct target_ucontext uc;
+unsigned int retcode[3];
+};
+
+#define INSN_MOV_R30_R160x47fe0410
+#define INSN_LDI_R0 0x201f
+#define INSN_CALLSYS0x0083
+
+static void setup_sigcontext(struct target_sigcontext *sc, CPUAlphaState *env,
+ abi_ulong frame_addr, target_sigset_t *set)
+{
+int i;
+
+__put_user(on_sig_stack(frame_addr), >sc_onstack);
+__put_user(set->sig[0], >sc_mask);
+__put_user(env->pc, >sc_pc);
+__put_user(8, >sc_ps);
+
+for (i = 0; i < 31; ++i) {
+__put_user(env->ir[i], >sc_regs[i]);
+}
+__put_user(0, >sc_regs[31]);
+
+for (i = 0; i < 31; ++i) {
+__put_user(env->fir[i], >sc_fpregs[i]);
+}
+__put_user(0, >sc_fpregs[31]);
+__put_user(cpu_alpha_load_fpcr(env), >sc_fpcr);
+
+__put_user(0, >sc_traparg_a0); /* FIXME */
+__put_user(0, >sc_traparg_a1); /* FIXME */
+__put_user(0, >sc_traparg_a2); /* FIXME */
+}
+
+static void restore_sigcontext(CPUAlphaState *env,
+   struct target_sigcontext *sc)
+{
+uint64_t fpcr;
+int i;
+
+__get_user(env->pc, >sc_pc);
+
+for (i = 0; i < 31; ++i) {
+__get_user(env->ir[i], >sc_regs[i]);
+}
+for (i = 0; i < 31; ++i) {
+__get_user(env->fir[i], >sc_fpregs[i]);
+}
+
+__get_user(fpcr, >sc_fpcr);
+cpu_alpha_store_fpcr(env, fpcr);
+}
+
+static inline abi_ulong get_sigframe(struct target_sigaction *sa,
+ CPUAlphaState *env,
+ unsigned long framesize)
+{
+abi_ulong sp = env->ir[IR_SP];
+
+/* This is the X/Open sanctioned signal stack switching.  */
+if ((sa->sa_flags & TARGET_SA_ONSTACK) != 0 && !sas_ss_flags(sp)) {
+sp = target_sigaltstack_used.ss_sp + target_sigaltstack_used.ss_size;
+}
+return (sp - framesize) & -32;
+}
+
+void setup_frame(int sig, struct target_sigaction *ka,
+ target_sigset_t *set, CPUAlphaState *env)
+{
+abi_ulong frame_addr, r26;
+struct target_sigframe *frame;
+int err = 0;
+
+frame_addr = get_sigframe(ka, env, sizeof(*frame));
+trace_user_setup_frame(env, frame_addr);
+if (!lock_user_struct(VERIFY_WRITE, frame, frame_addr, 0)) {
+goto give_sigsegv;
+}
+
+setup_sigcontext(>sc, env, frame_addr, set);
+
+if (ka->sa_restorer) {
+r26 = ka->sa_restorer;
+} else {
+__put_user(INSN_MOV_R30_R16, >retcode[0]);
+__put_user(INSN_LDI_R0 + TARGET_NR_sigreturn,
+   >retcode[1]);
+__put_user(INSN_CALLSYS, >retcode[2]);
+/* imb() */
+r26 = frame_addr + offsetof(struct target_sigframe, retcode);
+}
+
+unlock_user_struct(frame, frame_addr, 1);
+
+if (err) {
+give_sigsegv:
+force_sigsegv(sig);
+return;
+

[Qemu-devel] [PATCH v4 03/13] block/mirror: Use CoQueue to wait on in-flight ops

2018-04-11 Thread Max Reitz
Attach a CoQueue to each in-flight operation so that, if we need to wait
for any of them, we can wait on that queue instead of just blindly
yielding and hoping for some operation to wake us.

A later patch will use this infrastructure to allow requests accessing
the same area of the virtual disk to specifically wait for each other.

Signed-off-by: Max Reitz 
Reviewed-by: Fam Zheng 
---
 block/mirror.c | 34 +++---
 1 file changed, 23 insertions(+), 11 deletions(-)

diff --git a/block/mirror.c b/block/mirror.c
index bd8ee7d92c..1af6481876 100644
--- a/block/mirror.c
+++ b/block/mirror.c
@@ -13,6 +13,7 @@
 
 #include "qemu/osdep.h"
 #include "qemu/cutils.h"
+#include "qemu/coroutine.h"
 #include "trace.h"
 #include "block/blockjob_int.h"
 #include "block/block_int.h"
@@ -34,6 +35,8 @@ typedef struct MirrorBuffer {
 QSIMPLEQ_ENTRY(MirrorBuffer) next;
 } MirrorBuffer;
 
+typedef struct MirrorOp MirrorOp;
+
 typedef struct MirrorBlockJob {
 BlockJob common;
 RateLimit limit;
@@ -67,15 +70,15 @@ typedef struct MirrorBlockJob {
 unsigned long *in_flight_bitmap;
 int in_flight;
 int64_t bytes_in_flight;
+QTAILQ_HEAD(MirrorOpList, MirrorOp) ops_in_flight;
 int ret;
 bool unmap;
-bool waiting_for_io;
 int target_cluster_size;
 int max_iov;
 bool initial_zeroing_ongoing;
 } MirrorBlockJob;
 
-typedef struct MirrorOp {
+struct MirrorOp {
 MirrorBlockJob *s;
 QEMUIOVector qiov;
 int64_t offset;
@@ -84,7 +87,11 @@ typedef struct MirrorOp {
 /* The pointee is set by mirror_co_read(), mirror_co_zero(), and
  * mirror_co_discard() before yielding for the first time */
 int64_t *bytes_handled;
-} MirrorOp;
+
+CoQueue waiting_requests;
+
+QTAILQ_ENTRY(MirrorOp) next;
+};
 
 typedef enum MirrorMethod {
 MIRROR_METHOD_COPY,
@@ -125,7 +132,9 @@ static void coroutine_fn mirror_iteration_done(MirrorOp 
*op, int ret)
 
 chunk_num = op->offset / s->granularity;
 nb_chunks = DIV_ROUND_UP(op->bytes, s->granularity);
+
 bitmap_clear(s->in_flight_bitmap, chunk_num, nb_chunks);
+QTAILQ_REMOVE(>ops_in_flight, op, next);
 if (ret >= 0) {
 if (s->cow_bitmap) {
 bitmap_set(s->cow_bitmap, chunk_num, nb_chunks);
@@ -135,11 +144,9 @@ static void coroutine_fn mirror_iteration_done(MirrorOp 
*op, int ret)
 }
 }
 qemu_iovec_destroy(>qiov);
-g_free(op);
 
-if (s->waiting_for_io) {
-qemu_coroutine_enter(s->common.co);
-}
+qemu_co_queue_restart_all(>waiting_requests);
+g_free(op);
 }
 
 static void coroutine_fn mirror_write_complete(MirrorOp *op, int ret)
@@ -229,10 +236,11 @@ static int mirror_cow_align(MirrorBlockJob *s, int64_t 
*offset,
 
 static inline void mirror_wait_for_io(MirrorBlockJob *s)
 {
-assert(!s->waiting_for_io);
-s->waiting_for_io = true;
-qemu_coroutine_yield();
-s->waiting_for_io = false;
+MirrorOp *op;
+
+op = QTAILQ_FIRST(>ops_in_flight);
+assert(op);
+qemu_co_queue_wait(>waiting_requests, NULL);
 }
 
 /* Perform a mirror copy operation.
@@ -342,6 +350,7 @@ static unsigned mirror_perform(MirrorBlockJob *s, int64_t 
offset,
 .bytes  = bytes,
 .bytes_handled  = _handled,
 };
+qemu_co_queue_init(>waiting_requests);
 
 switch (mirror_method) {
 case MIRROR_METHOD_COPY:
@@ -357,6 +366,7 @@ static unsigned mirror_perform(MirrorBlockJob *s, int64_t 
offset,
 abort();
 }
 
+QTAILQ_INSERT_TAIL(>ops_in_flight, op, next);
 qemu_coroutine_enter(co);
 /* At this point, ownership of op has been moved to the coroutine
  * and the object may already be freed */
@@ -1283,6 +1293,8 @@ static void mirror_start_job(const char *job_id, 
BlockDriverState *bs,
 }
 }
 
+QTAILQ_INIT(>ops_in_flight);
+
 trace_mirror_start(bs, s, opaque);
 block_job_start(>common);
 return;
-- 
2.14.3




[Qemu-devel] [PATCH for 2.13 v3 19/20] linux-user: move ppc/ppc64 signal.c parts to ppc directory

2018-04-11 Thread Laurent Vivier
No code change, only move code from signal.c to
ppc/signal.c, except adding includes and
exporting setup_frame() and setup_rt_frame().

Signed-off-by: Laurent Vivier 
Reviewed-by: Alex Bennée 
Reviewed-by: Richard Henderson 
---
 linux-user/ppc/signal.c| 671 +
 linux-user/ppc/target_signal.h |   8 +-
 linux-user/signal.c| 669 
 3 files changed, 678 insertions(+), 670 deletions(-)

diff --git a/linux-user/ppc/signal.c b/linux-user/ppc/signal.c
index 02ca338b6c..15148d54a9 100644
--- a/linux-user/ppc/signal.c
+++ b/linux-user/ppc/signal.c
@@ -16,3 +16,674 @@
  *  You should have received a copy of the GNU General Public License
  *  along with this program; if not, see <http://www.gnu.org/licenses/>.
  */
+#include "qemu/osdep.h"
+#include "qemu.h"
+#include "target_signal.h"
+#include "signal-common.h"
+#include "linux-user/trace.h"
+
+/* Size of dummy stack frame allocated when calling signal handler.
+   See arch/powerpc/include/asm/ptrace.h.  */
+#if defined(TARGET_PPC64)
+#define SIGNAL_FRAMESIZE 128
+#else
+#define SIGNAL_FRAMESIZE 64
+#endif
+
+/* See arch/powerpc/include/asm/ucontext.h.  Only used for 32-bit PPC;
+   on 64-bit PPC, sigcontext and mcontext are one and the same.  */
+struct target_mcontext {
+target_ulong mc_gregs[48];
+/* Includes fpscr.  */
+uint64_t mc_fregs[33];
+#if defined(TARGET_PPC64)
+/* Pointer to the vector regs */
+target_ulong v_regs;
+#else
+target_ulong mc_pad[2];
+#endif
+/* We need to handle Altivec and SPE at the same time, which no
+   kernel needs to do.  Fortunately, the kernel defines this bit to
+   be Altivec-register-large all the time, rather than trying to
+   twiddle it based on the specific platform.  */
+union {
+/* SPE vector registers.  One extra for SPEFSCR.  */
+uint32_t spe[33];
+/* Altivec vector registers.  The packing of VSCR and VRSAVE
+   varies depending on whether we're PPC64 or not: PPC64 splits
+   them apart; PPC32 stuffs them together.
+   We also need to account for the VSX registers on PPC64
+*/
+#if defined(TARGET_PPC64)
+#define QEMU_NVRREG (34 + 16)
+/* On ppc64, this mcontext structure is naturally *unaligned*,
+ * or rather it is aligned on a 8 bytes boundary but not on
+ * a 16 bytes one. This pad fixes it up. This is also why the
+ * vector regs are referenced by the v_regs pointer above so
+ * any amount of padding can be added here
+ */
+target_ulong pad;
+#else
+/* On ppc32, we are already aligned to 16 bytes */
+#define QEMU_NVRREG 33
+#endif
+/* We cannot use ppc_avr_t here as we do *not* want the implied
+ * 16-bytes alignment that would result from it. This would have
+ * the effect of making the whole struct target_mcontext aligned
+ * which breaks the layout of struct target_ucontext on ppc64.
+ */
+uint64_t altivec[QEMU_NVRREG][2];
+#undef QEMU_NVRREG
+} mc_vregs;
+};
+
+/* See arch/powerpc/include/asm/sigcontext.h.  */
+struct target_sigcontext {
+target_ulong _unused[4];
+int32_t signal;
+#if defined(TARGET_PPC64)
+int32_t pad0;
+#endif
+target_ulong handler;
+target_ulong oldmask;
+target_ulong regs;  /* struct pt_regs __user * */
+#if defined(TARGET_PPC64)
+struct target_mcontext mcontext;
+#endif
+};
+
+/* Indices for target_mcontext.mc_gregs, below.
+   See arch/powerpc/include/asm/ptrace.h for details.  */
+enum {
+TARGET_PT_R0 = 0,
+TARGET_PT_R1 = 1,
+TARGET_PT_R2 = 2,
+TARGET_PT_R3 = 3,
+TARGET_PT_R4 = 4,
+TARGET_PT_R5 = 5,
+TARGET_PT_R6 = 6,
+TARGET_PT_R7 = 7,
+TARGET_PT_R8 = 8,
+TARGET_PT_R9 = 9,
+TARGET_PT_R10 = 10,
+TARGET_PT_R11 = 11,
+TARGET_PT_R12 = 12,
+TARGET_PT_R13 = 13,
+TARGET_PT_R14 = 14,
+TARGET_PT_R15 = 15,
+TARGET_PT_R16 = 16,
+TARGET_PT_R17 = 17,
+TARGET_PT_R18 = 18,
+TARGET_PT_R19 = 19,
+TARGET_PT_R20 = 20,
+TARGET_PT_R21 = 21,
+TARGET_PT_R22 = 22,
+TARGET_PT_R23 = 23,
+TARGET_PT_R24 = 24,
+TARGET_PT_R25 = 25,
+TARGET_PT_R26 = 26,
+TARGET_PT_R27 = 27,
+TARGET_PT_R28 = 28,
+TARGET_PT_R29 = 29,
+TARGET_PT_R30 = 30,
+TARGET_PT_R31 = 31,
+TARGET_PT_NIP = 32,
+TARGET_PT_MSR = 33,
+TARGET_PT_ORIG_R3 = 34,
+TARGET_PT_CTR = 35,
+TARGET_PT_LNK = 36,
+TARGET_PT_XER = 37,
+TARGET_PT_CCR = 38,
+/* Yes, there are two registers with #39.  One is 64-bit only.  */
+TARGET_PT_MQ = 39,
+TARGET_PT_SOFTE = 39,
+TARGET_PT_TRAP = 40,
+TARGET_PT_DAR = 41,
+TARGET_PT_DSISR = 42,
+TARGET_PT_RESULT = 43,
+TARGET_PT_REGS_COUNT = 44
+};
+
+
+struct target_ucontext {
+target_ulong tuc_flags;
+target_ulong tuc_link;

[Qemu-devel] [PATCH for 2.13 v3 14/20] linux-user: move hppa signal.c parts to hppa directory

2018-04-11 Thread Laurent Vivier
No code change, only move code from signal.c to
hppa/signal.c, except adding includes and
exporting setup_rt_frame().

Signed-off-by: Laurent Vivier 
Reviewed-by: Philippe Mathieu-Daudé 
Reviewed-by: Alex Bennée 
Reviewed-by: Richard Henderson 
---
 linux-user/hppa/signal.c| 192 
 linux-user/hppa/target_signal.h |   3 +
 linux-user/signal.c | 189 ---
 3 files changed, 195 insertions(+), 189 deletions(-)

diff --git a/linux-user/hppa/signal.c b/linux-user/hppa/signal.c
index 02ca338b6c..585af3a37f 100644
--- a/linux-user/hppa/signal.c
+++ b/linux-user/hppa/signal.c
@@ -16,3 +16,195 @@
  *  You should have received a copy of the GNU General Public License
  *  along with this program; if not, see <http://www.gnu.org/licenses/>.
  */
+#include "qemu/osdep.h"
+#include "qemu.h"
+#include "target_signal.h"
+#include "signal-common.h"
+#include "linux-user/trace.h"
+
+struct target_sigcontext {
+abi_ulong sc_flags;
+abi_ulong sc_gr[32];
+uint64_t sc_fr[32];
+abi_ulong sc_iasq[2];
+abi_ulong sc_iaoq[2];
+abi_ulong sc_sar;
+};
+
+struct target_ucontext {
+abi_uint tuc_flags;
+abi_ulong tuc_link;
+target_stack_t tuc_stack;
+abi_uint pad[1];
+struct target_sigcontext tuc_mcontext;
+target_sigset_t tuc_sigmask;
+};
+
+struct target_rt_sigframe {
+abi_uint tramp[9];
+target_siginfo_t info;
+struct target_ucontext uc;
+/* hidden location of upper halves of pa2.0 64-bit gregs */
+};
+
+static void setup_sigcontext(struct target_sigcontext *sc, CPUArchState *env)
+{
+int flags = 0;
+int i;
+
+/* ??? if on_sig_stack, flags |= 1 (PARISC_SC_FLAG_ONSTACK).  */
+
+if (env->iaoq_f < TARGET_PAGE_SIZE) {
+/* In the gateway page, executing a syscall.  */
+flags |= 2; /* PARISC_SC_FLAG_IN_SYSCALL */
+__put_user(env->gr[31], >sc_iaoq[0]);
+__put_user(env->gr[31] + 4, >sc_iaoq[1]);
+} else {
+__put_user(env->iaoq_f, >sc_iaoq[0]);
+__put_user(env->iaoq_b, >sc_iaoq[1]);
+}
+__put_user(0, >sc_iasq[0]);
+__put_user(0, >sc_iasq[1]);
+__put_user(flags, >sc_flags);
+
+__put_user(cpu_hppa_get_psw(env), >sc_gr[0]);
+for (i = 1; i < 32; ++i) {
+__put_user(env->gr[i], >sc_gr[i]);
+}
+
+__put_user((uint64_t)env->fr0_shadow << 32, >sc_fr[0]);
+for (i = 1; i < 32; ++i) {
+__put_user(env->fr[i], >sc_fr[i]);
+}
+
+__put_user(env->cr[CR_SAR], >sc_sar);
+}
+
+static void restore_sigcontext(CPUArchState *env, struct target_sigcontext *sc)
+{
+target_ulong psw;
+int i;
+
+__get_user(psw, >sc_gr[0]);
+cpu_hppa_put_psw(env, psw);
+
+for (i = 1; i < 32; ++i) {
+__get_user(env->gr[i], >sc_gr[i]);
+}
+for (i = 0; i < 32; ++i) {
+__get_user(env->fr[i], >sc_fr[i]);
+}
+cpu_hppa_loaded_fr0(env);
+
+__get_user(env->iaoq_f, >sc_iaoq[0]);
+__get_user(env->iaoq_b, >sc_iaoq[1]);
+__get_user(env->cr[CR_SAR], >sc_sar);
+}
+
+/* No, this doesn't look right, but it's copied straight from the kernel.  */
+#define PARISC_RT_SIGFRAME_SIZE32 \
+((sizeof(struct target_rt_sigframe) + 48 + 64) & -64)
+
+void setup_rt_frame(int sig, struct target_sigaction *ka,
+target_siginfo_t *info,
+target_sigset_t *set, CPUArchState *env)
+{
+abi_ulong frame_addr, sp, haddr;
+struct target_rt_sigframe *frame;
+int i;
+
+sp = env->gr[30];
+if (ka->sa_flags & TARGET_SA_ONSTACK) {
+if (sas_ss_flags(sp) == 0) {
+sp = (target_sigaltstack_used.ss_sp + 0x7f) & ~0x3f;
+}
+}
+frame_addr = QEMU_ALIGN_UP(sp, 64);
+sp = frame_addr + PARISC_RT_SIGFRAME_SIZE32;
+
+trace_user_setup_rt_frame(env, frame_addr);
+
+if (!lock_user_struct(VERIFY_WRITE, frame, frame_addr, 0)) {
+goto give_sigsegv;
+}
+
+tswap_siginfo(>info, info);
+frame->uc.tuc_flags = 0;
+frame->uc.tuc_link = 0;
+
+__put_user(target_sigaltstack_used.ss_sp, >uc.tuc_stack.ss_sp);
+__put_user(sas_ss_flags(get_sp_from_cpustate(env)),
+   >uc.tuc_stack.ss_flags);
+__put_user(target_sigaltstack_used.ss_size,
+   >uc.tuc_stack.ss_size);
+
+for (i = 0; i < TARGET_NSIG_WORDS; i++) {
+__put_user(set->sig[i], >uc.tuc_sigmask.sig[i]);
+}
+
+setup_sigcontext(>uc.tuc_mcontext, env);
+
+__put_user(0x3419, frame->tramp + 0); /* ldi 0,%r25 */
+__put_user(0x3414015a, frame->tramp + 1); /* ldi __NR_rt_sigreturn,%r20 */
+__put_user(0xe4008200, frame->tramp + 2); /* be,l 0x100(%sr2,%r0) */
+__put_user(0x08000240, frame->tramp + 3); /* nop */
+
+unlock_user_struct(frame, frame_addr, 1);
+
+env->gr[2] = h2g(frame->tramp);
+env->gr[30] = sp;
+env->gr[26] = sig;
+env->gr[25] = h2g(>info);
+

[Qemu-devel] [PATCH v4 02/13] block/mirror: Convert to coroutines

2018-04-11 Thread Max Reitz
In order to talk to the source BDS (and maybe in the future to the
target BDS as well) directly, we need to convert our existing AIO
requests into coroutine I/O requests.

Signed-off-by: Max Reitz 
Reviewed-by: Fam Zheng 
---
 block/mirror.c | 152 ++---
 1 file changed, 90 insertions(+), 62 deletions(-)

diff --git a/block/mirror.c b/block/mirror.c
index 1718571766..bd8ee7d92c 100644
--- a/block/mirror.c
+++ b/block/mirror.c
@@ -80,6 +80,10 @@ typedef struct MirrorOp {
 QEMUIOVector qiov;
 int64_t offset;
 uint64_t bytes;
+
+/* The pointee is set by mirror_co_read(), mirror_co_zero(), and
+ * mirror_co_discard() before yielding for the first time */
+int64_t *bytes_handled;
 } MirrorOp;
 
 typedef enum MirrorMethod {
@@ -101,7 +105,7 @@ static BlockErrorAction mirror_error_action(MirrorBlockJob 
*s, bool read,
 }
 }
 
-static void mirror_iteration_done(MirrorOp *op, int ret)
+static void coroutine_fn mirror_iteration_done(MirrorOp *op, int ret)
 {
 MirrorBlockJob *s = op->s;
 struct iovec *iov;
@@ -138,9 +142,8 @@ static void mirror_iteration_done(MirrorOp *op, int ret)
 }
 }
 
-static void mirror_write_complete(void *opaque, int ret)
+static void coroutine_fn mirror_write_complete(MirrorOp *op, int ret)
 {
-MirrorOp *op = opaque;
 MirrorBlockJob *s = op->s;
 
 aio_context_acquire(blk_get_aio_context(s->common.blk));
@@ -157,9 +160,8 @@ static void mirror_write_complete(void *opaque, int ret)
 aio_context_release(blk_get_aio_context(s->common.blk));
 }
 
-static void mirror_read_complete(void *opaque, int ret)
+static void coroutine_fn mirror_read_complete(MirrorOp *op, int ret)
 {
-MirrorOp *op = opaque;
 MirrorBlockJob *s = op->s;
 
 aio_context_acquire(blk_get_aio_context(s->common.blk));
@@ -174,8 +176,9 @@ static void mirror_read_complete(void *opaque, int ret)
 
 mirror_iteration_done(op, ret);
 } else {
-blk_aio_pwritev(s->target, op->offset, >qiov,
-0, mirror_write_complete, op);
+ret = blk_co_pwritev(s->target, op->offset,
+ op->qiov.size, >qiov, 0);
+mirror_write_complete(op, ret);
 }
 aio_context_release(blk_get_aio_context(s->common.blk));
 }
@@ -232,60 +235,57 @@ static inline void mirror_wait_for_io(MirrorBlockJob *s)
 s->waiting_for_io = false;
 }
 
-/* Submit async read while handling COW.
- * Returns: The number of bytes copied after and including offset,
- *  excluding any bytes copied prior to offset due to alignment.
- *  This will be @bytes if no alignment is necessary, or
- *  (new_end - offset) if tail is rounded up or down due to
- *  alignment or buffer limit.
+/* Perform a mirror copy operation.
+ *
+ * *op->bytes_handled is set to the number of bytes copied after and
+ * including offset, excluding any bytes copied prior to offset due
+ * to alignment.  This will be op->bytes if no alignment is necessary,
+ * or (new_end - op->offset) if the tail is rounded up or down due to
+ * alignment or buffer limit.
  */
-static uint64_t mirror_do_read(MirrorBlockJob *s, int64_t offset,
-   uint64_t bytes)
+static void coroutine_fn mirror_co_read(void *opaque)
 {
+MirrorOp *op = opaque;
+MirrorBlockJob *s = op->s;
 BlockBackend *source = s->common.blk;
 int nb_chunks;
 uint64_t ret;
-MirrorOp *op;
 uint64_t max_bytes;
 
 max_bytes = s->granularity * s->max_iov;
 
 /* We can only handle as much as buf_size at a time. */
-bytes = MIN(s->buf_size, MIN(max_bytes, bytes));
-assert(bytes);
-assert(bytes < BDRV_REQUEST_MAX_BYTES);
-ret = bytes;
+op->bytes = MIN(s->buf_size, MIN(max_bytes, op->bytes));
+assert(op->bytes);
+assert(op->bytes < BDRV_REQUEST_MAX_BYTES);
+*op->bytes_handled = op->bytes;
 
 if (s->cow_bitmap) {
-ret += mirror_cow_align(s, , );
+*op->bytes_handled += mirror_cow_align(s, >offset, >bytes);
 }
-assert(bytes <= s->buf_size);
+/* Cannot exceed BDRV_REQUEST_MAX_BYTES + INT_MAX */
+assert(*op->bytes_handled <= UINT_MAX);
+assert(op->bytes <= s->buf_size);
 /* The offset is granularity-aligned because:
  * 1) Caller passes in aligned values;
  * 2) mirror_cow_align is used only when target cluster is larger. */
-assert(QEMU_IS_ALIGNED(offset, s->granularity));
+assert(QEMU_IS_ALIGNED(op->offset, s->granularity));
 /* The range is sector-aligned, since bdrv_getlength() rounds up. */
-assert(QEMU_IS_ALIGNED(bytes, BDRV_SECTOR_SIZE));
-nb_chunks = DIV_ROUND_UP(bytes, s->granularity);
+assert(QEMU_IS_ALIGNED(op->bytes, BDRV_SECTOR_SIZE));
+nb_chunks = DIV_ROUND_UP(op->bytes, s->granularity);
 
 while (s->buf_free_count < nb_chunks) {
-trace_mirror_yield_in_flight(s, offset, s->in_flight);
+

[Qemu-devel] [PATCH for 2.13 v3 08/20] linux-user: move openrisc signal.c parts to openrisc directory

2018-04-11 Thread Laurent Vivier
No code change, only move code from signal.c to
openrisc/signal.c, except adding includes and
exporting setup_rt_frame().

Signed-off-by: Laurent Vivier 
Reviewed-by: Alex Bennée 
Reviewed-by: Richard Henderson 
---
 linux-user/openrisc/signal.c| 213 
 linux-user/openrisc/target_signal.h |   4 +-
 linux-user/signal.c | 211 ---
 3 files changed, 216 insertions(+), 212 deletions(-)

diff --git a/linux-user/openrisc/signal.c b/linux-user/openrisc/signal.c
index 02ca338b6c..0276808b59 100644
--- a/linux-user/openrisc/signal.c
+++ b/linux-user/openrisc/signal.c
@@ -16,3 +16,216 @@
  *  You should have received a copy of the GNU General Public License
  *  along with this program; if not, see <http://www.gnu.org/licenses/>.
  */
+#include "qemu/osdep.h"
+#include "qemu.h"
+#include "target_signal.h"
+#include "signal-common.h"
+#include "linux-user/trace.h"
+
+struct target_sigcontext {
+struct target_pt_regs regs;
+abi_ulong oldmask;
+abi_ulong usp;
+};
+
+struct target_ucontext {
+abi_ulong tuc_flags;
+abi_ulong tuc_link;
+target_stack_t tuc_stack;
+struct target_sigcontext tuc_mcontext;
+target_sigset_t tuc_sigmask;   /* mask last for extensibility */
+};
+
+struct target_rt_sigframe {
+abi_ulong pinfo;
+uint64_t puc;
+struct target_siginfo info;
+struct target_sigcontext sc;
+struct target_ucontext uc;
+unsigned char retcode[16];  /* trampoline code */
+};
+
+/* This is the asm-generic/ucontext.h version */
+#if 0
+static int restore_sigcontext(CPUOpenRISCState *regs,
+  struct target_sigcontext *sc)
+{
+unsigned int err = 0;
+unsigned long old_usp;
+
+/* Always make any pending restarted system call return -EINTR */
+current_thread_info()->restart_block.fn = do_no_restart_syscall;
+
+/* restore the regs from >regs (same as sc, since regs is first)
+ * (sc is already checked for VERIFY_READ since the sigframe was
+ *  checked in sys_sigreturn previously)
+ */
+
+if (copy_from_user(regs, , sizeof(struct target_pt_regs))) {
+goto badframe;
+}
+
+/* make sure the U-flag is set so user-mode cannot fool us */
+
+regs->sr &= ~SR_SM;
+
+/* restore the old USP as it was before we stacked the sc etc.
+ * (we cannot just pop the sigcontext since we aligned the sp and
+ *  stuff after pushing it)
+ */
+
+__get_user(old_usp, >usp);
+phx_signal("old_usp 0x%lx", old_usp);
+
+__PHX__ REALLY   /* ??? */
+wrusp(old_usp);
+regs->gpr[1] = old_usp;
+
+/* TODO: the other ports use regs->orig_XX to disable syscall checks
+ * after this completes, but we don't use that mechanism. maybe we can
+ * use it now ?
+ */
+
+return err;
+
+badframe:
+return 1;
+}
+#endif
+
+/* Set up a signal frame.  */
+
+static void setup_sigcontext(struct target_sigcontext *sc,
+ CPUOpenRISCState *regs,
+ unsigned long mask)
+{
+unsigned long usp = cpu_get_gpr(regs, 1);
+
+/* copy the regs. they are first in sc so we can use sc directly */
+
+/*copy_to_user(, regs, sizeof(struct target_pt_regs));*/
+
+/* Set the frametype to CRIS_FRAME_NORMAL for the execution of
+   the signal handler. The frametype will be restored to its previous
+   value in restore_sigcontext. */
+/*regs->frametype = CRIS_FRAME_NORMAL;*/
+
+/* then some other stuff */
+__put_user(mask, >oldmask);
+__put_user(usp, >usp);
+}
+
+static inline unsigned long align_sigframe(unsigned long sp)
+{
+return sp & ~3UL;
+}
+
+static inline abi_ulong get_sigframe(struct target_sigaction *ka,
+ CPUOpenRISCState *regs,
+ size_t frame_size)
+{
+unsigned long sp = cpu_get_gpr(regs, 1);
+int onsigstack = on_sig_stack(sp);
+
+/* redzone */
+/* This is the X/Open sanctioned signal stack switching.  */
+if ((ka->sa_flags & TARGET_SA_ONSTACK) != 0 && !onsigstack) {
+sp = target_sigaltstack_used.ss_sp + target_sigaltstack_used.ss_size;
+}
+
+sp = align_sigframe(sp - frame_size);
+
+/*
+ * If we are on the alternate signal stack and would overflow it, don't.
+ * Return an always-bogus address instead so we will die with SIGSEGV.
+ */
+
+if (onsigstack && !likely(on_sig_stack(sp))) {
+return -1L;
+}
+
+return sp;
+}
+
+void setup_rt_frame(int sig, struct target_sigaction *ka,
+target_siginfo_t *info,
+target_sigset_t *set, CPUOpenRISCState *env)
+{
+int err = 0;
+abi_ulong frame_addr;
+unsigned long return_ip;
+struct target_rt_sigframe *frame;
+abi_ulong info_addr, uc_addr;
+
+frame_addr = get_sigframe(ka, env, sizeof(*frame));
+

[Qemu-devel] [PATCH for 2.13 v3 18/20] linux-user: move mips/mips64 signal.c parts to mips directory

2018-04-11 Thread Laurent Vivier
No code change, only move code from signal.c to
mips/signal.c, except adding includes and
exporting setup_frame() and setup_rt_frame().

mips64/signal.c includes mips/signal.c

Signed-off-by: Laurent Vivier 
Reviewed-by: Philippe Mathieu-Daudé 
Reviewed-by: Alex Bennée 
Reviewed-by: Richard Henderson 
---

Notes:
v3: expand tabs

 linux-user/mips/signal.c  | 382 ++
 linux-user/mips/target_signal.h   |   9 +-
 linux-user/mips64/signal.c|   2 +
 linux-user/mips64/target_signal.h |   4 +-
 linux-user/signal.c   | 381 +
 5 files changed, 396 insertions(+), 382 deletions(-)

diff --git a/linux-user/mips/signal.c b/linux-user/mips/signal.c
index 02ca338b6c..adeb5a4241 100644
--- a/linux-user/mips/signal.c
+++ b/linux-user/mips/signal.c
@@ -16,3 +16,385 @@
  *  You should have received a copy of the GNU General Public License
  *  along with this program; if not, see <http://www.gnu.org/licenses/>.
  */
+#include "qemu/osdep.h"
+#include "qemu.h"
+#include "target_signal.h"
+#include "signal-common.h"
+#include "linux-user/trace.h"
+
+# if defined(TARGET_ABI_MIPSO32)
+struct target_sigcontext {
+uint32_t   sc_regmask; /* Unused */
+uint32_t   sc_status;
+uint64_t   sc_pc;
+uint64_t   sc_regs[32];
+uint64_t   sc_fpregs[32];
+uint32_t   sc_ownedfp; /* Unused */
+uint32_t   sc_fpc_csr;
+uint32_t   sc_fpc_eir; /* Unused */
+uint32_t   sc_used_math;
+uint32_t   sc_dsp; /* dsp status, was sc_ssflags */
+uint32_t   pad0;
+uint64_t   sc_mdhi;
+uint64_t   sc_mdlo;
+target_ulong   sc_hi1; /* Was sc_cause */
+target_ulong   sc_lo1; /* Was sc_badvaddr */
+target_ulong   sc_hi2; /* Was sc_sigset[4] */
+target_ulong   sc_lo2;
+target_ulong   sc_hi3;
+target_ulong   sc_lo3;
+};
+# else /* N32 || N64 */
+struct target_sigcontext {
+uint64_t sc_regs[32];
+uint64_t sc_fpregs[32];
+uint64_t sc_mdhi;
+uint64_t sc_hi1;
+uint64_t sc_hi2;
+uint64_t sc_hi3;
+uint64_t sc_mdlo;
+uint64_t sc_lo1;
+uint64_t sc_lo2;
+uint64_t sc_lo3;
+uint64_t sc_pc;
+uint32_t sc_fpc_csr;
+uint32_t sc_used_math;
+uint32_t sc_dsp;
+uint32_t sc_reserved;
+};
+# endif /* O32 */
+
+struct sigframe {
+uint32_t sf_ass[4]; /* argument save space for o32 */
+uint32_t sf_code[2];/* signal trampoline */
+struct target_sigcontext sf_sc;
+target_sigset_t sf_mask;
+};
+
+struct target_ucontext {
+target_ulong tuc_flags;
+target_ulong tuc_link;
+target_stack_t tuc_stack;
+target_ulong pad0;
+struct target_sigcontext tuc_mcontext;
+target_sigset_t tuc_sigmask;
+};
+
+struct target_rt_sigframe {
+uint32_t rs_ass[4];   /* argument save space for o32 */
+uint32_t rs_code[2];  /* signal trampoline */
+struct target_siginfo rs_info;
+struct target_ucontext rs_uc;
+};
+
+/* Install trampoline to jump back from signal handler */
+static inline int install_sigtramp(unsigned int *tramp,   unsigned int syscall)
+{
+int err = 0;
+
+/*
+ * Set up the return code ...
+ *
+ * li  v0, __NR__foo_sigreturn
+ * syscall
+ */
+
+__put_user(0x2402 + syscall, tramp + 0);
+__put_user(0x000c  , tramp + 1);
+return err;
+}
+
+static inline void setup_sigcontext(CPUMIPSState *regs,
+struct target_sigcontext *sc)
+{
+int i;
+
+__put_user(exception_resume_pc(regs), >sc_pc);
+regs->hflags &= ~MIPS_HFLAG_BMASK;
+
+__put_user(0, >sc_regs[0]);
+for (i = 1; i < 32; ++i) {
+__put_user(regs->active_tc.gpr[i], >sc_regs[i]);
+}
+
+__put_user(regs->active_tc.HI[0], >sc_mdhi);
+__put_user(regs->active_tc.LO[0], >sc_mdlo);
+
+/* Rather than checking for dsp existence, always copy.  The storage
+   would just be garbage otherwise.  */
+__put_user(regs->active_tc.HI[1], >sc_hi1);
+__put_user(regs->active_tc.HI[2], >sc_hi2);
+__put_user(regs->active_tc.HI[3], >sc_hi3);
+__put_user(regs->active_tc.LO[1], >sc_lo1);
+__put_user(regs->active_tc.LO[2], >sc_lo2);
+__put_user(regs->active_tc.LO[3], >sc_lo3);
+{
+uint32_t dsp = cpu_rddsp(0x3ff, regs);
+__put_user(dsp, >sc_dsp);
+}
+
+__put_user(1, >sc_used_math);
+
+for (i = 0; i < 32; ++i) {
+__put_user(regs->active_fpu.fpr[i].d, >sc_fpregs[i]);
+}
+}
+
+static inline void
+restore_sigcontext(CPUMIPSState *regs, struct target_sigcontext *sc)
+{
+int i;
+
+__get_user(regs->CP0_EPC, >sc_pc);
+
+__get_user(regs->active_tc.HI[0], >sc_mdhi);
+__get_user(regs->active_tc.LO[0], >sc_mdlo);
+
+for (i = 1; i < 32; ++i) {
+

[Qemu-devel] [PATCH for 2.13 v3 10/20] linux-user: move m68k signal.c parts to m68k directory

2018-04-11 Thread Laurent Vivier
No code change, only move code from signal.c to
m68k/signal.c, except adding includes and
exporting setup_frame() and setup_rt_frame().

Signed-off-by: Laurent Vivier 
Reviewed-by: Alex Bennée 
Reviewed-by: Richard Henderson 
---
 linux-user/m68k/signal.c| 410 
 linux-user/m68k/target_signal.h |   6 +-
 linux-user/signal.c | 407 ---
 3 files changed, 415 insertions(+), 408 deletions(-)

diff --git a/linux-user/m68k/signal.c b/linux-user/m68k/signal.c
index 02ca338b6c..fc72468a81 100644
--- a/linux-user/m68k/signal.c
+++ b/linux-user/m68k/signal.c
@@ -16,3 +16,413 @@
  *  You should have received a copy of the GNU General Public License
  *  along with this program; if not, see <http://www.gnu.org/licenses/>.
  */
+#include "qemu/osdep.h"
+#include "qemu.h"
+#include "target_signal.h"
+#include "signal-common.h"
+#include "linux-user/trace.h"
+
+struct target_sigcontext {
+abi_ulong  sc_mask;
+abi_ulong  sc_usp;
+abi_ulong  sc_d0;
+abi_ulong  sc_d1;
+abi_ulong  sc_a0;
+abi_ulong  sc_a1;
+unsigned short sc_sr;
+abi_ulong  sc_pc;
+};
+
+struct target_sigframe
+{
+abi_ulong pretcode;
+int sig;
+int code;
+abi_ulong psc;
+char retcode[8];
+abi_ulong extramask[TARGET_NSIG_WORDS-1];
+struct target_sigcontext sc;
+};
+
+typedef int target_greg_t;
+#define TARGET_NGREG 18
+typedef target_greg_t target_gregset_t[TARGET_NGREG];
+
+typedef struct target_fpregset {
+int f_fpcntl[3];
+int f_fpregs[8*3];
+} target_fpregset_t;
+
+struct target_mcontext {
+int version;
+target_gregset_t gregs;
+target_fpregset_t fpregs;
+};
+
+#define TARGET_MCONTEXT_VERSION 2
+
+struct target_ucontext {
+abi_ulong tuc_flags;
+abi_ulong tuc_link;
+target_stack_t tuc_stack;
+struct target_mcontext tuc_mcontext;
+abi_long tuc_filler[80];
+target_sigset_t tuc_sigmask;
+};
+
+struct target_rt_sigframe
+{
+abi_ulong pretcode;
+int sig;
+abi_ulong pinfo;
+abi_ulong puc;
+char retcode[8];
+struct target_siginfo info;
+struct target_ucontext uc;
+};
+
+static void setup_sigcontext(struct target_sigcontext *sc, CPUM68KState *env,
+ abi_ulong mask)
+{
+uint32_t sr = (env->sr & 0xff00) | cpu_m68k_get_ccr(env);
+__put_user(mask, &sc->sc_mask);
+__put_user(env->aregs[7], &sc->sc_usp);
+__put_user(env->dregs[0], &sc->sc_d0);
+__put_user(env->dregs[1], &sc->sc_d1);
+__put_user(env->aregs[0], &sc->sc_a0);
+__put_user(env->aregs[1], &sc->sc_a1);
+__put_user(sr, &sc->sc_sr);
+__put_user(env->pc, &sc->sc_pc);
+}
+
+static void
+restore_sigcontext(CPUM68KState *env, struct target_sigcontext *sc)
+{
+int temp;
+
+__get_user(env->aregs[7], &sc->sc_usp);
+__get_user(env->dregs[0], &sc->sc_d0);
+__get_user(env->dregs[1], &sc->sc_d1);
+__get_user(env->aregs[0], &sc->sc_a0);
+__get_user(env->aregs[1], &sc->sc_a1);
+__get_user(env->pc, &sc->sc_pc);
+__get_user(temp, &sc->sc_sr);
+cpu_m68k_set_ccr(env, temp);
+}
+
+/*
+ * Determine which stack to use..
+ */
+static inline abi_ulong
+get_sigframe(struct target_sigaction *ka, CPUM68KState *regs,
+ size_t frame_size)
+{
+unsigned long sp;
+
+sp = regs->aregs[7];
+
+/* This is the X/Open sanctioned signal stack switching.  */
+if ((ka->sa_flags & TARGET_SA_ONSTACK) && (sas_ss_flags (sp) == 0)) {
+sp = target_sigaltstack_used.ss_sp + target_sigaltstack_used.ss_size;
+}
+
+return ((sp - frame_size) & -8UL);
+}
+
+void setup_frame(int sig, struct target_sigaction *ka,
+ target_sigset_t *set, CPUM68KState *env)
+{
+struct target_sigframe *frame;
+abi_ulong frame_addr;
+abi_ulong retcode_addr;
+abi_ulong sc_addr;
+int i;
+
+frame_addr = get_sigframe(ka, env, sizeof *frame);
+trace_user_setup_frame(env, frame_addr);
+if (!lock_user_struct(VERIFY_WRITE, frame, frame_addr, 0)) {
+goto give_sigsegv;
+}
+
+__put_user(sig, &frame->sig);
+
+sc_addr = frame_addr + offsetof(struct target_sigframe, sc);
+__put_user(sc_addr, &frame->psc);
+
+setup_sigcontext(&frame->sc, env, set->sig[0]);
+
+for(i = 1; i < TARGET_NSIG_WORDS; i++) {
+__put_user(set->sig[i], &frame->extramask[i - 1]);
+}
+
+/* Set up to return from userspace.  */
+
+retcode_addr = frame_addr + offsetof(struct target_sigframe, retcode);
+__put_user(retcode_addr, &frame->pretcode);
+
+/* moveq #,d0; trap #0 */
+
+__put_user(0x70004e40 + (TARGET_NR_sigreturn << 16),
+   (uint32_t *)(frame->retcode));
+
+/* Set up to return from userspace */
+
+env->aregs[7] = frame_addr;
+env->pc = ka->_sa_handler;
+
+unlock_user_struct(frame, frame_addr, 1);
+return;
+
+give_sigsegv:
+force_sigsegv(sig);
+}
+
+static inline void target_rt_save_fpu_state(struct target_ucontext *uc,
+

[Qemu-devel] [PATCH for 2.13 v3 20/20] linux-user: define TARGET_ARCH_HAS_SETUP_FRAME

2018-04-11 Thread Laurent Vivier
Instead of calling setup_frame() conditionally on a list of known targets,
define TARGET_ARCH_HAS_SETUP_FRAME if the target provides the function
and call it only if the macro is defined.

Move declarations of setup_frame() and setup_rt_frame() to
linux-user/signal-common.h

Signed-off-by: Laurent Vivier 
---

Notes:
v3: move declarations of functions to signal-common.h
fix comment in handle_pending_signal()

 linux-user/aarch64/target_signal.h|  6 +-
 linux-user/alpha/target_signal.h  |  6 +-
 linux-user/arm/target_signal.h|  6 +-
 linux-user/cris/target_signal.h   |  6 +-
 linux-user/hppa/target_signal.h   |  4 
 linux-user/i386/target_signal.h   |  6 +-
 linux-user/m68k/target_signal.h   |  6 +-
 linux-user/microblaze/target_signal.h |  6 +-
 linux-user/mips/target_signal.h   |  8 ++--
 linux-user/mips64/target_signal.h |  4 
 linux-user/nios2/target_signal.h  |  5 -
 linux-user/openrisc/target_signal.h   |  4 
 linux-user/ppc/target_signal.h|  6 +-
 linux-user/riscv/target_signal.h  |  4 
 linux-user/s390x/target_signal.h  |  6 +-
 linux-user/sh4/target_signal.h|  7 +--
 linux-user/signal-common.h|  7 +++
 linux-user/signal.c   | 17 +++--
 linux-user/sparc/target_signal.h  |  6 +-
 linux-user/sparc64/target_signal.h|  6 +-
 linux-user/tilegx/target_signal.h |  4 
 linux-user/x86_64/target_signal.h |  4 
 linux-user/xtensa/target_signal.h |  4 
 23 files changed, 28 insertions(+), 110 deletions(-)

diff --git a/linux-user/aarch64/target_signal.h b/linux-user/aarch64/target_signal.h
index af1f1320e2..0b7ae25120 100644
--- a/linux-user/aarch64/target_signal.h
+++ b/linux-user/aarch64/target_signal.h
@@ -26,9 +26,5 @@ static inline abi_ulong get_sp_from_cpustate(CPUARMState *state)
return state->xregs[31];
 }
 
-void setup_frame(int sig, struct target_sigaction *ka,
- target_sigset_t *set, CPUARMState *env);
-void setup_rt_frame(int sig, struct target_sigaction *ka,
-target_siginfo_t *info, target_sigset_t *set,
-CPUARMState *env);
+#define TARGET_ARCH_HAS_SETUP_FRAME
 #endif /* AARCH64_TARGET_SIGNAL_H */
diff --git a/linux-user/alpha/target_signal.h b/linux-user/alpha/target_signal.h
index 42343a1ae6..4e912e1cf9 100644
--- a/linux-user/alpha/target_signal.h
+++ b/linux-user/alpha/target_signal.h
@@ -55,9 +55,5 @@ static inline abi_ulong get_sp_from_cpustate(CPUAlphaState *state)
 #define TARGET_GEN_SUBRNG6 -24
 #define TARGET_GEN_SUBRNG7 -25
 
-void setup_frame(int sig, struct target_sigaction *ka,
- target_sigset_t *set, CPUAlphaState *env);
-void setup_rt_frame(int sig, struct target_sigaction *ka,
-target_siginfo_t *info,
-target_sigset_t *set, CPUAlphaState *env);
+#define TARGET_ARCH_HAS_SETUP_FRAME
 #endif /* ALPHA_TARGET_SIGNAL_H */
diff --git a/linux-user/arm/target_signal.h b/linux-user/arm/target_signal.h
index 4b542c324f..d6a03ec87d 100644
--- a/linux-user/arm/target_signal.h
+++ b/linux-user/arm/target_signal.h
@@ -26,9 +26,5 @@ static inline abi_ulong get_sp_from_cpustate(CPUARMState *state)
return state->regs[13];
 }
 
-void setup_frame(int usig, struct target_sigaction *ka,
- target_sigset_t *set, CPUARMState *regs);
-void setup_rt_frame(int usig, struct target_sigaction *ka,
-target_siginfo_t *info,
-target_sigset_t *set, CPUARMState *env);
+#define TARGET_ARCH_HAS_SETUP_FRAME
 #endif /* ARM_TARGET_SIGNAL_H */
diff --git a/linux-user/cris/target_signal.h b/linux-user/cris/target_signal.h
index 19c0d7b539..74ff2f3382 100644
--- a/linux-user/cris/target_signal.h
+++ b/linux-user/cris/target_signal.h
@@ -26,9 +26,5 @@ static inline abi_ulong get_sp_from_cpustate(CPUCRISState *state)
 return state->regs[14];
 }
 
-void setup_frame(int sig, struct target_sigaction *ka,
- target_sigset_t *set, CPUCRISState *env);
-void setup_rt_frame(int sig, struct target_sigaction *ka,
-target_siginfo_t *info,
- target_sigset_t *set, CPUCRISState *env);
+#define TARGET_ARCH_HAS_SETUP_FRAME
 #endif /* CRIS_TARGET_SIGNAL_H */
diff --git a/linux-user/hppa/target_signal.h b/linux-user/hppa/target_signal.h
index f19aed93dd..f28b4bf6e8 100644
--- a/linux-user/hppa/target_signal.h
+++ b/linux-user/hppa/target_signal.h
@@ -25,8 +25,4 @@ static inline abi_ulong get_sp_from_cpustate(CPUHPPAState *state)
 {
 return state->gr[30];
 }
-
-void setup_rt_frame(int sig, struct target_sigaction *ka,
-target_siginfo_t *info,
-target_sigset_t *set, CPUArchState *env);
 #endif /* HPPA_TARGET_SIGNAL_H */
diff --git a/linux-user/i386/target_signal.h b/linux-user/i386/target_signal.h
index 

[Qemu-devel] [PATCH for 2.13 v3 15/20] linux-user: move xtensa signal.c parts to xtensa directory

2018-04-11 Thread Laurent Vivier
No code change, only move code from signal.c to
xtensa/signal.c, except adding includes and
exporting setup_rt_frame().

Signed-off-by: Laurent Vivier 
Reviewed-by: Alex Bennée 
Reviewed-by: Richard Henderson 
---

Notes:
v3: report changes introduced by
20ef667060 target/xtensa: fix flush_window_regs

 linux-user/signal.c   | 247 -
 linux-user/xtensa/signal.c| 250 ++
 linux-user/xtensa/target_signal.h |   3 +
 3 files changed, 253 insertions(+), 247 deletions(-)

diff --git a/linux-user/signal.c b/linux-user/signal.c
index 58bbb7693c..b9ad4c14a3 100644
--- a/linux-user/signal.c
+++ b/linux-user/signal.c
@@ -3031,253 +3031,6 @@ sigsegv:
 force_sig(TARGET_SIGSEGV);
 return -TARGET_QEMU_ESIGRETURN;
 }
-
-#elif defined(TARGET_XTENSA)
-
-struct target_sigcontext {
-abi_ulong sc_pc;
-abi_ulong sc_ps;
-abi_ulong sc_lbeg;
-abi_ulong sc_lend;
-abi_ulong sc_lcount;
-abi_ulong sc_sar;
-abi_ulong sc_acclo;
-abi_ulong sc_acchi;
-abi_ulong sc_a[16];
-abi_ulong sc_xtregs;
-};
-
-struct target_ucontext {
-abi_ulong tuc_flags;
-abi_ulong tuc_link;
-target_stack_t tuc_stack;
-struct target_sigcontext tuc_mcontext;
-target_sigset_t tuc_sigmask;
-};
-
-struct target_rt_sigframe {
-target_siginfo_t info;
-struct target_ucontext uc;
-/* TODO: xtregs */
-uint8_t retcode[6];
-abi_ulong window[4];
-};
-
-static abi_ulong get_sigframe(struct target_sigaction *sa,
-  CPUXtensaState *env,
-  unsigned long framesize)
-{
-abi_ulong sp = env->regs[1];
-
-/* This is the X/Open sanctioned signal stack switching.  */
-if ((sa->sa_flags & TARGET_SA_ONSTACK) != 0 && !sas_ss_flags(sp)) {
-sp = target_sigaltstack_used.ss_sp + target_sigaltstack_used.ss_size;
-}
-return (sp - framesize) & -16;
-}
-
-static int flush_window_regs(CPUXtensaState *env)
-{
-uint32_t wb = env->sregs[WINDOW_BASE];
-uint32_t ws = xtensa_replicate_windowstart(env) >> (wb + 1);
-unsigned d = ctz32(ws) + 1;
-unsigned i;
-int ret = 0;
-
-for (i = d; i < env->config->nareg / 4; i += d) {
-uint32_t ssp, osp;
-unsigned j;
-
-ws >>= d;
-xtensa_rotate_window(env, d);
-
-if (ws & 0x1) {
-ssp = env->regs[5];
-d = 1;
-} else if (ws & 0x2) {
-ssp = env->regs[9];
-ret |= get_user_ual(osp, env->regs[1] - 12);
-osp -= 32;
-d = 2;
-} else if (ws & 0x4) {
-ssp = env->regs[13];
-ret |= get_user_ual(osp, env->regs[1] - 12);
-osp -= 48;
-d = 3;
-} else {
-g_assert_not_reached();
-}
-
-for (j = 0; j < 4; ++j) {
-ret |= put_user_ual(env->regs[j], ssp - 16 + j * 4);
-}
-for (j = 4; j < d * 4; ++j) {
-ret |= put_user_ual(env->regs[j], osp - 16 + j * 4);
-}
-}
-xtensa_rotate_window(env, d);
-g_assert(env->sregs[WINDOW_BASE] == wb);
-return ret == 0;
-}
-
-static int setup_sigcontext(struct target_rt_sigframe *frame,
-CPUXtensaState *env)
-{
-struct target_sigcontext *sc = &frame->uc.tuc_mcontext;
-int i;
-
-__put_user(env->pc, &sc->sc_pc);
-__put_user(env->sregs[PS], &sc->sc_ps);
-__put_user(env->sregs[LBEG], &sc->sc_lbeg);
-__put_user(env->sregs[LEND], &sc->sc_lend);
-__put_user(env->sregs[LCOUNT], &sc->sc_lcount);
-if (!flush_window_regs(env)) {
-return 0;
-}
-for (i = 0; i < 16; ++i) {
-__put_user(env->regs[i], sc->sc_a + i);
-}
-__put_user(0, &sc->sc_xtregs);
-/* TODO: xtregs */
-return 1;
-}
-
-static void setup_rt_frame(int sig, struct target_sigaction *ka,
-   target_siginfo_t *info,
-   target_sigset_t *set, CPUXtensaState *env)
-{
-abi_ulong frame_addr;
-struct target_rt_sigframe *frame;
-uint32_t ra;
-int i;
-
-frame_addr = get_sigframe(ka, env, sizeof(*frame));
-trace_user_setup_rt_frame(env, frame_addr);
-
-if (!lock_user_struct(VERIFY_WRITE, frame, frame_addr, 0)) {
-goto give_sigsegv;
-}
-
-if (ka->sa_flags & SA_SIGINFO) {
-tswap_siginfo(&frame->info, info);
-}
-
-__put_user(0, &frame->uc.tuc_flags);
-__put_user(0, &frame->uc.tuc_link);
-__put_user(target_sigaltstack_used.ss_sp,
-   &frame->uc.tuc_stack.ss_sp);
-__put_user(sas_ss_flags(env->regs[1]),
-   &frame->uc.tuc_stack.ss_flags);
-__put_user(target_sigaltstack_used.ss_size,
-   &frame->uc.tuc_stack.ss_size);
-if (!setup_sigcontext(frame, env)) {
-unlock_user_struct(frame, frame_addr, 0);
-goto give_sigsegv;
-}
-for (i = 0; i < TARGET_NSIG_WORDS; ++i) {
-

[Qemu-devel] [PATCH for 2.13 v3 06/20] linux-user: move cris signal.c parts to cris directory

2018-04-11 Thread Laurent Vivier
No code change, only move code from signal.c to
cris/signal.c, except adding includes and
exporting setup_frame() and setup_rt_frame().

Signed-off-by: Laurent Vivier 
Reviewed-by: Alex Bennée 
Reviewed-by: Richard Henderson 
---
 linux-user/cris/signal.c| 171 
 linux-user/cris/target_signal.h |   6 +-
 linux-user/signal.c | 168 ---
 3 files changed, 176 insertions(+), 169 deletions(-)

diff --git a/linux-user/cris/signal.c b/linux-user/cris/signal.c
index 02ca338b6c..322d9db1a7 100644
--- a/linux-user/cris/signal.c
+++ b/linux-user/cris/signal.c
@@ -16,3 +16,174 @@
  *  You should have received a copy of the GNU General Public License
  *  along with this program; if not, see .
  */
+#include "qemu/osdep.h"
+#include "qemu.h"
+#include "target_signal.h"
+#include "signal-common.h"
+#include "linux-user/trace.h"
+
+struct target_sigcontext {
+struct target_pt_regs regs;  /* needs to be first */
+uint32_t oldmask;
+uint32_t usp;/* usp before stacking this gunk on it */
+};
+
+/* Signal frames. */
+struct target_signal_frame {
+struct target_sigcontext sc;
+uint32_t extramask[TARGET_NSIG_WORDS - 1];
+uint16_t retcode[4];  /* Trampoline code. */
+};
+
+struct rt_signal_frame {
+siginfo_t *pinfo;
+void *puc;
+siginfo_t info;
+ucontext_t uc;
+uint16_t retcode[4];  /* Trampoline code. */
+};
+
+static void setup_sigcontext(struct target_sigcontext *sc, CPUCRISState *env)
+{
+__put_user(env->regs[0], &sc->regs.r0);
+__put_user(env->regs[1], &sc->regs.r1);
+__put_user(env->regs[2], &sc->regs.r2);
+__put_user(env->regs[3], &sc->regs.r3);
+__put_user(env->regs[4], &sc->regs.r4);
+__put_user(env->regs[5], &sc->regs.r5);
+__put_user(env->regs[6], &sc->regs.r6);
+__put_user(env->regs[7], &sc->regs.r7);
+__put_user(env->regs[8], &sc->regs.r8);
+__put_user(env->regs[9], &sc->regs.r9);
+__put_user(env->regs[10], &sc->regs.r10);
+__put_user(env->regs[11], &sc->regs.r11);
+__put_user(env->regs[12], &sc->regs.r12);
+__put_user(env->regs[13], &sc->regs.r13);
+__put_user(env->regs[14], &sc->usp);
+__put_user(env->regs[15], &sc->regs.acr);
+__put_user(env->pregs[PR_MOF], &sc->regs.mof);
+__put_user(env->pregs[PR_SRP], &sc->regs.srp);
+__put_user(env->pc, &sc->regs.erp);
+}
+
+static void restore_sigcontext(struct target_sigcontext *sc, CPUCRISState *env)
+{
+__get_user(env->regs[0], &sc->regs.r0);
+__get_user(env->regs[1], &sc->regs.r1);
+__get_user(env->regs[2], &sc->regs.r2);
+__get_user(env->regs[3], &sc->regs.r3);
+__get_user(env->regs[4], &sc->regs.r4);
+__get_user(env->regs[5], &sc->regs.r5);
+__get_user(env->regs[6], &sc->regs.r6);
+__get_user(env->regs[7], &sc->regs.r7);
+__get_user(env->regs[8], &sc->regs.r8);
+__get_user(env->regs[9], &sc->regs.r9);
+__get_user(env->regs[10], &sc->regs.r10);
+__get_user(env->regs[11], &sc->regs.r11);
+__get_user(env->regs[12], &sc->regs.r12);
+__get_user(env->regs[13], &sc->regs.r13);
+__get_user(env->regs[14], &sc->usp);
+__get_user(env->regs[15], &sc->regs.acr);
+__get_user(env->pregs[PR_MOF], &sc->regs.mof);
+__get_user(env->pregs[PR_SRP], &sc->regs.srp);
+__get_user(env->pc, &sc->regs.erp);
+}
+
+static abi_ulong get_sigframe(CPUCRISState *env, int framesize)
+{
+abi_ulong sp;
+/* Align the stack downwards to 4.  */
+sp = (env->regs[R_SP] & ~3);
+return sp - framesize;
+}
+
+void setup_frame(int sig, struct target_sigaction *ka,
+ target_sigset_t *set, CPUCRISState *env)
+{
+struct target_signal_frame *frame;
+abi_ulong frame_addr;
+int i;
+
+frame_addr = get_sigframe(env, sizeof *frame);
+trace_user_setup_frame(env, frame_addr);
+if (!lock_user_struct(VERIFY_WRITE, frame, frame_addr, 0))
+goto badframe;
+
+/*
+ * The CRIS signal return trampoline. A real linux/CRIS kernel doesn't
+ * use this trampoline anymore but it sets it up for GDB.
+ * In QEMU, using the trampoline simplifies things a bit so we use it.
+ *
+ * This is movu.w __NR_sigreturn, r9; break 13;
+ */
+__put_user(0x9c5f, frame->retcode+0);
+__put_user(TARGET_NR_sigreturn,
+   frame->retcode + 1);
+__put_user(0xe93d, frame->retcode + 2);
+
+/* Save the mask.  */
+__put_user(set->sig[0], &frame->sc.oldmask);
+
+for(i = 1; i < TARGET_NSIG_WORDS; i++) {
+__put_user(set->sig[i], &frame->extramask[i - 1]);
+}
+
+setup_sigcontext(&frame->sc, env);
+
+/* Move the stack and setup the arguments for the handler.  */
+env->regs[R_SP] = frame_addr;
+env->regs[10] = sig;
+env->pc = (unsigned long) ka->_sa_handler;
+/* Link SRP so the guest returns through the trampoline.  */
+env->pregs[PR_SRP] = frame_addr + offsetof(typeof(*frame), retcode);
+
+unlock_user_struct(frame, frame_addr, 1);
+return;
+badframe:
+

[Qemu-devel] [PATCH for 2.13 v3 05/20] linux-user: move microblaze signal.c parts to microblaze directory

2018-04-11 Thread Laurent Vivier
No code change, only move code from signal.c to
microblaze/signal.c, except adding includes and
exporting setup_frame() and setup_rt_frame().

Signed-off-by: Laurent Vivier 
Reviewed-by: Alex Bennée 
Reviewed-by: Richard Henderson 
---
 linux-user/microblaze/signal.c| 230 ++
 linux-user/microblaze/target_signal.h |   6 +-
 linux-user/signal.c   | 227 -
 3 files changed, 235 insertions(+), 228 deletions(-)

diff --git a/linux-user/microblaze/signal.c b/linux-user/microblaze/signal.c
index 02ca338b6c..5572baa7dc 100644
--- a/linux-user/microblaze/signal.c
+++ b/linux-user/microblaze/signal.c
@@ -16,3 +16,233 @@
  *  You should have received a copy of the GNU General Public License
  *  along with this program; if not, see .
  */
+#include "qemu/osdep.h"
+#include "qemu.h"
+#include "target_signal.h"
+#include "signal-common.h"
+#include "linux-user/trace.h"
+
+struct target_sigcontext {
+struct target_pt_regs regs;  /* needs to be first */
+uint32_t oldmask;
+};
+
+struct target_stack_t {
+abi_ulong ss_sp;
+int ss_flags;
+unsigned int ss_size;
+};
+
+struct target_ucontext {
+abi_ulong tuc_flags;
+abi_ulong tuc_link;
+struct target_stack_t tuc_stack;
+struct target_sigcontext tuc_mcontext;
+uint32_t tuc_extramask[TARGET_NSIG_WORDS - 1];
+};
+
+/* Signal frames. */
+struct target_signal_frame {
+struct target_ucontext uc;
+uint32_t extramask[TARGET_NSIG_WORDS - 1];
+uint32_t tramp[2];
+};
+
+struct rt_signal_frame {
+siginfo_t info;
+ucontext_t uc;
+uint32_t tramp[2];
+};
+
+static void setup_sigcontext(struct target_sigcontext *sc, CPUMBState *env)
+{
+__put_user(env->regs[0], &sc->regs.r0);
+__put_user(env->regs[1], &sc->regs.r1);
+__put_user(env->regs[2], &sc->regs.r2);
+__put_user(env->regs[3], &sc->regs.r3);
+__put_user(env->regs[4], &sc->regs.r4);
+__put_user(env->regs[5], &sc->regs.r5);
+__put_user(env->regs[6], &sc->regs.r6);
+__put_user(env->regs[7], &sc->regs.r7);
+__put_user(env->regs[8], &sc->regs.r8);
+__put_user(env->regs[9], &sc->regs.r9);
+__put_user(env->regs[10], &sc->regs.r10);
+__put_user(env->regs[11], &sc->regs.r11);
+__put_user(env->regs[12], &sc->regs.r12);
+__put_user(env->regs[13], &sc->regs.r13);
+__put_user(env->regs[14], &sc->regs.r14);
+__put_user(env->regs[15], &sc->regs.r15);
+__put_user(env->regs[16], &sc->regs.r16);
+__put_user(env->regs[17], &sc->regs.r17);
+__put_user(env->regs[18], &sc->regs.r18);
+__put_user(env->regs[19], &sc->regs.r19);
+__put_user(env->regs[20], &sc->regs.r20);
+__put_user(env->regs[21], &sc->regs.r21);
+__put_user(env->regs[22], &sc->regs.r22);
+__put_user(env->regs[23], &sc->regs.r23);
+__put_user(env->regs[24], &sc->regs.r24);
+__put_user(env->regs[25], &sc->regs.r25);
+__put_user(env->regs[26], &sc->regs.r26);
+__put_user(env->regs[27], &sc->regs.r27);
+__put_user(env->regs[28], &sc->regs.r28);
+__put_user(env->regs[29], &sc->regs.r29);
+__put_user(env->regs[30], &sc->regs.r30);
+__put_user(env->regs[31], &sc->regs.r31);
+__put_user(env->sregs[SR_PC], &sc->regs.pc);
+}
+
+static void restore_sigcontext(struct target_sigcontext *sc, CPUMBState *env)
+{
+__get_user(env->regs[0], &sc->regs.r0);
+__get_user(env->regs[1], &sc->regs.r1);
+__get_user(env->regs[2], &sc->regs.r2);
+__get_user(env->regs[3], &sc->regs.r3);
+__get_user(env->regs[4], &sc->regs.r4);
+__get_user(env->regs[5], &sc->regs.r5);
+__get_user(env->regs[6], &sc->regs.r6);
+__get_user(env->regs[7], &sc->regs.r7);
+__get_user(env->regs[8], &sc->regs.r8);
+__get_user(env->regs[9], &sc->regs.r9);
+__get_user(env->regs[10], &sc->regs.r10);
+__get_user(env->regs[11], &sc->regs.r11);
+__get_user(env->regs[12], &sc->regs.r12);
+__get_user(env->regs[13], &sc->regs.r13);
+__get_user(env->regs[14], &sc->regs.r14);
+__get_user(env->regs[15], &sc->regs.r15);
+__get_user(env->regs[16], &sc->regs.r16);
+__get_user(env->regs[17], &sc->regs.r17);
+__get_user(env->regs[18], &sc->regs.r18);
+__get_user(env->regs[19], &sc->regs.r19);
+__get_user(env->regs[20], &sc->regs.r20);
+__get_user(env->regs[21], &sc->regs.r21);
+__get_user(env->regs[22], &sc->regs.r22);
+__get_user(env->regs[23], &sc->regs.r23);
+__get_user(env->regs[24], &sc->regs.r24);
+__get_user(env->regs[25], &sc->regs.r25);
+__get_user(env->regs[26], &sc->regs.r26);
+__get_user(env->regs[27], &sc->regs.r27);
+__get_user(env->regs[28], &sc->regs.r28);
+__get_user(env->regs[29], &sc->regs.r29);
+__get_user(env->regs[30], &sc->regs.r30);
+__get_user(env->regs[31], &sc->regs.r31);
+__get_user(env->sregs[SR_PC], &sc->regs.pc);
+}
+
+static abi_ulong get_sigframe(struct target_sigaction *ka,
+  CPUMBState *env, int frame_size)
+{
+abi_ulong sp = env->regs[1];
+
+if ((ka->sa_flags & TARGET_SA_ONSTACK) != 0 && !on_sig_stack(sp)) {
+sp = 

[Qemu-devel] [PATCH for 2.13 v3 16/20] linux-user: move i386/x86_64 signal.c parts to i386 directory

2018-04-11 Thread Laurent Vivier
No code change, only move code from signal.c to
i386/signal.c, except adding includes and
exporting setup_frame() and setup_rt_frame().

x86_64/signal.c includes i386/signal.c

Signed-off-by: Laurent Vivier 
Reviewed-by: Alex Bennée 
Reviewed-by: Richard Henderson 
---

Notes:
v3: expand tabs

 linux-user/i386/signal.c  | 584 ++
 linux-user/i386/target_signal.h   |   5 +
 linux-user/signal.c   | 582 +
 linux-user/x86_64/signal.c|   2 +
 linux-user/x86_64/target_signal.h |   3 +
 5 files changed, 595 insertions(+), 581 deletions(-)

diff --git a/linux-user/i386/signal.c b/linux-user/i386/signal.c
index 02ca338b6c..4a190e6435 100644
--- a/linux-user/i386/signal.c
+++ b/linux-user/i386/signal.c
@@ -16,3 +16,587 @@
  *  You should have received a copy of the GNU General Public License
  *  along with this program; if not, see <http://www.gnu.org/licenses/>.
  */
+#include "qemu/osdep.h"
+#include "qemu.h"
+#include "target_signal.h"
+#include "signal-common.h"
+#include "linux-user/trace.h"
+
+/* from the Linux kernel - /arch/x86/include/uapi/asm/sigcontext.h */
+
+struct target_fpreg {
+uint16_t significand[4];
+uint16_t exponent;
+};
+
+struct target_fpxreg {
+uint16_t significand[4];
+uint16_t exponent;
+uint16_t padding[3];
+};
+
+struct target_xmmreg {
+uint32_t element[4];
+};
+
+struct target_fpstate_32 {
+/* Regular FPU environment */
+uint32_t cw;
+uint32_t sw;
+uint32_t tag;
+uint32_t ipoff;
+uint32_t cssel;
+uint32_t dataoff;
+uint32_t datasel;
+struct target_fpreg st[8];
+uint16_t  status;
+uint16_t  magic;  /* 0xffff = regular FPU data only */
+
+/* FXSR FPU environment */
+uint32_t _fxsr_env[6];   /* FXSR FPU env is ignored */
+uint32_t mxcsr;
+uint32_t reserved;
+struct target_fpxreg fxsr_st[8]; /* FXSR FPU reg data is ignored */
+struct target_xmmreg xmm[8];
+uint32_t padding[56];
+};
+
+struct target_fpstate_64 {
+/* FXSAVE format */
+uint16_t cw;
+uint16_t sw;
+uint16_t twd;
+uint16_t fop;
+uint64_t rip;
+uint64_t rdp;
+uint32_t mxcsr;
+uint32_t mxcsr_mask;
+uint32_t st_space[32];
+uint32_t xmm_space[64];
+uint32_t reserved[24];
+};
+
+#ifndef TARGET_X86_64
+# define target_fpstate target_fpstate_32
+#else
+# define target_fpstate target_fpstate_64
+#endif
+
+struct target_sigcontext_32 {
+uint16_t gs, __gsh;
+uint16_t fs, __fsh;
+uint16_t es, __esh;
+uint16_t ds, __dsh;
+uint32_t edi;
+uint32_t esi;
+uint32_t ebp;
+uint32_t esp;
+uint32_t ebx;
+uint32_t edx;
+uint32_t ecx;
+uint32_t eax;
+uint32_t trapno;
+uint32_t err;
+uint32_t eip;
+uint16_t cs, __csh;
+uint32_t eflags;
+uint32_t esp_at_signal;
+uint16_t ss, __ssh;
+uint32_t fpstate; /* pointer */
+uint32_t oldmask;
+uint32_t cr2;
+};
+
+struct target_sigcontext_64 {
+uint64_t r8;
+uint64_t r9;
+uint64_t r10;
+uint64_t r11;
+uint64_t r12;
+uint64_t r13;
+uint64_t r14;
+uint64_t r15;
+
+uint64_t rdi;
+uint64_t rsi;
+uint64_t rbp;
+uint64_t rbx;
+uint64_t rdx;
+uint64_t rax;
+uint64_t rcx;
+uint64_t rsp;
+uint64_t rip;
+
+uint64_t eflags;
+
+uint16_t cs;
+uint16_t gs;
+uint16_t fs;
+uint16_t ss;
+
+uint64_t err;
+uint64_t trapno;
+uint64_t oldmask;
+uint64_t cr2;
+
+uint64_t fpstate; /* pointer */
+uint64_t padding[8];
+};
+
+#ifndef TARGET_X86_64
+# define target_sigcontext target_sigcontext_32
+#else
+# define target_sigcontext target_sigcontext_64
+#endif
+
+/* see Linux/include/uapi/asm-generic/ucontext.h */
+struct target_ucontext {
+abi_ulong tuc_flags;
+abi_ulong tuc_link;
+target_stack_t    tuc_stack;
+struct target_sigcontext tuc_mcontext;
+target_sigset_t   tuc_sigmask;  /* mask last for extensibility */
+};
+
+#ifndef TARGET_X86_64
+struct sigframe {
+abi_ulong pretcode;
+int sig;
+struct target_sigcontext sc;
+struct target_fpstate fpstate;
+abi_ulong extramask[TARGET_NSIG_WORDS-1];
+char retcode[8];
+};
+
+struct rt_sigframe {
+abi_ulong pretcode;
+int sig;
+abi_ulong pinfo;
+abi_ulong puc;
+struct target_siginfo info;
+struct target_ucontext uc;
+struct target_fpstate fpstate;
+char retcode[8];
+};
+
+#else
+
+struct rt_sigframe {
+abi_ulong pretcode;
+struct target_ucontext uc;
+struct target_siginfo info;
+struct target_fpstate fpstate;
+};
+
+#endif
+
+/*
+ * Set up a signal frame.
+ */
+
+/* XXX: save x87 state */
+static void setup_sigcontext(struct target_sigcontext *sc,
+struct target_fpstate *fpstate, CPUX86State *env, abi_ulong mask,
+abi_ulong fpstate_addr)
+{
+CPUState 

[Qemu-devel] [PATCH for 2.13 v3 17/20] linux-user: move sparc/sparc64 signal.c parts to sparc directory

2018-04-11 Thread Laurent Vivier
No code change, only move code from signal.c to
sparc/signal.c, except adding includes and
exporting setup_frame() and setup_rt_frame().

sparc64/signal.c includes sparc/signal.c

Signed-off-by: Laurent Vivier 
Reviewed-by: Alex Bennée 
Reviewed-by: Richard Henderson 
---

Notes:
v3: expand tabs
report changes introduced by
5de154e82f linux-user: define TARGET_ARCH_HAS_KA_RESTORER

 linux-user/signal.c| 604 +---
 linux-user/sparc/signal.c  | 606 +
 linux-user/sparc/target_signal.h   |   6 +-
 linux-user/sparc64/signal.c|   2 +
 linux-user/sparc64/target_signal.h |   6 +-
 5 files changed, 619 insertions(+), 605 deletions(-)

diff --git a/linux-user/signal.c b/linux-user/signal.c
index 830f778262..27c3769c5e 100644
--- a/linux-user/signal.c
+++ b/linux-user/signal.c
@@ -803,609 +803,7 @@ int do_sigaction(int sig, const struct target_sigaction *act,
 return ret;
 }
 
-#if defined(TARGET_SPARC)
-
-#define __SUNOS_MAXWIN   31
-
-/* This is what SunOS does, so shall I. */
-struct target_sigcontext {
-abi_ulong sigc_onstack;  /* state to restore */
-
-abi_ulong sigc_mask; /* sigmask to restore */
-abi_ulong sigc_sp;   /* stack pointer */
-abi_ulong sigc_pc;   /* program counter */
-abi_ulong sigc_npc;  /* next program counter */
-abi_ulong sigc_psr;  /* for condition codes etc */
-abi_ulong sigc_g1;   /* User uses these two registers */
-abi_ulong sigc_o0;   /* within the trampoline code. */
-
-/* Now comes information regarding the users window set
- * at the time of the signal.
- */
-abi_ulong sigc_oswins;   /* outstanding windows */
-
-/* stack ptrs for each regwin buf */
-char *sigc_spbuf[__SUNOS_MAXWIN];
-
-/* Windows to restore after signal */
-struct {
-abi_ulong locals[8];
-abi_ulong ins[8];
-} sigc_wbuf[__SUNOS_MAXWIN];
-};
-/* A Sparc stack frame */
-struct sparc_stackf {
-abi_ulong locals[8];
-abi_ulong ins[8];
-/* It's simpler to treat fp and callers_pc as elements of ins[]
- * since we never need to access them ourselves.
- */
-char *structptr;
-abi_ulong xargs[6];
-abi_ulong xxargs[1];
-};
-
-typedef struct {
-struct {
-abi_ulong psr;
-abi_ulong pc;
-abi_ulong npc;
-abi_ulong y;
-abi_ulong u_regs[16]; /* globals and ins */
-}   si_regs;
-int si_mask;
-} __siginfo_t;
-
-typedef struct {
-abi_ulong  si_float_regs[32];
-unsigned   long si_fsr;
-unsigned   long si_fpqdepth;
-struct {
-unsigned long *insn_addr;
-unsigned long insn;
-} si_fpqueue [16];
-} qemu_siginfo_fpu_t;
-
-
-struct target_signal_frame {
-struct sparc_stackf ss;
-__siginfo_t info;
-abi_ulong   fpu_save;
-abi_ulong   insns[2] __attribute__ ((aligned (8)));
-abi_ulong   extramask[TARGET_NSIG_WORDS - 1];
-abi_ulong   extra_size; /* Should be 0 */
-qemu_siginfo_fpu_t fpu_state;
-};
-struct target_rt_signal_frame {
-struct sparc_stackf ss;
-siginfo_t   info;
-abi_ulong   regs[20];
-sigset_t    mask;
-abi_ulong   fpu_save;
-unsigned int insns[2];
-stack_t stack;
-unsigned int extra_size; /* Should be 0 */
-qemu_siginfo_fpu_t  fpu_state;
-};
-
-#define UREG_O0   16
-#define UREG_O6   22
-#define UREG_I0   0
-#define UREG_I1   1
-#define UREG_I2   2
-#define UREG_I3   3
-#define UREG_I4   4
-#define UREG_I5   5
-#define UREG_I6   6
-#define UREG_I7   7
-#define UREG_L0   8
-#define UREG_FP   UREG_I6
-#define UREG_SP   UREG_O6
-
-static inline abi_ulong get_sigframe(struct target_sigaction *sa, 
- CPUSPARCState *env,
- unsigned long framesize)
-{
-abi_ulong sp;
-
-sp = env->regwptr[UREG_FP];
-
-/* This is the X/Open sanctioned signal stack switching.  */
-if (sa->sa_flags & TARGET_SA_ONSTACK) {
-if (!on_sig_stack(sp)
-&& !((target_sigaltstack_used.ss_sp + target_sigaltstack_used.ss_size) & 7)) {
-sp = target_sigaltstack_used.ss_sp + target_sigaltstack_used.ss_size;
-}
-}
-return sp - framesize;
-}
-
-static int
-setup___siginfo(__siginfo_t *si, CPUSPARCState *env, abi_ulong mask)
-{
-int err = 0, i;
-
-__put_user(env->psr, &si->si_regs.psr);
-__put_user(env->pc, &si->si_regs.pc);
-__put_user(env->npc, &si->si_regs.npc);
-__put_user(env->y, &si->si_regs.y);
-for (i=0; i < 8; i++) {
-__put_user(env->gregs[i], &si->si_regs.u_regs[i]);
-}
-for (i=0; i < 8; i++) 

[Qemu-devel] [PATCH for 2.13 v3 12/20] linux-user: move tilegx signal.c parts to tilegx directory

2018-04-11 Thread Laurent Vivier
No code change, only move code from signal.c to
tilegx/signal.c, except adding includes and
exporting setup_rt_frame().

Signed-off-by: Laurent Vivier 
Reviewed-by: Alex Bennée 
Reviewed-by: Richard Henderson 
---
 linux-user/signal.c   | 165 -
 linux-user/tilegx/signal.c| 168 ++
 linux-user/tilegx/target_signal.h |   4 +-
 3 files changed, 171 insertions(+), 166 deletions(-)

diff --git a/linux-user/signal.c b/linux-user/signal.c
index 7c2a963e7c..5a3e5bff5e 100644
--- a/linux-user/signal.c
+++ b/linux-user/signal.c
@@ -3032,171 +3032,6 @@ sigsegv:
 return -TARGET_QEMU_ESIGRETURN;
 }
 
-#elif defined(TARGET_TILEGX)
-
-struct target_sigcontext {
-union {
-/* General-purpose registers.  */
-abi_ulong gregs[56];
-struct {
-abi_ulong __gregs[53];
-abi_ulong tp;/* Aliases gregs[TREG_TP].  */
-abi_ulong sp;/* Aliases gregs[TREG_SP].  */
-abi_ulong lr;/* Aliases gregs[TREG_LR].  */
-};
-};
-abi_ulong pc;/* Program counter.  */
-abi_ulong ics;   /* In Interrupt Critical Section?  */
-abi_ulong faultnum;  /* Fault number.  */
-abi_ulong pad[5];
-};
-
-struct target_ucontext {
-abi_ulong tuc_flags;
-abi_ulong tuc_link;
-target_stack_t tuc_stack;
-struct target_sigcontext tuc_mcontext;
-target_sigset_t tuc_sigmask;   /* mask last for extensibility */
-};
-
-struct target_rt_sigframe {
-unsigned char save_area[16]; /* caller save area */
-struct target_siginfo info;
-struct target_ucontext uc;
-abi_ulong retcode[2];
-};
-
-#define INSN_MOVELI_R10_139  0x00045fe551483000ULL /* { moveli r10, 139 } */
-#define INSN_SWINT1  0x286b180051485000ULL /* { swint1 } */
-
-
-static void setup_sigcontext(struct target_sigcontext *sc,
- CPUArchState *env, int signo)
-{
-int i;
-
-for (i = 0; i < TILEGX_R_COUNT; ++i) {
-__put_user(env->regs[i], >gregs[i]);
-}
-
-__put_user(env->pc, >pc);
-__put_user(0, >ics);
-__put_user(signo, >faultnum);
-}
-
-static void restore_sigcontext(CPUTLGState *env, struct target_sigcontext *sc)
-{
-int i;
-
-for (i = 0; i < TILEGX_R_COUNT; ++i) {
-__get_user(env->regs[i], >gregs[i]);
-}
-
-__get_user(env->pc, >pc);
-}
-
-static abi_ulong get_sigframe(struct target_sigaction *ka, CPUArchState *env,
-  size_t frame_size)
-{
-unsigned long sp = env->regs[TILEGX_R_SP];
-
-if (on_sig_stack(sp) && !likely(on_sig_stack(sp - frame_size))) {
-return -1UL;
-}
-
-if ((ka->sa_flags & SA_ONSTACK) && !sas_ss_flags(sp)) {
-sp = target_sigaltstack_used.ss_sp + target_sigaltstack_used.ss_size;
-}
-
-sp -= frame_size;
-sp &= -16UL;
-return sp;
-}
-
-static void setup_rt_frame(int sig, struct target_sigaction *ka,
-   target_siginfo_t *info,
-   target_sigset_t *set, CPUArchState *env)
-{
-abi_ulong frame_addr;
-struct target_rt_sigframe *frame;
-unsigned long restorer;
-
-frame_addr = get_sigframe(ka, env, sizeof(*frame));
-trace_user_setup_rt_frame(env, frame_addr);
-if (!lock_user_struct(VERIFY_WRITE, frame, frame_addr, 0)) {
-goto give_sigsegv;
-}
-
-/* Always write at least the signal number for the stack backtracer. */
-if (ka->sa_flags & TARGET_SA_SIGINFO) {
-/* At sigreturn time, restore the callee-save registers too. */
-tswap_siginfo(>info, info);
-/* regs->flags |= PT_FLAGS_RESTORE_REGS; FIXME: we can skip it? */
-} else {
-__put_user(info->si_signo, >info.si_signo);
-}
-
-/* Create the ucontext.  */
-__put_user(0, >uc.tuc_flags);
-__put_user(0, >uc.tuc_link);
-__put_user(target_sigaltstack_used.ss_sp, >uc.tuc_stack.ss_sp);
-__put_user(sas_ss_flags(env->regs[TILEGX_R_SP]),
-   >uc.tuc_stack.ss_flags);
-__put_user(target_sigaltstack_used.ss_size, >uc.tuc_stack.ss_size);
-setup_sigcontext(>uc.tuc_mcontext, env, info->si_signo);
-
-if (ka->sa_flags & TARGET_SA_RESTORER) {
-restorer = (unsigned long) ka->sa_restorer;
-} else {
-__put_user(INSN_MOVELI_R10_139, >retcode[0]);
-__put_user(INSN_SWINT1, >retcode[1]);
-restorer = frame_addr + offsetof(struct target_rt_sigframe, retcode);
-}
-env->pc = (unsigned long) ka->_sa_handler;
-env->regs[TILEGX_R_SP] = (unsigned long) frame;
-env->regs[TILEGX_R_LR] = restorer;
-env->regs[0] = (unsigned long) sig;
-env->regs[1] = (unsigned long) >info;
-env->regs[2] = (unsigned long) >uc;
-/* regs->flags |= PT_FLAGS_CALLER_SAVES; FIXME: we can skip it? */
-
-unlock_user_struct(frame, frame_addr, 1);
-return;
-
-give_sigsegv:
-

[Qemu-devel] [PATCH for 2.13 v3 13/20] linux-user: move riscv signal.c parts to riscv directory

2018-04-11 Thread Laurent Vivier
No code change, only move code from signal.c to
riscv/signal.c, except adding includes and
exporting setup_rt_frame().

Signed-off-by: Laurent Vivier 
Reviewed-by: Philippe Mathieu-Daudé 
Reviewed-by: Alex Bennée 
Reviewed-by: Richard Henderson 
---
 linux-user/riscv/signal.c| 200 +++
 linux-user/riscv/target_signal.h |   3 +
 linux-user/signal.c  | 197 --
 3 files changed, 203 insertions(+), 197 deletions(-)

diff --git a/linux-user/riscv/signal.c b/linux-user/riscv/signal.c
index 02ca338b6c..718f3a5679 100644
--- a/linux-user/riscv/signal.c
+++ b/linux-user/riscv/signal.c
@@ -16,3 +16,203 @@
  *  You should have received a copy of the GNU General Public License
  *  along with this program; if not, see <http://www.gnu.org/licenses/>.
  */
+#include "qemu/osdep.h"
+#include "qemu.h"
+#include "target_signal.h"
+#include "signal-common.h"
+#include "linux-user/trace.h"
+
+/* Signal handler invocation must be transparent for the code being
+   interrupted. Complete CPU (hart) state is saved on entry and restored
+   before returning from the handler. Process sigmask is also saved to block
+   signals while the handler is running. The handler gets its own stack,
+   which also doubles as storage for the CPU state and sigmask.
+
+   The code below is qemu re-implementation of arch/riscv/kernel/signal.c */
+
+struct target_sigcontext {
+abi_long pc;
+abi_long gpr[31]; /* x0 is not present, so all offsets must be -1 */
+uint64_t fpr[32];
+uint32_t fcsr;
+}; /* cf. riscv-linux:arch/riscv/include/uapi/asm/ptrace.h */
+
+struct target_ucontext {
+unsigned long uc_flags;
+struct target_ucontext *uc_link;
+target_stack_t uc_stack;
+struct target_sigcontext uc_mcontext;
+target_sigset_t uc_sigmask;
+};
+
+struct target_rt_sigframe {
+uint32_t tramp[2]; /* not in kernel, which uses VDSO instead */
+struct target_siginfo info;
+struct target_ucontext uc;
+};
+
+static abi_ulong get_sigframe(struct target_sigaction *ka,
+  CPURISCVState *regs, size_t framesize)
+{
+abi_ulong sp = regs->gpr[xSP];
+int onsigstack = on_sig_stack(sp);
+
+/* redzone */
+/* This is the X/Open sanctioned signal stack switching.  */
+if ((ka->sa_flags & TARGET_SA_ONSTACK) != 0 && !onsigstack) {
+sp = target_sigaltstack_used.ss_sp + target_sigaltstack_used.ss_size;
+}
+
+sp -= framesize;
+sp &= ~3UL; /* align sp on 4-byte boundary */
+
+/* If we are on the alternate signal stack and would overflow it, don't.
+   Return an always-bogus address instead so we will die with SIGSEGV. */
+if (onsigstack && !likely(on_sig_stack(sp))) {
+return -1L;
+}
+
+return sp;
+}
+
+static void setup_sigcontext(struct target_sigcontext *sc, CPURISCVState *env)
+{
+int i;
+
+__put_user(env->pc, >pc);
+
+for (i = 1; i < 32; i++) {
+__put_user(env->gpr[i], >gpr[i - 1]);
+}
+for (i = 0; i < 32; i++) {
+__put_user(env->fpr[i], >fpr[i]);
+}
+
+uint32_t fcsr = csr_read_helper(env, CSR_FCSR); /*riscv_get_fcsr(env);*/
+__put_user(fcsr, >fcsr);
+}
+
+static void setup_ucontext(struct target_ucontext *uc,
+   CPURISCVState *env, target_sigset_t *set)
+{
+abi_ulong ss_sp = (target_ulong)target_sigaltstack_used.ss_sp;
+abi_ulong ss_flags = sas_ss_flags(env->gpr[xSP]);
+abi_ulong ss_size = target_sigaltstack_used.ss_size;
+
+__put_user(0,&(uc->uc_flags));
+__put_user(0,&(uc->uc_link));
+
+__put_user(ss_sp,&(uc->uc_stack.ss_sp));
+__put_user(ss_flags, &(uc->uc_stack.ss_flags));
+__put_user(ss_size,  &(uc->uc_stack.ss_size));
+
+int i;
+for (i = 0; i < TARGET_NSIG_WORDS; i++) {
+__put_user(set->sig[i], &(uc->uc_sigmask.sig[i]));
+}
+
+setup_sigcontext(>uc_mcontext, env);
+}
+
+static inline void install_sigtramp(uint32_t *tramp)
+{
+__put_user(0x08b00893, tramp + 0);  /* li a7, 139 = __NR_rt_sigreturn */
+__put_user(0x0073, tramp + 1);  /* ecall */
+}
+
+void setup_rt_frame(int sig, struct target_sigaction *ka,
+target_siginfo_t *info,
+target_sigset_t *set, CPURISCVState *env)
+{
+abi_ulong frame_addr;
+struct target_rt_sigframe *frame;
+
+frame_addr = get_sigframe(ka, env, sizeof(*frame));
+trace_user_setup_rt_frame(env, frame_addr);
+
+if (!lock_user_struct(VERIFY_WRITE, frame, frame_addr, 0)) {
+goto badframe;
+}
+
+setup_ucontext(>uc, env, set);
+tswap_siginfo(>info, info);
+install_sigtramp(frame->tramp);
+
+env->pc = ka->_sa_handler;
+env->gpr[xSP] = frame_addr;
+env->gpr[xA0] = sig;
+env->gpr[xA1] = frame_addr + offsetof(struct target_rt_sigframe, info);
+env->gpr[xA2] = frame_addr + offsetof(struct 

[Qemu-devel] [PATCH for 2.13 v3 02/20] linux-user: move aarch64 signal.c parts to aarch64 directory

2018-04-11 Thread Laurent Vivier
No code change, only move code from signal.c to
aarch64/signal.c, except adding includes and
exporting setup_frame() and setup_rt_frame().

Signed-off-by: Laurent Vivier 
Reviewed-by: Alex Bennée 
Reviewed-by: Richard Henderson 
---

Notes:
v3: report changes introduced by
7f0f4208b3 linux-user/signal.c: Ensure AArch64 signal frame isn't too small

 linux-user/aarch64/signal.c| 567 +
 linux-user/aarch64/target_signal.h |   5 +
 linux-user/signal.c| 566 +---
 3 files changed, 573 insertions(+), 565 deletions(-)

diff --git a/linux-user/aarch64/signal.c b/linux-user/aarch64/signal.c
index 02ca338b6c..72d20975f3 100644
--- a/linux-user/aarch64/signal.c
+++ b/linux-user/aarch64/signal.c
@@ -16,3 +16,570 @@
  *  You should have received a copy of the GNU General Public License
  *  along with this program; if not, see <http://www.gnu.org/licenses/>.
  */
+#include "qemu/osdep.h"
+#include "qemu.h"
+#include "target_signal.h"
+#include "signal-common.h"
+#include "linux-user/trace.h"
+
+struct target_sigcontext {
+uint64_t fault_address;
+/* AArch64 registers */
+uint64_t regs[31];
+uint64_t sp;
+uint64_t pc;
+uint64_t pstate;
+/* 4K reserved for FP/SIMD state and future expansion */
+char __reserved[4096] __attribute__((__aligned__(16)));
+};
+
+struct target_ucontext {
+abi_ulong tuc_flags;
+abi_ulong tuc_link;
+target_stack_t tuc_stack;
+target_sigset_t tuc_sigmask;
+/* glibc uses a 1024-bit sigset_t */
+char __unused[1024 / 8 - sizeof(target_sigset_t)];
+/* last for future expansion */
+struct target_sigcontext tuc_mcontext;
+};
+
+/*
+ * Header to be used at the beginning of structures extending the user
+ * context. Such structures must be placed after the rt_sigframe on the stack
+ * and be 16-byte aligned. The last structure must be a dummy one with the
+ * magic and size set to 0.
+ */
+struct target_aarch64_ctx {
+uint32_t magic;
+uint32_t size;
+};
+
+#define TARGET_FPSIMD_MAGIC 0x46508001
+
+struct target_fpsimd_context {
+struct target_aarch64_ctx head;
+uint32_t fpsr;
+uint32_t fpcr;
+uint64_t vregs[32 * 2]; /* really uint128_t vregs[32] */
+};
+
+#define TARGET_EXTRA_MAGIC  0x45585401
+
+struct target_extra_context {
+struct target_aarch64_ctx head;
+uint64_t datap; /* 16-byte aligned pointer to extra space cast to __u64 */
+uint32_t size; /* size in bytes of the extra space */
+uint32_t reserved[3];
+};
+
+#define TARGET_SVE_MAGIC0x53564501
+
+struct target_sve_context {
+struct target_aarch64_ctx head;
+uint16_t vl;
+uint16_t reserved[3];
+/* The actual SVE data immediately follows.  It is laid out
+ * according to TARGET_SVE_SIG_{Z,P}REG_OFFSET, based off of
+ * the original struct pointer.
+ */
+};
+
+#define TARGET_SVE_VQ_BYTES  16
+
+#define TARGET_SVE_SIG_ZREG_SIZE(VQ)  ((VQ) * TARGET_SVE_VQ_BYTES)
+#define TARGET_SVE_SIG_PREG_SIZE(VQ)  ((VQ) * (TARGET_SVE_VQ_BYTES / 8))
+
+#define TARGET_SVE_SIG_REGS_OFFSET \
+QEMU_ALIGN_UP(sizeof(struct target_sve_context), TARGET_SVE_VQ_BYTES)
+#define TARGET_SVE_SIG_ZREG_OFFSET(VQ, N) \
+(TARGET_SVE_SIG_REGS_OFFSET + TARGET_SVE_SIG_ZREG_SIZE(VQ) * (N))
+#define TARGET_SVE_SIG_PREG_OFFSET(VQ, N) \
+(TARGET_SVE_SIG_ZREG_OFFSET(VQ, 32) + TARGET_SVE_SIG_PREG_SIZE(VQ) * (N))
+#define TARGET_SVE_SIG_FFR_OFFSET(VQ) \
+(TARGET_SVE_SIG_PREG_OFFSET(VQ, 16))
+#define TARGET_SVE_SIG_CONTEXT_SIZE(VQ) \
+(TARGET_SVE_SIG_PREG_OFFSET(VQ, 17))
+
+struct target_rt_sigframe {
+struct target_siginfo info;
+struct target_ucontext uc;
+};
+
+struct target_rt_frame_record {
+uint64_t fp;
+uint64_t lr;
+uint32_t tramp[2];
+};
+
+static void target_setup_general_frame(struct target_rt_sigframe *sf,
+   CPUARMState *env, target_sigset_t *set)
+{
+int i;
+
+__put_user(0, >uc.tuc_flags);
+__put_user(0, >uc.tuc_link);
+
+__put_user(target_sigaltstack_used.ss_sp, >uc.tuc_stack.ss_sp);
+__put_user(sas_ss_flags(env->xregs[31]), >uc.tuc_stack.ss_flags);
+__put_user(target_sigaltstack_used.ss_size, >uc.tuc_stack.ss_size);
+
+for (i = 0; i < 31; i++) {
+__put_user(env->xregs[i], >uc.tuc_mcontext.regs[i]);
+}
+__put_user(env->xregs[31], >uc.tuc_mcontext.sp);
+__put_user(env->pc, >uc.tuc_mcontext.pc);
+__put_user(pstate_read(env), >uc.tuc_mcontext.pstate);
+
+__put_user(env->exception.vaddress, >uc.tuc_mcontext.fault_address);
+
+for (i = 0; i < TARGET_NSIG_WORDS; i++) {
+__put_user(set->sig[i], >uc.tuc_sigmask.sig[i]);
+}
+}
+
+static void target_setup_fpsimd_record(struct target_fpsimd_context *fpsimd,
+   CPUARMState *env)
+{
+int i;
+
+__put_user(TARGET_FPSIMD_MAGIC, >head.magic);
+

  1   2   3   >