Re: [PATCH v2] KVM: nVMX: Reset RFLAGS on VM-exit

2013-03-04 Thread Gleb Natapov
On Sun, Mar 03, 2013 at 08:47:11PM +0100, Jan Kiszka wrote:
 From: Jan Kiszka jan.kis...@siemens.com
 
 Ouch, how could this work so well that far? We need to clear RFLAGS to
 the reset value as specified by the SDM. Particularly, IF must be off
 after VM-exit!
 
 Signed-off-by: Jan Kiszka jan.kis...@siemens.com
Reviewed-by: Gleb Natapov g...@redhat.com

 ---
 
 Changes in v2:
  - use symbolic constant for rflags reset value
 
  arch/x86/kvm/vmx.c |1 +
  1 files changed, 1 insertions(+), 0 deletions(-)
 
 diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
 index 486078d..eece4c8 100644
 --- a/arch/x86/kvm/vmx.c
 +++ b/arch/x86/kvm/vmx.c
 @@ -7454,6 +7454,7 @@ static void load_vmcs12_host_state(struct kvm_vcpu 
 *vcpu,
  
   kvm_register_write(vcpu, VCPU_REGS_RSP, vmcs12-host_rsp);
   kvm_register_write(vcpu, VCPU_REGS_RIP, vmcs12-host_rip);
 + vmx_set_rflags(vcpu, X86_EFLAGS_BIT1);
   /*
* Note that calling vmx_set_cr0 is important, even if cr0 hasn't
* actually changed, because it depends on the current state of
 -- 
 1.7.3.4

--
Gleb.
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Where to download kvm forum 2012 presentations? Thanks

2013-03-04 Thread Changlimin

-
本邮件及其附件含有杭州华三通信技术有限公司的保密信息,仅限于发送给上面地址中列出
的个人或群组。禁止任何其他人以任何形式使用(包括但不限于全部或部分地泄露、复制、
或散发)本邮件中的信息。如果您错收了本邮件,请您立即电话或邮件通知发件人并删除本
邮件!
This e-mail and its attachments contain confidential information from H3C, 
which is
intended only for the person or entity whose address is listed above. Any use 
of the
information contained herein in any way (including, but not limited to, total 
or partial
disclosure, reproduction, or dissemination) by persons other than the intended
recipient(s) is prohibited. If you receive this e-mail in error, please notify 
the sender
by phone or email immediately and delete it!


Re: [PATCH v13 1/8] save/load cpu runstate

2013-03-04 Thread Paolo Bonzini
Il 28/02/2013 13:13, Hu Tao ha scritto:
 This patch enables preservation of cpu runstate during save/load vm.
 So when a vm is restored from snapshot, the cpu runstate is restored,
 too.

I don't think this feature is worth breaking backwards migration
compatibility.  It is usually handled at a higher-level (management,
like libvirt).

Please make this a separate patch.

Paolo

 See following example:
 
 # save two vms: one is running, the other is paused
 (qemu) info status
 VM status: running
 (qemu) savevm running
 (qemu) stop
 (qemu) info status
 VM status: paused
 (qemu) savevm paused
 
 # restore the one running
 (qemu) info status
 VM status: paused
 (qemu) loadvm running
 (qemu) info status
 VM status: running
 
 # restore the one paused
 (qemu) loadvm paused
 (qemu) info status
 VM status: paused
 (qemu) cont
 (qemu)info status
 VM status: running
 
 Signed-off-by: Hu Tao hu...@cn.fujitsu.com
 ---
  include/sysemu/sysemu.h |  2 ++
  migration.c |  6 +-
  monitor.c   |  5 ++---
  savevm.c|  1 +
  vl.c| 34 ++
  5 files changed, 40 insertions(+), 8 deletions(-)
 
 diff --git a/include/sysemu/sysemu.h b/include/sysemu/sysemu.h
 index b19ec95..f121213 100644
 --- a/include/sysemu/sysemu.h
 +++ b/include/sysemu/sysemu.h
 @@ -19,6 +19,8 @@ extern uint8_t qemu_uuid[];
  int qemu_uuid_parse(const char *str, uint8_t *uuid);
  #define UUID_FMT 
 %02hhx%02hhx%02hhx%02hhx-%02hhx%02hhx-%02hhx%02hhx-%02hhx%02hhx-%02hhx%02hhx%02hhx%02hhx%02hhx%02hhx
  
 +void save_run_state(void);
 +void load_run_state(void);
  bool runstate_check(RunState state);
  void runstate_set(RunState new_state);
  int runstate_is_running(void);
 diff --git a/migration.c b/migration.c
 index 11725ae..c29830e 100644
 --- a/migration.c
 +++ b/migration.c
 @@ -107,11 +107,7 @@ static void process_incoming_migration_co(void *opaque)
  /* Make sure all file formats flush their mutable metadata */
  bdrv_invalidate_cache_all();
  
 -if (autostart) {
 -vm_start();
 -} else {
 -runstate_set(RUN_STATE_PAUSED);
 -}
 +load_run_state();
  }
  
  void process_incoming_migration(QEMUFile *f)
 diff --git a/monitor.c b/monitor.c
 index 32a6e74..bf974b4 100644
 --- a/monitor.c
 +++ b/monitor.c
 @@ -2059,13 +2059,12 @@ void qmp_closefd(const char *fdname, Error **errp)
  
  static void do_loadvm(Monitor *mon, const QDict *qdict)
  {
 -int saved_vm_running  = runstate_is_running();
  const char *name = qdict_get_str(qdict, name);
  
  vm_stop(RUN_STATE_RESTORE_VM);
  
 -if (load_vmstate(name) == 0  saved_vm_running) {
 -vm_start();
 +if (load_vmstate(name) == 0) {
 +load_run_state();
  }
  }
  
 diff --git a/savevm.c b/savevm.c
 index a8a53ef..aa631eb 100644
 --- a/savevm.c
 +++ b/savevm.c
 @@ -2143,6 +2143,7 @@ void do_savevm(Monitor *mon, const QDict *qdict)
  }
  
  saved_vm_running = runstate_is_running();
 +save_run_state();
  vm_stop(RUN_STATE_SAVE_VM);
  
  memset(sn, 0, sizeof(*sn));
 diff --git a/vl.c b/vl.c
 index febd2ea..7991f2e 100644
 --- a/vl.c
 +++ b/vl.c
 @@ -523,6 +523,7 @@ static int default_driver_check(QemuOpts *opts, void 
 *opaque)
  /* QEMU state */
  
  static RunState current_run_state = RUN_STATE_PRELAUNCH;
 +static RunState saved_run_state = RUN_STATE_RUNNING;
  
  typedef struct {
  RunState from;
 @@ -546,6 +547,7 @@ static const RunStateTransition 
 runstate_transitions_def[] = {
  { RUN_STATE_PAUSED, RUN_STATE_FINISH_MIGRATE },
  
  { RUN_STATE_POSTMIGRATE, RUN_STATE_RUNNING },
 +{ RUN_STATE_POSTMIGRATE, RUN_STATE_PAUSED },
  { RUN_STATE_POSTMIGRATE, RUN_STATE_FINISH_MIGRATE },
  
  { RUN_STATE_PRELAUNCH, RUN_STATE_RUNNING },
 @@ -556,6 +558,7 @@ static const RunStateTransition 
 runstate_transitions_def[] = {
  { RUN_STATE_FINISH_MIGRATE, RUN_STATE_POSTMIGRATE },
  
  { RUN_STATE_RESTORE_VM, RUN_STATE_RUNNING },
 +{ RUN_STATE_RESTORE_VM, RUN_STATE_PAUSED },
  
  { RUN_STATE_RUNNING, RUN_STATE_DEBUG },
  { RUN_STATE_RUNNING, RUN_STATE_INTERNAL_ERROR },
 @@ -585,11 +588,39 @@ static const RunStateTransition 
 runstate_transitions_def[] = {
  
  static bool runstate_valid_transitions[RUN_STATE_MAX][RUN_STATE_MAX];
  
 +void save_run_state(void)
 +{
 +saved_run_state = current_run_state;
 +}
 +
 +void load_run_state(void)
 +{
 +if (saved_run_state == RUN_STATE_RUNNING) {
 +vm_start();
 +} else if (!runstate_check(saved_run_state)) {
 +runstate_set(saved_run_state);
 +} else {
 +; /* leave unchanged */
 +}
 +}
 +
  bool runstate_check(RunState state)
  {
  return current_run_state == state;
  }
  
 +static void runstate_save(QEMUFile *f, void *opaque)
 +{
 +qemu_put_byte(f, saved_run_state);
 +}
 +
 +static int runstate_load(QEMUFile *f, void *opaque, int version_id)
 +{
 +saved_run_state = qemu_get_byte(f);
 +
 +return 0;
 +}
 +
  

Re: [PATCH v13 2/8] start vm after resetting it

2013-03-04 Thread Paolo Bonzini
Il 28/02/2013 13:13, Hu Tao ha scritto:
 From: Wen Congyang we...@cn.fujitsu.com
 
 The guest should run after resetting it, but it does not run if its
 old state is RUN_STATE_INTERNAL_ERROR or RUN_STATE_PAUSED.
 
 We don't set runstate to RUN_STATE_PAUSED when resetting the guest,
 so the runstate will be changed from RUN_STATE_INTERNAL_ERROR or
 RUN_STATE_PAUSED to RUN_STATE_RUNNING(not RUN_STATE_PAUSED).

This is also debatable.  In particular, restarting an INTERNAL_ERROR
guest makes it harder to inspect the state at the time of the failure.

INTERNAL_ERROR should never happen, let's separate this patch too.

Paolo
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v13 4/8] add a new runstate: RUN_STATE_GUEST_PANICKED

2013-03-04 Thread Paolo Bonzini
Il 28/02/2013 13:13, Hu Tao ha scritto:
 The guest will be in this state when it is panicked.
 
 Signed-off-by: Wen Congyang we...@cn.fujitsu.com
 Signed-off-by: Hu Tao hu...@cn.fujitsu.com
 ---
  migration.c  |  1 +
  qapi-schema.json |  6 +-
  qmp.c|  3 ++-
  vl.c | 11 ++-
  4 files changed, 18 insertions(+), 3 deletions(-)
 
 diff --git a/migration.c b/migration.c
 index c29830e..fa17b82 100644
 --- a/migration.c
 +++ b/migration.c
 @@ -698,6 +698,7 @@ static void *buffered_file_thread(void *opaque)
  int64_t start_time, end_time;
  
  DPRINTF(done iterating\n);
 +save_run_state();
  start_time = qemu_get_clock_ms(rt_clock);
  qemu_system_wakeup_request(QEMU_WAKEUP_REASON_OTHER);
  if (old_vm_running) {
 diff --git a/qapi-schema.json b/qapi-schema.json
 index 28b070f..8f1d138 100644
 --- a/qapi-schema.json
 +++ b/qapi-schema.json
 @@ -174,11 +174,15 @@
  # @suspended: guest is suspended (ACPI S3)
  #
  # @watchdog: the watchdog action is configured to pause and has been 
 triggered
 +#
 +# @guest-panicked: the panicked action is configured to pause and has been
 +# triggered.
  ##
  { 'enum': 'RunState',
'data': [ 'debug', 'inmigrate', 'internal-error', 'io-error', 'paused',
  'postmigrate', 'prelaunch', 'finish-migrate', 'restore-vm',
 -'running', 'save-vm', 'shutdown', 'suspended', 'watchdog' ] }
 +'running', 'save-vm', 'shutdown', 'suspended', 'watchdog',
 +'guest-panicked' ] }
  
  ##
  # @SnapshotInfo
 diff --git a/qmp.c b/qmp.c
 index 5f1bed1..f5027f6 100644
 --- a/qmp.c
 +++ b/qmp.c
 @@ -150,7 +150,8 @@ void qmp_cont(Error **errp)
  Error *local_err = NULL;
  
  if (runstate_check(RUN_STATE_INTERNAL_ERROR) ||
 -   runstate_check(RUN_STATE_SHUTDOWN)) {
 +runstate_check(RUN_STATE_SHUTDOWN) ||
 +runstate_check(RUN_STATE_GUEST_PANICKED)) {
  error_set(errp, QERR_RESET_REQUIRED);
  return;
  } else if (runstate_check(RUN_STATE_SUSPENDED)) {
 diff --git a/vl.c b/vl.c
 index 3d08e1a..51d4922 100644
 --- a/vl.c
 +++ b/vl.c
 @@ -536,6 +536,7 @@ static const RunStateTransition 
 runstate_transitions_def[] = {
  
  { RUN_STATE_INMIGRATE, RUN_STATE_RUNNING },
  { RUN_STATE_INMIGRATE, RUN_STATE_PAUSED },
 +{ RUN_STATE_INMIGRATE, RUN_STATE_GUEST_PANICKED },

Is this a consequence of the first patch?

  { RUN_STATE_INTERNAL_ERROR, RUN_STATE_RUNNING },
  { RUN_STATE_INTERNAL_ERROR, RUN_STATE_FINISH_MIGRATE },
 @@ -549,6 +550,7 @@ static const RunStateTransition 
 runstate_transitions_def[] = {
  { RUN_STATE_POSTMIGRATE, RUN_STATE_RUNNING },
  { RUN_STATE_POSTMIGRATE, RUN_STATE_PAUSED },
  { RUN_STATE_POSTMIGRATE, RUN_STATE_FINISH_MIGRATE },
 +{ RUN_STATE_POSTMIGRATE, RUN_STATE_GUEST_PANICKED },

Impossible.  GUEST_PANICKED requires an instruction to be executed in
the guest, so it should first go to RUNNING.

  { RUN_STATE_PRELAUNCH, RUN_STATE_RUNNING },
  { RUN_STATE_PRELAUNCH, RUN_STATE_FINISH_MIGRATE },
 @@ -559,6 +561,7 @@ static const RunStateTransition 
 runstate_transitions_def[] = {
  
  { RUN_STATE_RESTORE_VM, RUN_STATE_RUNNING },
  { RUN_STATE_RESTORE_VM, RUN_STATE_PAUSED },
 +{ RUN_STATE_RESTORE_VM, RUN_STATE_GUEST_PANICKED },

Is it also for the first patch?

  { RUN_STATE_RUNNING, RUN_STATE_DEBUG },
  { RUN_STATE_RUNNING, RUN_STATE_INTERNAL_ERROR },
 @@ -569,6 +572,7 @@ static const RunStateTransition 
 runstate_transitions_def[] = {
  { RUN_STATE_RUNNING, RUN_STATE_SAVE_VM },
  { RUN_STATE_RUNNING, RUN_STATE_SHUTDOWN },
  { RUN_STATE_RUNNING, RUN_STATE_WATCHDOG },
 +{ RUN_STATE_RUNNING, RUN_STATE_GUEST_PANICKED },

This one is obviously ok.

  { RUN_STATE_SAVE_VM, RUN_STATE_RUNNING },
  
 @@ -583,6 +587,10 @@ static const RunStateTransition 
 runstate_transitions_def[] = {
  { RUN_STATE_WATCHDOG, RUN_STATE_RUNNING },
  { RUN_STATE_WATCHDOG, RUN_STATE_FINISH_MIGRATE },
  
 +{ RUN_STATE_GUEST_PANICKED, RUN_STATE_RUNNING },
 +{ RUN_STATE_GUEST_PANICKED, RUN_STATE_PAUSED },
 +{ RUN_STATE_GUEST_PANICKED, RUN_STATE_FINISH_MIGRATE },

Like SHUTDOWN, it should go first to PAUSED and then to RUNNING.  A
GUEST_PANICKED - RUNNING transition is not possible.  You're seeing it
because you lack the addition of GUEST_PANICKED here:

if (runstate_check(RUN_STATE_INTERNAL_ERROR) ||
runstate_check(RUN_STATE_SHUTDOWN)) {
runstate_set(RUN_STATE_PAUSED);
}

I think you should first move the INTERNAL_ERROR || SHUTDOWN checks to a
separate function, so that you can then add GUEST_PANICKED.

Paolo

  { RUN_STATE_MAX, RUN_STATE_MAX },
  };
  
 @@ -2001,7 +2009,8 @@ static bool main_loop_should_exit(void)
  qemu_system_reset(VMRESET_REPORT);
  resume_all_vcpus();
  if 

Re: [PATCH v13 5/8] add a new qevent: QEVENT_GUEST_PANICKED

2013-03-04 Thread Paolo Bonzini
Il 28/02/2013 13:13, Hu Tao ha scritto:
 This event will be emitted when the guest is panicked.
 
 Signed-off-by: Wen Congyang we...@cn.fujitsu.com
 ---
  include/monitor/monitor.h | 1 +
  monitor.c | 1 +
  2 files changed, 2 insertions(+)
 
 diff --git a/include/monitor/monitor.h b/include/monitor/monitor.h
 index 87fb49c..4006905 100644
 --- a/include/monitor/monitor.h
 +++ b/include/monitor/monitor.h
 @@ -45,6 +45,7 @@ typedef enum MonitorEvent {
  QEVENT_WAKEUP,
  QEVENT_BALLOON_CHANGE,
  QEVENT_SPICE_MIGRATE_COMPLETED,
 +QEVENT_GUEST_PANICKED,
  
  /* Add to 'monitor_event_names' array in monitor.c when
   * defining new events here */
 diff --git a/monitor.c b/monitor.c
 index bf974b4..d65218d 100644
 --- a/monitor.c
 +++ b/monitor.c
 @@ -463,6 +463,7 @@ static const char *monitor_event_names[] = {
  [QEVENT_WAKEUP] = WAKEUP,
  [QEVENT_BALLOON_CHANGE] = BALLOON_CHANGE,
  [QEVENT_SPICE_MIGRATE_COMPLETED] = SPICE_MIGRATE_COMPLETED,
 +[QEVENT_GUEST_PANICKED] = GUEST_PANICKED,
  };
  QEMU_BUILD_BUG_ON(ARRAY_SIZE(monitor_event_names) != QEVENT_MAX)
  
 

Reviewed-by: Paolo Bonzini pbonz...@redhat.com

Paolo
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v13 0/8] pv event interface between host and guest

2013-03-04 Thread Paolo Bonzini
Il 03/03/2013 10:17, Gleb Natapov ha scritto:
 On Thu, Feb 28, 2013 at 08:13:10PM +0800, Hu Tao wrote:
 This series implements a new interface, kvm pv event, to notify host when
 some events happen in guest. Right now there is one supported event: guest
 panic.

 What other event do you have in mind? Is the interface generic enough to
 accommodate future, yet unknown, events? It allows passing only one
 integer specifying the event type; what if additional info is needed? Maybe
 stop pretending that the device is generic and make it do one thing but do
 it well? For a generic event passing interface (whatever it may be needed
 for) the much more powerful virtio should be used.
 
 On implementation itself I do not understand why is this kvm specific.
 The only thing that makes it so is that you hook device initialization
 into guest kvm initialization code, but this is obviously incorrect.
 What stops QEMU tcg or Xen from reusing the same device for the same
 purpose except the artificial limitation in a guest.

Agreed.

 Reading data from a random ioports is not how you discover platform
 devices in 21 century (and the data you read from unassigned port is not
 guaranteed to be zero, it may depend on QEMU version), you use ACPI for
 that and Marcelo already pointed that to you. Having little knowledge of
 ACPI (we all do) is not a good reason to not doing it. We probably need
 to reserve QEMU specific ACPI Plug and Play hardware ID to define our own
 devices. After that you will be able to create device with _HID(QEMU0001)
 in DSDT that supplies address information (ioport to use) and capability
 supported.

Please also document this HID in a new file in the QEMU docs/ directory.

 Guest uses acpi_get_devices() to discover a platform device by
 its name (QEMU0001).  Then you put the driver for the platform device
 into drivers/platform/x86/ and QEMU/kvm/Xen all will be able to use it.

Just to clarify it for Hu Tao, the read from a random ioport is how the
ACPI code will detect presence of the device.

Something like this should work (in SeaBIOS's src/acpi-dsdt-isa.dsl):

Device(PEVT) {
Name(_HID, EisaId(QEMU0001))
OperationRegion(PEOR, SystemIO, 0x505, 0x01)
Field(PEOR, ByteAcc, NoLock, Preserve) {
PEPT,   8,
}

Method(_STA, 0, NotSerialized) {
Store(PEPT, Local0)
If (LEqual(Local0, Zero)) {
Return (0x00)
} Else {
Return (0x0F)
}
}

Name(_CRS, ResourceTemplate() {
IO(Decode16, 0x505, 0x505, 0x01, 0x01)
})
}

Please test this with a QEMU option like -M pc-1.4.  The device should
_not_ be detected if you're doing it right.

 On QEMU side of things I cannot comment much on how QOMified the device
 is (it should be),

Please make the device target-independent.  It can be used on non-x86
architectures that have I/O ports.  You should make the port
configurable using a property (DEFINE_PROPERTY_INT16 or something like
that), with a default of 0x505.

All the panicked_action is not necessary in my opinion.  We have it for
watchdogs, but that's really a legacy thing.  Let libvirt do it, and
always make the guest panic perform the PANICKED_PAUSE action.

If you do it properly, a lot (really a lot) of code will go away.

 I hope other reviews will verify it, but I noticed
 that device is only initialized for PIIX, what about Q35?

Yup.

Paolo

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v13 0/8] pv event interface between host and guest

2013-03-04 Thread Gleb Natapov
On Mon, Mar 04, 2013 at 11:28:05AM +0100, Paolo Bonzini wrote:
 Il 04/03/2013 11:21, Gleb Natapov ha scritto:
   Just to clarify it for Hu Tao, the read from a random ioport is how the
   ACPI code will detect presence of the device.
   
  Actually no (at least in the long run, for the first version it may be
  OK).
 
 Agreed.
 
  Since we want to move DSDT generation into QEMU if device will not
  be present QEMU will not generate corresponded Device() in DSDT, or it
  will generate it with _STA() { Return (0x00)} hard coded.
 
 Yes, this would be good.
 
  Seabios can do
  the same if we will pass it info about device presence via fw_cfg.
 
 True, but I don't like this a lot.  I don't like splitting decisions
 between SeaBIOS and the DSDT, you end up touching code all over the
 place and writing ASL is simpler than patching---even with all the
 machinery that we have.
That's the main argument in favor of moving DSDT into QEMU regardless
of this patch series, but as long as we have it in Seabios, have
infrastructure for patching and use it for many things already I do not
see why avoiding it.

  It is also simpler to move ASL from SeaBIOS to
 OVMF and/or vice versa.  I don't recall what the opposition was to a
 fw_cfg driver directly in the DSDT, but I think this would be a good
 usage for it.
 
Basically fw_cfg was not designed with this in mind. It was really meant
to be a simple interface for FW running on one CPU to use. You probably
may do locking with AML too to guarantee atomic access, but things get
complicated. Also, many options that were added lately use the file interface
(since this is what Kevin prefers) and manipulating strings in AML is
probably not what we want.

 Splitting it between QEMU and DSDT is a bit better, since you have to
 touch QEMU anyway to implement the feature.
 
 Anyhow, this does not apply to the next submission of this series.  I
 think we can agree to the compromise of using ACPI but still read the
 port in _STA.
 
If you want to make the ioport configurable I do not see how we can avoid
patching.

--
Gleb.
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v13 0/8] pv event interface between host and guest

2013-03-04 Thread Gleb Natapov
On Mon, Mar 04, 2013 at 11:49:07AM +0100, Paolo Bonzini wrote:
 Il 04/03/2013 11:43, Gleb Natapov ha scritto:
   Anyhow, this does not apply to the next submission of this series.  I
   think we can agree to the compromise of using ACPI but still read the
   port in _STA.
  
  If you want to make ioport configurable I do not see how can we avoid
  patching.
 
 I want to make the ioport configurable in the device, but the PIIX and
 ICH9 (which are what the DSDT is written for) will always use port 0x505.
 
But the device is not part of PIIX or ICH9. It is additional device that
may or may not be present depending on a command line. So what if
someone configures debugcon or debugexit to use this port? We can always
blame the users, but I feel that we are making unnecessary compromises.

 You can configure a different iobase for your serial ports, the guest
 can still use them but not discover them via ACPI.  This is the same thing.
 
Probably we should patch DSDT too when it will be in QEMU :) or force
iobase to spec values if the device is used as part of a chipset.

--
Gleb.
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [Qemu-devel] [PATCH v7 07/11] hw/arm_gic: Convert ARM GIC classes to use init/realize

2013-03-04 Thread Andreas Färber
Am 26.02.2013 18:40, schrieb Peter Maydell:
 Convert the ARM GIC classes to use init/realize rather than
 SysBusDevice::init. (We have to do them all in one patch to
 avoid unconverted subclasses calling a nonexistent SysBusDevice
 init function in the base class and crashing.)
 
 Signed-off-by: Peter Maydell peter.mayd...@linaro.org
 ---
  hw/arm_gic.c  |   23 +--
  hw/arm_gic_common.c   |   26 +++---
  hw/arm_gic_internal.h |2 +-
  hw/armv7m_nvic.c  |   15 ---
  4 files changed, 37 insertions(+), 29 deletions(-)
 
 diff --git a/hw/arm_gic.c b/hw/arm_gic.c
 index 90e43d0..250e720 100644
 --- a/hw/arm_gic.c
 +++ b/hw/arm_gic.c
 @@ -659,14 +659,18 @@ void gic_init_irqs_and_distributor(GICState *s, int 
 num_irq)
  memory_region_init_io(s-iomem, gic_dist_ops, s, gic_dist, 0x1000);
  }
  
 -static int arm_gic_init(SysBusDevice *dev)
 +static void arm_gic_realize(DeviceState *dev, Error **errp)
  {
 -/* Device instance init function for the GIC sysbus device */
 +/* Device instance realize function for the GIC sysbus device */
  int i;
 -GICState *s = FROM_SYSBUS(GICState, dev);
 +GICState *s = ARM_GIC(dev);
 +SysBusDevice *sbd = SYS_BUS_DEVICE(dev);
  ARMGICClass *agc = ARM_GIC_GET_CLASS(s);
  
 -agc-parent_init(dev);
 +agc-parent_realize(dev, errp);
 +if (error_is_set(errp)) {
 +return;
 +}
  
  gic_init_irqs_and_distributor(s, s-num_irq);
  
 @@ -686,22 +690,21 @@ static int arm_gic_init(SysBusDevice *dev)
gic_cpu, 0x100);
  }
  /* Distributor */
 -sysbus_init_mmio(dev, s-iomem);
 +sysbus_init_mmio(sbd, s-iomem);
  /* cpu interfaces (one for current cpu plus one per cpu) */
  for (i = 0; i = NUM_CPU(s); i++) {
 -sysbus_init_mmio(dev, s-cpuiomem[i]);
 +sysbus_init_mmio(sbd, s-cpuiomem[i]);
  }
 -return 0;
  }
  
  static void arm_gic_class_init(ObjectClass *klass, void *data)
  {
  DeviceClass *dc = DEVICE_CLASS(klass);
 -SysBusDeviceClass *sbc = SYS_BUS_DEVICE_CLASS(klass);
  ARMGICClass *agc = ARM_GIC_CLASS(klass);
 -agc-parent_init = sbc-init;
 -sbc-init = arm_gic_init;
 +
  dc-no_user = 1;
 +agc-parent_realize = dc-realize;
 +dc-realize = arm_gic_realize;
  }
  
  static const TypeInfo arm_gic_info = {
 diff --git a/hw/arm_gic_common.c b/hw/arm_gic_common.c
 index 2947622..3b2955c 100644
 --- a/hw/arm_gic_common.c
 +++ b/hw/arm_gic_common.c
 @@ -104,31 +104,35 @@ static int gic_load(QEMUFile *f, void *opaque, int 
 version_id)
  return 0;
  }
  
 -static int arm_gic_common_init(SysBusDevice *dev)
 +static void arm_gic_common_realize(DeviceState *dev, Error **errp)
  {
 -GICState *s = FROM_SYSBUS(GICState, dev);
 +GICState *s = ARM_GIC_COMMON(dev);
  int num_irq = s-num_irq;
  
  if (s-num_cpu  NCPU) {
 -hw_error(requested %u CPUs exceeds GIC maximum %d\n,
 - s-num_cpu, NCPU);
 +error_setg(errp, requested %u CPUs exceeds GIC maximum %d\n,

Please drop \n for error_setg(). Probably would be worth adding to a
convert-to-realize section on the Wiki.

 +   s-num_cpu, NCPU);
 +return;
  }
  s-num_irq += GIC_BASE_IRQ;
  if (s-num_irq  GIC_MAXIRQ) {
 -hw_error(requested %u interrupt lines exceeds GIC maximum %d\n,
 - num_irq, GIC_MAXIRQ);
 +error_setg(errp,
 +   requested %u interrupt lines exceeds GIC maximum %d\n,
 +   num_irq, GIC_MAXIRQ);
 +return;
  }
  /* ITLinesNumber is represented as (N / 32) - 1 (see
   * gic_dist_readb) so this is an implementation imposed
   * restriction, not an architectural one:
   */
  if (s-num_irq  32 || (s-num_irq % 32)) {
 -hw_error(%d interrupt lines unsupported: not divisible by 32\n,
 - num_irq);
 +error_setg(errp,
 +   %d interrupt lines unsupported: not divisible by 32\n,
 +   num_irq);
 +return;
  }
  
  register_savevm(NULL, arm_gic, -1, 3, gic_save, gic_load, s);
 -return 0;
  }
  
  static void arm_gic_common_reset(DeviceState *dev)
 @@ -173,12 +177,12 @@ static Property arm_gic_common_properties[] = {
  
  static void arm_gic_common_class_init(ObjectClass *klass, void *data)
  {
 -SysBusDeviceClass *sc = SYS_BUS_DEVICE_CLASS(klass);
  DeviceClass *dc = DEVICE_CLASS(klass);
 +
  dc-reset = arm_gic_common_reset;
 +dc-realize = arm_gic_common_realize;
  dc-props = arm_gic_common_properties;
  dc-no_user = 1;
 -sc-init = arm_gic_common_init;
  }
  
  static const TypeInfo arm_gic_common_type = {
 diff --git a/hw/arm_gic_internal.h b/hw/arm_gic_internal.h
 index 3640be0..3ba37f3 100644
 --- a/hw/arm_gic_internal.h
 +++ b/hw/arm_gic_internal.h
 @@ -132,7 +132,7 @@ typedef struct ARMGICCommonClass {
  
  typedef struct ARMGICClass {
  

Re: [Qemu-devel] [PATCH v7 03/11] target-arm: Drop CPUARMState* argument from bank_number()

2013-03-04 Thread Andreas Färber
Am 26.02.2013 18:40, schrieb Peter Maydell:
 Drop the CPUARMState* argument from bank_number(), since we only
 use it for passing to cpu_abort(). Use hw_error() instead.
 This avoids propagating further interfaces using env pointers.
 
 In the long term this function's callers need auditing to fix
 problems where badly behaved guests can pass invalid bank numbers.
 
 Signed-off-by: Peter Maydell peter.mayd...@linaro.org

Reviewed-by: Andreas Färber afaer...@suse.de

Thanks,
Andreas

-- 
SUSE LINUX Products GmbH, Maxfeldstr. 5, 90409 Nürnberg, Germany
GF: Jeff Hawn, Jennifer Guild, Felix Imendörffer; HRB 16746 AG Nürnberg
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v13 0/8] pv event interface between host and guest

2013-03-04 Thread Paolo Bonzini
Il 04/03/2013 11:59, Gleb Natapov ha scritto:
  I want to make the ioport configurable in the device, but the PIIX and
  ICH9 (which are what the DSDT is written for) will always use port 0x505.
 
 But the device is not part of PIIX or ICH9.

So is kvmclock, or kvmvapic.  I think it makes sense to add this device
to PIIX or ICH9 since it is an ISA device.

 It is additional device that
 may or may not be present depending on a command line. So what if
 someone configures debugcon or debugexit to use this port?

I haven't checked if debug{con,exit} will pass the _STA test, but if
they do, the user will get a Ctrl-A or respectively an exit of QEMU when
the guest panics.

What if someone configures debugcon on port 0x3f8?  Some guest will use
it, some will not.

 We can always
 blame the users, but I feel that we are making unnecessary compromises.

Once we choose an ISA device, where the user has full control of the
address space, we already know we'll have to accept compromises.  I
don't think this compromise is particularly bad: do discovery via ACPI
(nice), accept that the user can trick the AML (ugly).

Paolo

  You can configure a different iobase for your serial ports, the guest
  can still use them but not discover them via ACPI.  This is the same thing.
 
 Probably we should patch DSDT too when it will be in QEMU :) or force
 iobase to spec values if the device is used as part of a chipset.

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v13 0/8] pv event interface between host and guest

2013-03-04 Thread Gleb Natapov
On Mon, Mar 04, 2013 at 11:05:37AM +0100, Paolo Bonzini wrote:
  Guest uses acpi_get_devices() to discover a platform device by
  its name (QEMU0001).  Then you put the driver for the platform device
  into drivers/platform/x86/ and QEMU/kvm/Xen all will be able to use it.
 
 Just to clarify it for Hu Tao, the read from a random ioport is how the
 ACPI code will detect presence of the device.
 
Actually no (at least in the long run, for the first version it may be
OK). Since we want to move DSDT generation into QEMU if device will not
be present QEMU will not generate a corresponding Device() in DSDT, or it
will generate it with _STA() { Return (0x00)} hard coded. Seabios can do
the same if we will pass it info about device presence via fw_cfg. Not
sure Kevin will like it now when we plan to move DSDT into QEMU anyway :)

 Something like this should work (in SeaBIOS's src/acpi-dsdt-isa.dsl):
 
 Device(PEVT) {
 Name(_HID, EisaId(QEMU0001))
 OperationRegion(PEOR, SystemIO, 0x505, 0x01)
 Field(PEOR, ByteAcc, NoLock, Preserve) {
 PEPT,   8,
 }
 
 Method(_STA, 0, NotSerialized) {
 Store(PEPT, Local0)
 If (LEqual(Local0, Zero)) {
 Return (0x00)
 } Else {
 Return (0x0F)
 }
 }
 
 Name(_CRS, ResourceTemplate() {
 IO(Decode16, 0x505, 0x505, 0x01, 0x01)
 })
 }
 
 Please test this with a QEMU option like -M pc-1.4.  The device should
 _not_ be detected if you're doing it right.
 

--
Gleb.
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v13 0/8] pv event interface between host and guest

2013-03-04 Thread Gleb Natapov
On Mon, Mar 04, 2013 at 12:10:58PM +0100, Paolo Bonzini wrote:
 Il 04/03/2013 11:59, Gleb Natapov ha scritto:
   I want to make the ioport configurable in the device, but the PIIX and
   ICH9 (which are what the DSDT is written for) will always use port 0x505.
  
  But the device is not part of PIIX or ICH9.
 
 So is kvmclock, or kvmvapic.  I think it makes sense to add this device
 to PIIX or ICH9 since it is an ISA device.
 
Those are CPU interfaces, not chipset. fw_cfg or our PIIX ACPI additions would
be better examples, but since they are always present and non configurable they
are in a different category.

  It is additional device that
  may or may not be present depending on a command line. So what if
  someone configures debugcon or debugexit to use this port?
 
 I haven't checked if debug{con,exit} will pass the _STA test, but if
 they do, the user will get a Ctrl-A or respectively an exit of QEMU when
 the guest panics.
 
 What if someone configures debugcon on port 0x3f8?  Some guest will use
 it, some will not.
 
Qemu should fail to start since conflict will be detected during
initialization.

  We can always
  blame the users, but I feel that we are making unnecessary compromises.
 
 Once we choose an ISA device, where the user has full control of the
 address space, we already know we'll have to accept compromises.  I
 don't think this compromise is particularly bad: do discovery via ACPI
 (nice), accept that the user can trick the AML (ugly).
 
Why would we have to accept compromises? We may, but I disagree that it
is necessary. If a user configures conflicting ports QEMU will detect
it during init; if the configuration is correct DSDT should provide enough
information for guests to use the configured devices.

--
Gleb.
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v13 0/8] pv event interface between host and guest

2013-03-04 Thread Paolo Bonzini
Il 04/03/2013 12:20, Gleb Natapov ha scritto:
 On Mon, Mar 04, 2013 at 12:10:58PM +0100, Paolo Bonzini wrote:
 It is additional device that
 may or may not be present depending on a command line. So what if
 someone configures debugcon or debugexit to use this port?

 I haven't checked if debug{con,exit} will pass the _STA test, but if
 they do, the user will get a Ctrl-A or respectively an exit of QEMU when
 the guest panics.

 What if someone configures debugcon on port 0x3f8?  Some guest will use
 it, some will not.

 Qemu should fail to start since conflict will be detected during
 initialization.

Not if you _remove_ the serial port and place debugcon at 0x3f8.

Same here, you can remove the panic event port and add debugcon at
0x505.  That's the problematic case.  But if the user goes to that
length, I think we can honestly say we don't care.

Paolo

 We can always
 blame the users, but I feel that we are making unnecessary compromises.

 Once we choose an ISA device, where the user has full control of the
 address space, we already know we'll have to accept compromises.  I
 don't think this compromise is particularly bad: do discovery via ACPI
 (nice), accept that the user can trick the AML (ugly).

 Why would have we accept compromises, we may, but I disagree that it
 is necessary? If user configures conflicting ports QEMU will detect
 it during init, if configuration is correct DSDT should provide enough
 information for guests to use configured devices.
 
 --
   Gleb.
 

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [Qemu-devel] [PATCH v7 07/11] hw/arm_gic: Convert ARM GIC classes to use init/realize

2013-03-04 Thread Peter Maydell
On 4 March 2013 19:10, Andreas Färber afaer...@suse.de wrote:
 Am 26.02.2013 18:40, schrieb Peter Maydell:

  if (s-num_cpu  NCPU) {
 -hw_error(requested %u CPUs exceeds GIC maximum %d\n,
 - s-num_cpu, NCPU);
 +error_setg(errp, requested %u CPUs exceeds GIC maximum %d\n,

 Please drop \n for error_setg(). Probably would be worth adding to a
 convert-to-realize section on the Wiki.

Doh. That's such a trivial change I intend to just make it in
passing when I put these changes into target-arm.next rather
than sending out an entire fresh round of patches, unless you
object.

 Otherwise looks fine, thanks.

Should I mark such a fixed-up patch with your reviewed-by tag?

thanks
-- PMM
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [Qemu-devel] [PATCH 00/12] KVM Support for MIPS32 Processors

2013-03-04 Thread Andreas Färber
Hello,

Am 02.03.2013 16:18, schrieb Sanjay Lal:
 The following patchset implements KVM support for MIPS32 processors,
 using Trap  Emulate, with basic runtime binary translation to improve
 performance.
[snip]

Please see http://wiki.qemu.org/Contribute/SubmitAPatch for some hints
on how to improve submission of your QEMU patchset. In particular we
require Signed-off-bys just like Linux, subjects should use
target-mips:  or similar based on file/directory names, subject line
should be one short statement and commit message should give further
explanations of what the patch is doing and why, where appropriate.

Also a fair warning: I am refactoring the core CPU code, so you should
be tracking qemu.git and/or the mailing list for possible conflicts and
rebase as necessary.
In that context please prefer use of MIPSCPU over CPUMIPSState (e.g., in
GIC state and functions).

Please adopt our Coding Style, which among other things asks for
CamelCase struct naming (e.g., MIPSGICState rather than gic_t).

Please learn about QOM usage and its conventions. Your GIC should
probably be a SysBusDevice, not a pre-qdev collection of manually
allocated state.
http://wiki.qemu.org/QOMConventions

There's also an ongoing discussion about DPRINTF()s defined as no-op do
{} while(0) leading to format string breakages over time. Recommended
replacement is a macro using do { if (FOO) { ... } } while (0), with
FOO evaluating to 0 in the no-debug case, so that everything gets
compile-tested but optimized out.

Regards,
Andreas

-- 
SUSE LINUX Products GmbH, Maxfeldstr. 5, 90409 Nürnberg, Germany
GF: Jeff Hawn, Jennifer Guild, Felix Imendörffer; HRB 16746 AG Nürnberg
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v13 0/8] pv event interface between host and guest

2013-03-04 Thread Paolo Bonzini
Il 04/03/2013 12:52, Gleb Natapov ha scritto:
  Same here, you can remove the panic event port and add debugcon at
  0x505.  That's the problematic case.  But if the user goes to that
  length, I think we can honestly say we don't care.

 IMO there is a big difference between well know serial ISA ports and
 PIO ports we allocate for our devices. Later have to be discoverable
 without resorting to probing. On CPU level we do the same with CPUID
 bits instead of relying on MSRs #GP. On KVM API level we do the same
 with capabilities instead of relying on ioctls returning errors. This
 is not different.

Ok, I see your point now.  Yes, this is a good reason why patching is
better in the long run.

Paolo
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] KVM: nVMX: Fix setting of CR0 and CR4 in guest mode

2013-03-04 Thread Gleb Natapov
On Thu, Feb 28, 2013 at 10:44:47AM +0100, Jan Kiszka wrote:
 The logic for calculating the value with which we call kvm_set_cr0/4 was
 broken (will definitely be visible with nested unrestricted guest mode
 support). Also, we performed the check regarding CR0_ALWAYSON too early
 when in guest mode.
 
 What really needs to be done on both CR0 and CR4 is to mask out L1-owned
 bits and merge them in from GUEST_CR0/4. In contrast, arch.cr0/4 and
 arch.cr0/4_guest_owned_bits contain the mangled L0+L1 state and, thus,
 are not suited as input.
 
 For both CRs, we can then apply the check against VMXON_CRx_ALWAYSON and
 refuse the update if it fails. To be fully consistent, we implement this
 check now also for CR4.
 
 Finally, we have to set the shadow to the value L2 wanted to write
 originally.
 
 Signed-off-by: Jan Kiszka jan.kis...@siemens.com
 ---
 
 Found while making unrestricted guest mode working. Not sure what impact
 the bugs had on current feature level, if any.
 
 For interested folks, I've pushed my nEPT environment here:
 
 git://git.kiszka.org/linux-kvm.git nept-hacking
 
  arch/x86/kvm/vmx.c |   49 ++---
  1 files changed, 30 insertions(+), 19 deletions(-)
 
 diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
 index 7cc566b..d1dac08 100644
 --- a/arch/x86/kvm/vmx.c
 +++ b/arch/x86/kvm/vmx.c
 @@ -4605,37 +4605,48 @@ vmx_patch_hypercall(struct kvm_vcpu *vcpu, unsigned 
 char *hypercall)
  /* called to set cr0 as appropriate for a mov-to-cr0 exit. */
  static int handle_set_cr0(struct kvm_vcpu *vcpu, unsigned long val)
  {
 - if (to_vmx(vcpu)-nested.vmxon 
 - ((val  VMXON_CR0_ALWAYSON) != VMXON_CR0_ALWAYSON))
 - return 1;
 -
   if (is_guest_mode(vcpu)) {
 - /*
 -  * We get here when L2 changed cr0 in a way that did not change
 -  * any of L1's shadowed bits (see nested_vmx_exit_handled_cr),
 -  * but did change L0 shadowed bits. This can currently happen
 -  * with the TS bit: L0 may want to leave TS on (for lazy fpu
 -  * loading) while pretending to allow the guest to change it.
 -  */
Can't say I understand this patch yet, but it looks like the comment is
still valid. Why have you removed it?

 - if (kvm_set_cr0(vcpu, (val  vcpu-arch.cr0_guest_owned_bits) |
 -  (vcpu-arch.cr0  ~vcpu-arch.cr0_guest_owned_bits)))
 + struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
 + unsigned long orig_val = val;
 +
 + val = (val  ~vmcs12-cr0_guest_host_mask) |
 + (vmcs_read64(GUEST_CR0)  vmcs12-cr0_guest_host_mask);
 + if ((val  VMXON_CR0_ALWAYSON) != VMXON_CR0_ALWAYSON)
 + return 1;
 +
 + if (kvm_set_cr0(vcpu, val))
   return 1;
 - vmcs_writel(CR0_READ_SHADOW, val);
 + vmcs_writel(CR0_READ_SHADOW, orig_val);
   return 0;
 - } else
 + } else {
 + if (to_vmx(vcpu)-nested.vmxon 
 + ((val  VMXON_CR0_ALWAYSON) != VMXON_CR0_ALWAYSON))
 + return 1;
   return kvm_set_cr0(vcpu, val);
 + }
  }
  
  static int handle_set_cr4(struct kvm_vcpu *vcpu, unsigned long val)
  {
   if (is_guest_mode(vcpu)) {
 - if (kvm_set_cr4(vcpu, (val  vcpu-arch.cr4_guest_owned_bits) |
 -  (vcpu-arch.cr4  ~vcpu-arch.cr4_guest_owned_bits)))
 + struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
 + unsigned long orig_val = val;
 +
 + val = (val  ~vmcs12-cr4_guest_host_mask) |
 + (vmcs_readl(GUEST_CR4)  vmcs12-cr4_guest_host_mask);
 + if ((val  VMXON_CR4_ALWAYSON) != VMXON_CR4_ALWAYSON)
 + return 1;
 +
 + if (kvm_set_cr4(vcpu, val))
   return 1;
 - vmcs_writel(CR4_READ_SHADOW, val);
 + vmcs_writel(CR4_READ_SHADOW, orig_val);
   return 0;
 - } else
 + } else {
 + if (to_vmx(vcpu)-nested.vmxon 
 + ((val  VMXON_CR4_ALWAYSON) != VMXON_CR4_ALWAYSON))
 + return 1;
   return kvm_set_cr4(vcpu, val);
 + }
  }
  
  /* called to set cr0 as approriate for clts instruction exit. */
 -- 
 1.7.3.4

--
Gleb.
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] KVM: nVMX: Fix content of MSR_IA32_VMX_ENTRY/EXIT_CTLS

2013-03-04 Thread Paolo Bonzini
Il 03/03/2013 13:04, Jan Kiszka ha scritto:
 From: Jan Kiszka jan.kis...@siemens.com
 
 Properly set those bits to 1 that the spec demands in case bit 55 of
 VMX_BASIC is 0 - like in our case.
 
 Signed-off-by: Jan Kiszka jan.kis...@siemens.com
 ---
  arch/x86/kvm/vmx.c |   13 ++---
  1 files changed, 10 insertions(+), 3 deletions(-)
 
 diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
 index 631cdb3..c204f0d 100644
 --- a/arch/x86/kvm/vmx.c
 +++ b/arch/x86/kvm/vmx.c
 @@ -2050,21 +2050,28 @@ static __init void nested_vmx_setup_ctls_msrs(void)
   PIN_BASED_EXT_INTR_MASK | PIN_BASED_NMI_EXITING |
   PIN_BASED_VIRTUAL_NMIS;
  
 - /* exit controls */
 - nested_vmx_exit_ctls_low = 0;
 + /*
 +  * Exit controls
 +  * If bit 55 of VMX_BASIC is off, bits 0-8 and 10, 11, 13, 14, 16 and
 +  * 17 must be 1.
 +  */
 + nested_vmx_exit_ctls_low = 0x36dff;
   /* Note that guest use of VM_EXIT_ACK_INTR_ON_EXIT is not supported. */
  #ifdef CONFIG_X86_64
   nested_vmx_exit_ctls_high = VM_EXIT_HOST_ADDR_SPACE_SIZE;
  #else
   nested_vmx_exit_ctls_high = 0;
  #endif
 + nested_vmx_exit_ctls_high |= 0x36dff;

Can you use nested_vmx_exit_ctls_low on the RHS, or define a constant?

   /* entry controls */
   rdmsr(MSR_IA32_VMX_ENTRY_CTLS,
   nested_vmx_entry_ctls_low, nested_vmx_entry_ctls_high);
 - nested_vmx_entry_ctls_low = 0;
 + /* If bit 55 of VMX_BASIC is off, bits 0-8 and 12 must be 1. */
 + nested_vmx_entry_ctls_low = 0x11ff;
   nested_vmx_entry_ctls_high =
   VM_ENTRY_LOAD_IA32_PAT | VM_ENTRY_IA32E_MODE;
 + nested_vmx_entry_ctls_high |= 0x11ff;

Same here.

Paolo

   /* cpu-based controls */
   rdmsr(MSR_IA32_VMX_PROCBASED_CTLS,
 

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: KVM: x86: Racy mp_state manipulations

2013-03-04 Thread Paolo Bonzini
Il 03/03/2013 17:48, Jan Kiszka ha scritto:
 Hi all,
 
 KVM's mp_state on x86 is usually manipulated over the context of the
 VCPU. Therefore, no locking is required. There are unfortunately two
 exceptions, and one of them is definitely broken: INIT and SIPI delivery.
 
 The lapic may set mp_state over the context of the sending VCPU. For
 SIPI, it first checks if the mp_state is INIT_RECEIVED before updating
 it to SIPI_RECEIVED. We can only race here with user space setting the
 state in parallel, I suppose. Probably harmless in practice.

Still it would be better to add an smp_wmb/smp_rmb pair between accesses
of mp_state and sipi_vector.

Also, Io
 What is critical is the update on INIT. That signal is asynchronous to
  the target VCPU state. And we can lose it:
 
 vcpu 1vcpu 2
 ----
 hlt;
 vmexit
   __apic_accept_irq(APIC_DM_INIT)
   mp_state = KVM_MP_STATE_INIT_RECEIVED
 mp_state = KVM_MP_STATE_HALTED
 
 And there it goes, our INIT state. I've triggered this under heavy INIT
 load and my nVMX patch for processing it while in VMXON.
 
 I'm currently considering options to fix this:
 
 - through a lock at mp_state manipulations, check under the lock that
   we don't perform invalid state transitions (e.g. INIT-HLT)
 - signal the INIT via some KVM_REQ_INIT to the target VCPU, fully
   localizing mp_state updates, the same could be done with SIPI, just
   to play safe
 
 I'm leaning toward the latter ATM, Any thoughts or other idea?

The latter makes sense since it's not a fast path, but the only
transition that is acceptable to KVM_MP_STATE_HALTED is from
KVM_MP_STATE_RUNNABLE:

from \ toRUNNABLE UNINIT  INIT HALTED   SIPI
RUNNABLE   n/a  yesyes   yes NO
UNINIT NO   n/ayes   NO  NO
INIT   NO   yesn/a   NO  yes
HALTED yes  yesyes   n/a NO
SIPI   yes  yesyes   NO  n/a

so for this particular bug it should also work to use a cmpxchg when
setting KVM_MP_STATE_HALTED.  Same for INIT-SIPI, since writes to
sipi_vector are harmless.

BTW, what happens when you send an INIT IPI to the bootstrap processor?
 This may be interesting if we want to emulate soft resets correctly in
QEMU; KVM makes it go to wait-for-SIPI state if I read the code
correctly, but that is wrong.

Paolo

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] KVM: x86: Convert INIT and SIPI signals into synchronously handled requests

2013-03-04 Thread Paolo Bonzini
Il 03/03/2013 21:21, Jan Kiszka ha scritto:
 From: Jan Kiszka jan.kis...@siemens.com
 
 A VCPU sending INIT or SIPI to some other VCPU races for setting the
 remote VCPU's mp_state. When we were unlucky, KVM_MP_STATE_INIT_RECEIVED
 was overwritten by kvm_emulate_halt and, thus, got lost.
 
 Fix this by raising requests on the sender side that will then be
 handled synchronously over the target VCPU context.
 
 Signed-off-by: Jan Kiszka jan.kis...@siemens.com
 ---
 
 Turned out to be simpler than expected. I'm no longer able to reproduce
 the race I saw before.
 
  arch/x86/kvm/lapic.c |9 -
  arch/x86/kvm/x86.c   |   16 +++-
  include/linux/kvm_host.h |2 ++
  3 files changed, 21 insertions(+), 6 deletions(-)
 
 diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
 index 02b51dd..be1e37a 100644
 --- a/arch/x86/kvm/lapic.c
 +++ b/arch/x86/kvm/lapic.c
 @@ -731,8 +731,7 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int 
 delivery_mode,
   case APIC_DM_INIT:
   if (!trig_mode || level) {
   result = 1;
 - vcpu-arch.mp_state = KVM_MP_STATE_INIT_RECEIVED;
 - kvm_make_request(KVM_REQ_EVENT, vcpu);
 + kvm_make_request(KVM_REQ_INIT, vcpu);
   kvm_vcpu_kick(vcpu);
   } else {
   apic_debug(Ignoring de-assert INIT to vcpu %d\n,
 @@ -743,11 +742,11 @@ static int __apic_accept_irq(struct kvm_lapic *apic, 
 int delivery_mode,
   case APIC_DM_STARTUP:
   apic_debug(SIPI to vcpu %d vector 0x%02x\n,
  vcpu-vcpu_id, vector);
 - if (vcpu-arch.mp_state == KVM_MP_STATE_INIT_RECEIVED) {
 + if (vcpu-arch.mp_state == KVM_MP_STATE_INIT_RECEIVED ||
 + test_bit(KVM_REQ_INIT, vcpu-requests)) {
   result = 1;
   vcpu-arch.sipi_vector = vector;
 - vcpu-arch.mp_state = KVM_MP_STATE_SIPI_RECEIVED;
 - kvm_make_request(KVM_REQ_EVENT, vcpu);
 + kvm_make_request(KVM_REQ_SIPI, vcpu);
   kvm_vcpu_kick(vcpu);
   }
   break;
 diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
 index d0cf737..8c8843c 100644
 --- a/arch/x86/kvm/x86.c
 +++ b/arch/x86/kvm/x86.c
 @@ -5641,6 +5641,18 @@ static void update_eoi_exitmap(struct kvm_vcpu *vcpu)
   kvm_x86_ops-load_eoi_exitmap(vcpu, eoi_exit_bitmap);
  }
  
 +static bool kvm_check_init_and_sipi(struct kvm_vcpu *vcpu)
 +{
 + if (kvm_check_request(KVM_REQ_INIT, vcpu))
 + vcpu-arch.mp_state = KVM_MP_STATE_INIT_RECEIVED;
 + if (kvm_check_request(KVM_REQ_SIPI, vcpu) 
 + vcpu-arch.mp_state == KVM_MP_STATE_INIT_RECEIVED) {
 + vcpu-arch.mp_state = KVM_MP_STATE_SIPI_RECEIVED;

Do you need KVM_MP_STATE_SIPI_RECEIVED at all anymore?  Perhaps you can
call kvm_check_init_and_sipi from __vcpu_run, before the call to
kvm_vcpu_block (and move the reset from __vcpu_run to
kvm_check_init_and_sipi too)?  Then you do not even need to touch
kvm_arch_vcpu_runnable.

 + return true;
 + }
 + return false;
 +}
 +
  static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
  {
   int r;
 @@ -5649,6 +5661,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
   bool req_immediate_exit = 0;
  
   if (vcpu-requests) {
 + kvm_check_init_and_sipi(vcpu);

Does this need to return 1 if kvm_check_init_and_sipi returns 1?
Otherwise the guest is entered in INIT state.  I think.

Paolo

   if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu))
   kvm_mmu_unload(vcpu);
   if (kvm_check_request(KVM_REQ_MIGRATE_TIMER, vcpu))
 @@ -6977,10 +6990,11 @@ void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
  
  int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
  {
 + if (kvm_check_init_and_sipi(vcpu))
 + return 1;
   return (vcpu-arch.mp_state == KVM_MP_STATE_RUNNABLE 
   !vcpu-arch.apf.halted)
   || !list_empty_careful(vcpu-async_pf.done)
 - || vcpu-arch.mp_state == KVM_MP_STATE_SIPI_RECEIVED
   || atomic_read(vcpu-arch.nmi_queued) ||
   (kvm_arch_interrupt_allowed(vcpu) 
kvm_cpu_has_interrupt(vcpu));
 diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
 index 722cae7..1a191c9 100644
 --- a/include/linux/kvm_host.h
 +++ b/include/linux/kvm_host.h
 @@ -124,6 +124,8 @@ static inline bool is_error_page(struct page *page)
  #define KVM_REQ_MCLOCK_INPROGRESS 20
  #define KVM_REQ_EPR_EXIT  21
  #define KVM_REQ_EOIBITMAP 22
 +#define KVM_REQ_INIT  23
 +#define KVM_REQ_SIPI  24
  
  #define KVM_USERSPACE_IRQ_SOURCE_ID  0
  #define KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID 1
 

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to 

Re: KVM: x86: Racy mp_state manipulations

2013-03-04 Thread Jan Kiszka
On 2013-03-04 15:12, Paolo Bonzini wrote:
 Il 03/03/2013 17:48, Jan Kiszka ha scritto:
 Hi all,

 KVM's mp_state on x86 is usually manipulated over the context of the
 VCPU. Therefore, no locking is required. There are unfortunately two
 exceptions, and one of them is definitely broken: INIT and SIPI delivery.

 The lapic may set mp_state over the context of the sending VCPU. For
 SIPI, it first checks if the mp_state is INIT_RECEIVED before updating
 it to SIPI_RECEIVED. We can only race here with user space setting the
 state in parallel, I suppose. Probably harmless in practice.
 
 Still it would be better to add an smp_wmb/smp_rmb pair between accesses
 of mp_state and sipi_vector.

Do we need a mb between sipi_vector assignment and kvm_make_request (see
my patch to fix this issue)?

 
 Also, Io
 What is critical is the update on INIT. That signal is asynchronous to
  the target VCPU state. And we can lose it:

 vcpu 1   vcpu 2
 --   --
 hlt;
 vmexit
  __apic_accept_irq(APIC_DM_INIT)
  mp_state = KVM_MP_STATE_INIT_RECEIVED
 mp_state = KVM_MP_STATE_HALTED

 And there it goes, our INIT state. I've triggered this under heavy INIT
 load and my nVMX patch for processing it while in VMXON.

 I'm currently considering options to fix this:

 - through a lock at mp_state manipulations, check under the lock that
   we don't perform invalid state transitions (e.g. INIT-HLT)
 - signal the INIT via some KVM_REQ_INIT to the target VCPU, fully
   localizing mp_state updates, the same could be done with SIPI, just
   to play safe

 I'm leaning toward the latter ATM, Any thoughts or other idea?
 
 The latter makes sense since it's not a fast path, but the only
 transition that is acceptable to KVM_MP_STATE_HALTED is from
 KVM_MP_STATE_RUNNABLE:
 
 from \ toRUNNABLE UNINIT  INIT HALTED   SIPI
 RUNNABLE   n/a  yesyes   yes NO
 UNINIT NO   n/ayes   NO  NO
 INIT   NO   yesn/a   NO  yes
 HALTED yes  yesyes   n/a NO
 SIPI   yes  yesyes   NO  n/a
 
 so for this particular bug it should also work to use a cmpxchg when
 setting KVM_MP_STATE_HALTED.  Same for INIT-SIPI, since writes to
 sipi_vector are harmless.

OK, but I already went for request bits. :)

 
 BTW, what happens when you send an INIT IPI to the bootstrap processor?
  This may be interesting if we want to emulate soft resets correctly in
 QEMU; KVM makes it go to wait-for-SIPI state if I read the code
 correctly, but that is wrong.

Where is this restriction specified? How do you reset the BP without
resetting the whole system then?

Jan

-- 
Siemens AG, Corporate Technology, CT RTC ITP SDP-DE
Corporate Competence Center Embedded Linux
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] KVM: x86: Convert INIT and SIPI signals into synchronously handled requests

2013-03-04 Thread Jan Kiszka
On 2013-03-04 15:28, Paolo Bonzini wrote:
 Il 03/03/2013 21:21, Jan Kiszka ha scritto:
 From: Jan Kiszka jan.kis...@siemens.com

 A VCPU sending INIT or SIPI to some other VCPU races for setting the
 remote VCPU's mp_state. When we were unlucky, KVM_MP_STATE_INIT_RECEIVED
 was overwritten by kvm_emulate_halt and, thus, got lost.

 Fix this by raising requests on the sender side that will then be
 handled synchronously over the target VCPU context.

 Signed-off-by: Jan Kiszka jan.kis...@siemens.com
 ---

 Turned out to be simpler than expected. I'm no longer able to reproduce
 the race I saw before.

  arch/x86/kvm/lapic.c |9 -
  arch/x86/kvm/x86.c   |   16 +++-
  include/linux/kvm_host.h |2 ++
  3 files changed, 21 insertions(+), 6 deletions(-)

 diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
 index 02b51dd..be1e37a 100644
 --- a/arch/x86/kvm/lapic.c
 +++ b/arch/x86/kvm/lapic.c
 @@ -731,8 +731,7 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int 
 delivery_mode,
  case APIC_DM_INIT:
  if (!trig_mode || level) {
  result = 1;
 -vcpu-arch.mp_state = KVM_MP_STATE_INIT_RECEIVED;
 -kvm_make_request(KVM_REQ_EVENT, vcpu);
 +kvm_make_request(KVM_REQ_INIT, vcpu);
  kvm_vcpu_kick(vcpu);
  } else {
  apic_debug(Ignoring de-assert INIT to vcpu %d\n,
 @@ -743,11 +742,11 @@ static int __apic_accept_irq(struct kvm_lapic *apic, 
 int delivery_mode,
  case APIC_DM_STARTUP:
  apic_debug(SIPI to vcpu %d vector 0x%02x\n,
 vcpu-vcpu_id, vector);
 -if (vcpu-arch.mp_state == KVM_MP_STATE_INIT_RECEIVED) {
 +if (vcpu-arch.mp_state == KVM_MP_STATE_INIT_RECEIVED ||
 +test_bit(KVM_REQ_INIT, vcpu-requests)) {
  result = 1;
  vcpu-arch.sipi_vector = vector;
 -vcpu-arch.mp_state = KVM_MP_STATE_SIPI_RECEIVED;
 -kvm_make_request(KVM_REQ_EVENT, vcpu);
 +kvm_make_request(KVM_REQ_SIPI, vcpu);
  kvm_vcpu_kick(vcpu);
  }
  break;
 diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
 index d0cf737..8c8843c 100644
 --- a/arch/x86/kvm/x86.c
 +++ b/arch/x86/kvm/x86.c
 @@ -5641,6 +5641,18 @@ static void update_eoi_exitmap(struct kvm_vcpu *vcpu)
  kvm_x86_ops-load_eoi_exitmap(vcpu, eoi_exit_bitmap);
  }
  
 +static bool kvm_check_init_and_sipi(struct kvm_vcpu *vcpu)
 +{
 +if (kvm_check_request(KVM_REQ_INIT, vcpu))
 +vcpu-arch.mp_state = KVM_MP_STATE_INIT_RECEIVED;
 +if (kvm_check_request(KVM_REQ_SIPI, vcpu) 
 +vcpu-arch.mp_state == KVM_MP_STATE_INIT_RECEIVED) {
 +vcpu-arch.mp_state = KVM_MP_STATE_SIPI_RECEIVED;
 
 Do you need KVM_MP_STATE_SIPI_RECEIVED at all anymore?  Perhaps you can
 call kvm_check_init_and_sipi from __vcpu_run, before the call to
 kvm_vcpu_block (and move the reset from __vcpu_run to
 kvm_check_init_and_sipi too)?  Then you do not even need to touch
 kvm_arch_vcpu_runnable.

Haven't thought about this in details yet as I first wanted to fix
within the existing infrastructure. But maybe the change below requires
more refactoring anyway. Let's see.

 
 +return true;
 +}
 +return false;
 +}
 +
  static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
  {
  int r;
 @@ -5649,6 +5661,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
  bool req_immediate_exit = 0;
  
  if (vcpu-requests) {
 +kvm_check_init_and_sipi(vcpu);
 
 Does this need to return 1 if kvm_check_init_and_sipi returns 1?
 Otherwise the guest is entered in INIT state.  I think.

Hmm, true... Need to refactor things a bit more as
kvm_check_init_and_sipi is designed to return true only for
wait-on-sipi-runnable transition.

Thanks,
Jan

-- 
Siemens AG, Corporate Technology, CT RTC ITP SDP-DE
Corporate Competence Center Embedded Linux
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] KVM: nVMX: Fix setting of CR0 and CR4 in guest mode

2013-03-04 Thread Nadav Har'El
On Mon, Mar 04, 2013, Jan Kiszka wrote about Re: [PATCH] KVM: nVMX: Fix 
setting of CR0 and CR4 in guest mode:
   if (is_guest_mode(vcpu)) {
  -/*
  - * We get here when L2 changed cr0 in a way that did 
  not change
  - * any of L1's shadowed bits (see 
  nested_vmx_exit_handled_cr),
  - * but did change L0 shadowed bits. This can currently 
  happen
  - * with the TS bit: L0 may want to leave TS on (for 
  lazy fpu
  - * loading) while pretending to allow the guest to 
  change it.
  - */
  Can't say I understand this patch yet, but it looks like the comment is
  still valid. Why have you removed it?
 
  L0 allows L1 or L2 at most to own TS, the rest is host-owned. I think
  the comment was always misleading.
 
  I do not see how it is misleading. For everything but TS we will not get
  here (if L1 is kvm). For TS we will get here if L1 allows L2 to change
  it, but L0 does not.
 
 For everything *but guest-owned* we will get here, thus for most CR0
 accesses (bit-wise, not regarding frequency).

For most CR0 bits, L1 (at least, a KVM one) will shadow (trap) them, so
we won't get to this point you modified at all... Instead,
nested_vmx_exit_handled_cr() would notice that a shadowed-by-L1 bit
was modified so an exit to L1 is required. We only get to that code
you changed if a bit was modified that L1 did *not* want to trap, but L0 did.
This is definitely not the bit-wise majority of the cases - unless you
have an L1 that does not trap most of the CR0 bits.

But I'm more worried about the actual code change :-) I didn't
understand if there's a situation where the existing code did something
wrong, or why it was wrong. Did you check the lazy-FPU-loading (TS bit)
aspect of your new code? To effectively check this, what I had to do
is to run on all of L0, L1, and L2, long runs of parallel make (make -j3) -
concurrently. Even code which doesn't do floating-point calculations uses
the FPU sometimes for its wide registers, so all these processes, guests
and guest's guests, compete for the FPU, exercising very well this code
path. If the TS bit is handled wrongly, some of these make processes
will die, when one of the compilations dies of SIGSEGV (forgetting to
set the FPU registers leads to some uninitialized pointers being used),
so it's quite easy to exercise this.

-- 
Nadav Har'El| Monday, Mar 4 2013, 22 Adar 5773
n...@math.technion.ac.il |-
Phone +972-523-790466, ICQ 13349191 |A witty saying proves nothing. --
http://nadav.harel.org.il   |Voltaire
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] KVM: nVMX: Fix content of MSR_IA32_VMX_ENTRY/EXIT_CTLS

2013-03-04 Thread Nadav Har'El
On Sun, Mar 03, 2013, Jan Kiszka wrote about [PATCH] KVM: nVMX: Fix content of 
MSR_IA32_VMX_ENTRY/EXIT_CTLS:
   /* Note that guest use of VM_EXIT_ACK_INTR_ON_EXIT is not supported. */
  #ifdef CONFIG_X86_64
   nested_vmx_exit_ctls_high = VM_EXIT_HOST_ADDR_SPACE_SIZE;
  #else
   nested_vmx_exit_ctls_high = 0;
  #endif
 + nested_vmx_exit_ctls_high |= 0x36dff;

Can you please compose this 0x36dff out of constants? Is
VM_EXIT_HOST_ADDR_SPACE_SIZE one of them?

It's important to verify that we actually support all these bits - even
if we *should* support them, it doesn't mean we actually do (but if we
do, we should say we do).

 - nested_vmx_entry_ctls_low = 0;
 + /* If bit 55 of VMX_BASIC is off, bits 0-8 and 12 must be 1. */
 + nested_vmx_entry_ctls_low = 0x11ff;

Setting nested_vmx_entry_ctls_low = 0 just means that although the spec
says only 1 setting is supported, we *also* support 0 setting. I'm not
sure why this is a bad thing. Our VMX will be even better than the
real processors' ;-)


-- 
Nadav Har'El| Monday, Mar 4 2013, 22 Adar 5773
n...@math.technion.ac.il |-
Phone +972-523-790466, ICQ 13349191 |My opinions may have changed, but not the
http://nadav.harel.org.il   |fact that I am right.
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v2] KVM: nVMX: Fix content of MSR_IA32_VMX_ENTRY/EXIT_CTLS

2013-03-04 Thread Jan Kiszka
Properly set those bits to 1 that the spec demands in case bit 55 of
VMX_BASIC is 0 - like in our case.

Signed-off-by: Jan Kiszka jan.kis...@siemens.com
---

Changes in v2:
 - use symbolic constants

 arch/x86/include/asm/vmx.h |4 
 arch/x86/kvm/vmx.c |   13 ++---
 2 files changed, 14 insertions(+), 3 deletions(-)

diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
index f6b599b..a0c2ad2 100644
--- a/arch/x86/include/asm/vmx.h
+++ b/arch/x86/include/asm/vmx.h
@@ -169,6 +169,8 @@
 #define VM_EXIT_LOAD_IA32_EFER  0x0020
 #define VM_EXIT_SAVE_VMX_PREEMPTION_TIMER   0x0040
 
+#define VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR  0x00036dff
+
 #define VM_ENTRY_LOAD_DEBUG_CONTROLS0x0002
 #define VM_ENTRY_IA32E_MODE 0x0200
 #define VM_ENTRY_SMM0x0400
@@ -177,6 +179,8 @@
 #define VM_ENTRY_LOAD_IA32_PAT 0x4000
 #define VM_ENTRY_LOAD_IA32_EFER 0x8000
 
+#define VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR 0x11ff
+
 #define VMX_MISC_SAVE_EFER_LMA 0x0020
 
 /* VMCS Encodings */
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 631cdb3..1f917de 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -2050,21 +2050,28 @@ static __init void nested_vmx_setup_ctls_msrs(void)
PIN_BASED_EXT_INTR_MASK | PIN_BASED_NMI_EXITING |
PIN_BASED_VIRTUAL_NMIS;
 
-   /* exit controls */
-   nested_vmx_exit_ctls_low = 0;
+   /*
+* Exit controls
+* If bit 55 of VMX_BASIC is off, bits 0-8 and 10, 11, 13, 14, 16 and
+* 17 must be 1.
+*/
+   nested_vmx_exit_ctls_low = VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR;
/* Note that guest use of VM_EXIT_ACK_INTR_ON_EXIT is not supported. */
 #ifdef CONFIG_X86_64
nested_vmx_exit_ctls_high = VM_EXIT_HOST_ADDR_SPACE_SIZE;
 #else
nested_vmx_exit_ctls_high = 0;
 #endif
+   nested_vmx_exit_ctls_high |= VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR;
 
/* entry controls */
rdmsr(MSR_IA32_VMX_ENTRY_CTLS,
nested_vmx_entry_ctls_low, nested_vmx_entry_ctls_high);
-   nested_vmx_entry_ctls_low = 0;
+   /* If bit 55 of VMX_BASIC is off, bits 0-8 and 12 must be 1. */
+   nested_vmx_entry_ctls_low = VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR;
nested_vmx_entry_ctls_high =
VM_ENTRY_LOAD_IA32_PAT | VM_ENTRY_IA32E_MODE;
+   nested_vmx_entry_ctls_high |= VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR;
 
/* cpu-based controls */
rdmsr(MSR_IA32_VMX_PROCBASED_CTLS,
-- 
1.7.3.4
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v2] KVM: nVMX: Fix content of MSR_IA32_VMX_ENTRY/EXIT_CTLS

2013-03-04 Thread Paolo Bonzini
Il 04/03/2013 17:15, Jan Kiszka ha scritto:
 Properly set those bits to 1 that the spec demands in case bit 55 of
 VMX_BASIC is 0 - like in our case.
 
 Signed-off-by: Jan Kiszka jan.kis...@siemens.com

Reviewed-by: Paolo Bonzini pbonz...@redhat.com


 ---
 
 Changes in v2:
  - use symbolic constants
 
  arch/x86/include/asm/vmx.h |4 
  arch/x86/kvm/vmx.c |   13 ++---
  2 files changed, 14 insertions(+), 3 deletions(-)
 
 diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
 index f6b599b..a0c2ad2 100644
 --- a/arch/x86/include/asm/vmx.h
 +++ b/arch/x86/include/asm/vmx.h
 @@ -169,6 +169,8 @@
  #define VM_EXIT_LOAD_IA32_EFER  0x0020
  #define VM_EXIT_SAVE_VMX_PREEMPTION_TIMER   0x0040
  
 +#define VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR0x00036dff
 +
  #define VM_ENTRY_LOAD_DEBUG_CONTROLS0x0002
  #define VM_ENTRY_IA32E_MODE 0x0200
  #define VM_ENTRY_SMM0x0400
 @@ -177,6 +179,8 @@
  #define VM_ENTRY_LOAD_IA32_PAT   0x4000
  #define VM_ENTRY_LOAD_IA32_EFER 0x8000
  
 +#define VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR   0x11ff
 +
  #define VMX_MISC_SAVE_EFER_LMA   0x0020
  
  /* VMCS Encodings */
 diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
 index 631cdb3..1f917de 100644
 --- a/arch/x86/kvm/vmx.c
 +++ b/arch/x86/kvm/vmx.c
 @@ -2050,21 +2050,28 @@ static __init void nested_vmx_setup_ctls_msrs(void)
   PIN_BASED_EXT_INTR_MASK | PIN_BASED_NMI_EXITING |
   PIN_BASED_VIRTUAL_NMIS;
  
 - /* exit controls */
 - nested_vmx_exit_ctls_low = 0;
 + /*
 +  * Exit controls
 +  * If bit 55 of VMX_BASIC is off, bits 0-8 and 10, 11, 13, 14, 16 and
 +  * 17 must be 1.
 +  */
 + nested_vmx_exit_ctls_low = VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR;
   /* Note that guest use of VM_EXIT_ACK_INTR_ON_EXIT is not supported. */
  #ifdef CONFIG_X86_64
   nested_vmx_exit_ctls_high = VM_EXIT_HOST_ADDR_SPACE_SIZE;
  #else
   nested_vmx_exit_ctls_high = 0;
  #endif
 + nested_vmx_exit_ctls_high |= VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR;
  
   /* entry controls */
   rdmsr(MSR_IA32_VMX_ENTRY_CTLS,
   nested_vmx_entry_ctls_low, nested_vmx_entry_ctls_high);
 - nested_vmx_entry_ctls_low = 0;
 + /* If bit 55 of VMX_BASIC is off, bits 0-8 and 12 must be 1. */
 + nested_vmx_entry_ctls_low = VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR;
   nested_vmx_entry_ctls_high =
   VM_ENTRY_LOAD_IA32_PAT | VM_ENTRY_IA32E_MODE;
 + nested_vmx_entry_ctls_high |= VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR;
  
   /* cpu-based controls */
   rdmsr(MSR_IA32_VMX_PROCBASED_CTLS,
 

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH RFC 0/2] kvm: Better yield_to candidate using preemption notifiers

2013-03-04 Thread Raghavendra K T
 This patch series further filters a better vcpu candidate to yield to
in PLE handler. The main idea is to record the preempted vcpus using
preempt notifiers and iterate only those preempted vcpus in the
handler. Note that the vcpus which were in spinloop during pause loop
exit are already filtered.

Thanks Jiannan, Avi for bringing the idea and Gleb, PeterZ for
precious suggestions during the discussion. 
Thanks Srikar for suggesting to avoid rcu lock while checking task state
that has improved overcommit cases.

There are basically two approches for the implementation.

Method 1: Uses per vcpu preempt flag (this series).

Method 2: We keep a bitmap of preempted vcpus. using this we can easily
iterate over preempted vcpus.

Note that method 2 needs an extra index variable to identify/map bitmap to
vcpu and it also needs static vcpu allocation.

I am also posting Method 2 approach for reference in case it interests.

Result: decent improvement for kernbench and ebizzy.

base = 3.8.0 + undercommit patches 
patched = base + preempt patches

Tested on 32 core (no HT) mx3850 machine with 32 vcpu guest 8GB RAM

--+---+---+---++---+
   kernbench (exec time in sec, lower is better) 
--+---+---+---++---+
  base   stdev   patched   stdev  %improve 
--+---+---+---++---+
1x47.0383 4.6977 44.2584 1.2899 5.90986
2x96.0071 7.1873 91.2605 7.3567 4.94401
3x   164.015710.3613156.675011.4267 4.47561
4x   212.576823.7326204.480013.2908 3.80888
--+---+---+---++---+
no ple kernbench 1x result for reference: 46.056133

--+---+---+---++---+
   ebizzy (record/sec higher is better)
--+---+---+---++---+
  base   stdev   patched   stdev  %improve 
--+---+---+---++---+
1x  5609.200056.93436263.700064.7097 11.66833
2x  2071.9000   108.48292653.5000   181.8395 28.07085
3x  1557.4167   109.71411993.5000   166.3176 28.00043
4x  1254.750091.29971765.5000   237.5410 40.70532
--+---+---+---++---+
no ple ebizzy 1x result for reference : 7394.9 rec/sec

Please let me know if you have any suggestions and comments.

Raghavendra K T (2):
   kvm: Record the preemption status of vcpus using preempt notifiers
   kvm: Iterate over only vcpus that are preempted


 include/linux/kvm_host.h | 1 +
 virt/kvm/kvm_main.c  | 7 +++
 2 files changed, 8 insertions(+)
 
Reference patch for Method 2
---8---
Use preempt bitmap and optimize vcpu iteration using preempt notifiers

From: Raghavendra K T raghavendra...@linux.vnet.ibm.com

Record the preempted vcpus in a bit map using preempt notifiers.
Add the logic of iterating over only preempted vcpus thus making
vcpu iteration fast.
Thanks Jiannan, Avi for initially proposing the patch. Gleb, Peter for
precious suggestions.
Thanks Srikar for suggesting to remove the rcu lock while checking
task state that helped in reducing overcommit overhead

Not-yet-signed-off-by: Raghavendra K T raghavendra...@linux.vnet.ibm.com
---
 include/linux/kvm_host.h |7 +++
 virt/kvm/kvm_main.c  |   15 ---
 2 files changed, 19 insertions(+), 3 deletions(-)

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index cad77fe..8c4a2409 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -252,6 +252,7 @@ struct kvm_vcpu {
bool dy_eligible;
} spin_loop;
 #endif
+   int idx;
struct kvm_vcpu_arch arch;
 };
 
@@ -385,6 +386,7 @@ struct kvm {
long mmu_notifier_count;
 #endif
long tlbs_dirty;
+   DECLARE_BITMAP(preempt_bitmap, KVM_MAX_VCPUS);
 };
 
 #define kvm_err(fmt, ...) \
@@ -413,6 +415,11 @@ static inline struct kvm_vcpu *kvm_get_vcpu(struct kvm 
*kvm, int i)
 (vcpup = kvm_get_vcpu(kvm, idx)) != NULL; \
 idx++)
 
+#define kvm_for_each_preempted_vcpu(idx, vcpup, kvm, n) \
+   for (idx = find_first_bit(kvm-preempt_bitmap, KVM_MAX_VCPUS); \
+idx  n  (vcpup = kvm_get_vcpu(kvm, idx)) != NULL; \
+idx = find_next_bit(kvm-preempt_bitmap, KVM_MAX_VCPUS, idx+1))
+
 #define kvm_for_each_memslot(memslot, slots)   \
for (memslot = slots-memslots[0]; \
  memslot  slots-memslots + KVM_MEM_SLOTS_NUM  memslot-npages;\
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index adc68fe..1db16b3 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -1770,10 +1770,12 @@ void kvm_vcpu_on_spin(struct kvm_vcpu *me)
struct kvm_vcpu *vcpu;
int last_boosted_vcpu = me-kvm-last_boosted_vcpu;
int yielded = 0;
+   

Re: [PATCH] KVM: x86: Convert INIT and SIPI signals into synchronously handled requests

2013-03-04 Thread Gleb Natapov
On Sun, Mar 03, 2013 at 09:21:43PM +0100, Jan Kiszka wrote:
 From: Jan Kiszka jan.kis...@siemens.com
 
 A VCPU sending INIT or SIPI to some other VCPU races for setting the
 remote VCPU's mp_state. When we were unlucky, KVM_MP_STATE_INIT_RECEIVED
 was overwritten by kvm_emulate_halt and, thus, got lost.
 
 Fix this by raising requests on the sender side that will then be
 handled synchronously over the target VCPU context.
 
 Signed-off-by: Jan Kiszka jan.kis...@siemens.com
 ---
 
 Turned out to be simpler than expected. I'm no longer able to reproduce
 the race I saw before.
 
  arch/x86/kvm/lapic.c |9 -
  arch/x86/kvm/x86.c   |   16 +++-
  include/linux/kvm_host.h |2 ++
  3 files changed, 21 insertions(+), 6 deletions(-)
 
 diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
 index 02b51dd..be1e37a 100644
 --- a/arch/x86/kvm/lapic.c
 +++ b/arch/x86/kvm/lapic.c
 @@ -731,8 +731,7 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int 
 delivery_mode,
   case APIC_DM_INIT:
   if (!trig_mode || level) {
   result = 1;
 - vcpu-arch.mp_state = KVM_MP_STATE_INIT_RECEIVED;
 - kvm_make_request(KVM_REQ_EVENT, vcpu);
 + kvm_make_request(KVM_REQ_INIT, vcpu);
   kvm_vcpu_kick(vcpu);
   } else {
   apic_debug(Ignoring de-assert INIT to vcpu %d\n,
 @@ -743,11 +742,11 @@ static int __apic_accept_irq(struct kvm_lapic *apic, 
 int delivery_mode,
   case APIC_DM_STARTUP:
   apic_debug(SIPI to vcpu %d vector 0x%02x\n,
  vcpu-vcpu_id, vector);
 - if (vcpu-arch.mp_state == KVM_MP_STATE_INIT_RECEIVED) {
 + if (vcpu-arch.mp_state == KVM_MP_STATE_INIT_RECEIVED ||
 + test_bit(KVM_REQ_INIT, vcpu-requests)) {
   result = 1;
   vcpu-arch.sipi_vector = vector;
 - vcpu-arch.mp_state = KVM_MP_STATE_SIPI_RECEIVED;
 - kvm_make_request(KVM_REQ_EVENT, vcpu);
 + kvm_make_request(KVM_REQ_SIPI, vcpu);
   kvm_vcpu_kick(vcpu);
   }
   break;
 diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
 index d0cf737..8c8843c 100644
 --- a/arch/x86/kvm/x86.c
 +++ b/arch/x86/kvm/x86.c
 @@ -5641,6 +5641,18 @@ static void update_eoi_exitmap(struct kvm_vcpu *vcpu)
   kvm_x86_ops-load_eoi_exitmap(vcpu, eoi_exit_bitmap);
  }
  
 +static bool kvm_check_init_and_sipi(struct kvm_vcpu *vcpu)
 +{
 + if (kvm_check_request(KVM_REQ_INIT, vcpu))
 + vcpu-arch.mp_state = KVM_MP_STATE_INIT_RECEIVED;
 + if (kvm_check_request(KVM_REQ_SIPI, vcpu) 
 + vcpu-arch.mp_state == KVM_MP_STATE_INIT_RECEIVED) {
 + vcpu-arch.mp_state = KVM_MP_STATE_SIPI_RECEIVED;
 + return true;
 + }
 + return false;
 +}
 +
  static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
  {
   int r;
 @@ -5649,6 +5661,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
   bool req_immediate_exit = 0;
  
   if (vcpu-requests) {
 + kvm_check_init_and_sipi(vcpu);
   if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu))
   kvm_mmu_unload(vcpu);
   if (kvm_check_request(KVM_REQ_MIGRATE_TIMER, vcpu))
 @@ -6977,10 +6990,11 @@ void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
  
  int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
  {
 + if (kvm_check_init_and_sipi(vcpu))
 + return 1;
   return (vcpu-arch.mp_state == KVM_MP_STATE_RUNNABLE 
   !vcpu-arch.apf.halted)
   || !list_empty_careful(vcpu-async_pf.done)
 - || vcpu-arch.mp_state == KVM_MP_STATE_SIPI_RECEIVED
   || atomic_read(vcpu-arch.nmi_queued) ||
   (kvm_arch_interrupt_allowed(vcpu) 
kvm_cpu_has_interrupt(vcpu));
This makes two subsequent calls to kvm_arch_vcpu_runnable() return
different values if SIPI is pending. While it may not cause problem to
current code (I haven't thought it through) with such semantics you
gonna have a bad time.

--
Gleb.
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] KVM: x86: Convert INIT and SIPI signals into synchronously handled requests

2013-03-04 Thread Jan Kiszka
On 2013-03-04 19:08, Gleb Natapov wrote:
 On Sun, Mar 03, 2013 at 09:21:43PM +0100, Jan Kiszka wrote:
 From: Jan Kiszka jan.kis...@siemens.com

 A VCPU sending INIT or SIPI to some other VCPU races for setting the
 remote VCPU's mp_state. When we were unlucky, KVM_MP_STATE_INIT_RECEIVED
 was overwritten by kvm_emulate_halt and, thus, got lost.

 Fix this by raising requests on the sender side that will then be
 handled synchronously over the target VCPU context.

 Signed-off-by: Jan Kiszka jan.kis...@siemens.com
 ---

 Turned out to be simpler than expected. I'm no longer able to reproduce
 the race I saw before.

  arch/x86/kvm/lapic.c |9 -
  arch/x86/kvm/x86.c   |   16 +++-
  include/linux/kvm_host.h |2 ++
  3 files changed, 21 insertions(+), 6 deletions(-)

 diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
 index 02b51dd..be1e37a 100644
 --- a/arch/x86/kvm/lapic.c
 +++ b/arch/x86/kvm/lapic.c
 @@ -731,8 +731,7 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int 
 delivery_mode,
  case APIC_DM_INIT:
  if (!trig_mode || level) {
  result = 1;
 -vcpu-arch.mp_state = KVM_MP_STATE_INIT_RECEIVED;
 -kvm_make_request(KVM_REQ_EVENT, vcpu);
 +kvm_make_request(KVM_REQ_INIT, vcpu);
  kvm_vcpu_kick(vcpu);
  } else {
  apic_debug(Ignoring de-assert INIT to vcpu %d\n,
 @@ -743,11 +742,11 @@ static int __apic_accept_irq(struct kvm_lapic *apic, 
 int delivery_mode,
  case APIC_DM_STARTUP:
  apic_debug(SIPI to vcpu %d vector 0x%02x\n,
 vcpu-vcpu_id, vector);
 -if (vcpu-arch.mp_state == KVM_MP_STATE_INIT_RECEIVED) {
 +if (vcpu-arch.mp_state == KVM_MP_STATE_INIT_RECEIVED ||
 +test_bit(KVM_REQ_INIT, vcpu-requests)) {
  result = 1;
  vcpu-arch.sipi_vector = vector;
 -vcpu-arch.mp_state = KVM_MP_STATE_SIPI_RECEIVED;
 -kvm_make_request(KVM_REQ_EVENT, vcpu);
 +kvm_make_request(KVM_REQ_SIPI, vcpu);
  kvm_vcpu_kick(vcpu);
  }
  break;
 diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
 index d0cf737..8c8843c 100644
 --- a/arch/x86/kvm/x86.c
 +++ b/arch/x86/kvm/x86.c
 @@ -5641,6 +5641,18 @@ static void update_eoi_exitmap(struct kvm_vcpu *vcpu)
  kvm_x86_ops-load_eoi_exitmap(vcpu, eoi_exit_bitmap);
  }
  
 +static bool kvm_check_init_and_sipi(struct kvm_vcpu *vcpu)
 +{
 +if (kvm_check_request(KVM_REQ_INIT, vcpu))
 +vcpu-arch.mp_state = KVM_MP_STATE_INIT_RECEIVED;
 +if (kvm_check_request(KVM_REQ_SIPI, vcpu) 
 +vcpu-arch.mp_state == KVM_MP_STATE_INIT_RECEIVED) {
 +vcpu-arch.mp_state = KVM_MP_STATE_SIPI_RECEIVED;
 +return true;
 +}
 +return false;
 +}
 +
  static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
  {
  int r;
 @@ -5649,6 +5661,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
  bool req_immediate_exit = 0;
  
  if (vcpu-requests) {
 +kvm_check_init_and_sipi(vcpu);
  if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu))
  kvm_mmu_unload(vcpu);
  if (kvm_check_request(KVM_REQ_MIGRATE_TIMER, vcpu))
 @@ -6977,10 +6990,11 @@ void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
  
  int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
  {
 +if (kvm_check_init_and_sipi(vcpu))
 +return 1;
  return (vcpu-arch.mp_state == KVM_MP_STATE_RUNNABLE 
  !vcpu-arch.apf.halted)
  || !list_empty_careful(vcpu-async_pf.done)
 -|| vcpu-arch.mp_state == KVM_MP_STATE_SIPI_RECEIVED
  || atomic_read(vcpu-arch.nmi_queued) ||
  (kvm_arch_interrupt_allowed(vcpu) 
   kvm_cpu_has_interrupt(vcpu));
 This makes two subsequent calls to kvm_arch_vcpu_runnable() return
 different values if SIPI is pending. While it may not cause problem to
 current code (I haven't thought it through) with such semantics you
 gonna have a bad time.

If I manage to follow Paolo's suggestion to eliminate the SIPI_RECEIVED
state and all the staged logic around it, that might change. Will be
more invasive but likely cleaner in its result.

Jan

-- 
Siemens AG, Corporate Technology, CT RTC ITP SDP-DE
Corporate Competence Center Embedded Linux
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] KVM: nVMX: Fix setting of CR0 and CR4 in guest mode

2013-03-04 Thread Gleb Natapov
On Mon, Mar 04, 2013 at 07:08:08PM +0100, Jan Kiszka wrote:
 On 2013-03-04 18:56, Gleb Natapov wrote:
  On Mon, Mar 04, 2013 at 03:25:47PM +0100, Jan Kiszka wrote:
  On 2013-03-04 15:15, Gleb Natapov wrote:
  On Mon, Mar 04, 2013 at 03:09:51PM +0100, Jan Kiszka wrote:
  On 2013-03-04 14:22, Gleb Natapov wrote:
  On Thu, Feb 28, 2013 at 10:44:47AM +0100, Jan Kiszka wrote:
  The logic for calculating the value with which we call kvm_set_cr0/4 
  was
  broken (will definitely be visible with nested unrestricted guest mode
  support). Also, we performed the check regarding CR0_ALWAYSON too early
  when in guest mode.
 
  What really needs to be done on both CR0 and CR4 is to mask out 
  L1-owned
  bits and merge them in from GUEST_CR0/4. In contrast, arch.cr0/4 and
  arch.cr0/4_guest_owned_bits contain the mangled L0+L1 state and, thus,
  are not suited as input.
 
  For both CRs, we can then apply the check against VMXON_CRx_ALWAYSON 
  and
  refuse the update if it fails. To be fully consistent, we implement 
  this
  check now also for CR4.
 
  Finally, we have to set the shadow to the value L2 wanted to write
  originally.
 
  Signed-off-by: Jan Kiszka jan.kis...@siemens.com
  ---
 
  Found while making unrestricted guest mode working. Not sure what 
  impact
  the bugs had on current feature level, if any.
 
  For interested folks, I've pushed my nEPT environment here:
 
  git://git.kiszka.org/linux-kvm.git nept-hacking
 
   arch/x86/kvm/vmx.c |   49 
  ++---
   1 files changed, 30 insertions(+), 19 deletions(-)
 
  diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
  index 7cc566b..d1dac08 100644
  --- a/arch/x86/kvm/vmx.c
  +++ b/arch/x86/kvm/vmx.c
  @@ -4605,37 +4605,48 @@ vmx_patch_hypercall(struct kvm_vcpu *vcpu, 
  unsigned char *hypercall)
   /* called to set cr0 as appropriate for a mov-to-cr0 exit. */
   static int handle_set_cr0(struct kvm_vcpu *vcpu, unsigned long val)
   {
  -  if (to_vmx(vcpu)-nested.vmxon 
  -  ((val  VMXON_CR0_ALWAYSON) != VMXON_CR0_ALWAYSON))
  -  return 1;
  -
 if (is_guest_mode(vcpu)) {
  -  /*
  -   * We get here when L2 changed cr0 in a way that did 
  not change
  -   * any of L1's shadowed bits (see 
  nested_vmx_exit_handled_cr),
  -   * but did change L0 shadowed bits. This can currently 
  happen
  -   * with the TS bit: L0 may want to leave TS on (for 
  lazy fpu
  -   * loading) while pretending to allow the guest to 
  change it.
  -   */
  Can't say I understand this patch yet, but it looks like the comment is
  still valid. Why have you removed it?
 
  L0 allows L1 or L2 at most to own TS, the rest is host-owned. I think
  the comment was always misleading.
 
  I do not see how it is misleading. For everything but TS we will not get
  here (if L1 is kvm). For TS we will get here if L1 allows L2 to change
  it, but L0 does not.
 
  For everything *but guest-owned* we will get here, thus for most CR0
  accesses (bit-wise, not regarding frequency).
 
  I do not see how. If bit is trapped by L1 we will not get here. We will
  do vmexit to L1 instead. nested_vmx_exit_handled_cr() check this condition.
  I am not arguing about you code (didn't grok it yet), but the comment
  still make sense to me.
 
 We get here when L2 changed cr0 in a way that did not change any of
 L1's shadowed bits (see nested_vmx_exit_handled_cr), but did change L0
 shadowed bits. That I can sign. But the rest about TS is just
 misleading as we trap _every_ change in L0 - except for TS under certain
 conditions. The old code was tested against TS only, that's what the
 comment witness.
 
TS is just an example of how we can get here with KVM on KVM. Obviously
other hypervisors may have different configuration. L2 may allow full
guest access to CR0 and then each CR0 write by L2 will be handled here.
Under what other condition we trap _every_ change in L0 - except for
TS here?

 If you prefer, I'll leave part one in.
 
Please do so. Without the comment it is not obvious why exit condition
is not checked here. Still do not see why you object to TS part.

--
Gleb.
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] KVM: nVMX: Fix setting of CR0 and CR4 in guest mode

2013-03-04 Thread Jan Kiszka
On 2013-03-04 19:39, Gleb Natapov wrote:
 On Mon, Mar 04, 2013 at 07:08:08PM +0100, Jan Kiszka wrote:
 On 2013-03-04 18:56, Gleb Natapov wrote:
 On Mon, Mar 04, 2013 at 03:25:47PM +0100, Jan Kiszka wrote:
 On 2013-03-04 15:15, Gleb Natapov wrote:
 On Mon, Mar 04, 2013 at 03:09:51PM +0100, Jan Kiszka wrote:
 On 2013-03-04 14:22, Gleb Natapov wrote:
 On Thu, Feb 28, 2013 at 10:44:47AM +0100, Jan Kiszka wrote:
 The logic for calculating the value with which we call kvm_set_cr0/4 
 was
 broken (will definitely be visible with nested unrestricted guest mode
 support). Also, we performed the check regarding CR0_ALWAYSON too early
 when in guest mode.

 What really needs to be done on both CR0 and CR4 is to mask out 
 L1-owned
 bits and merge them in from GUEST_CR0/4. In contrast, arch.cr0/4 and
 arch.cr0/4_guest_owned_bits contain the mangled L0+L1 state and, thus,
 are not suited as input.

 For both CRs, we can then apply the check against VMXON_CRx_ALWAYSON 
 and
 refuse the update if it fails. To be fully consistent, we implement 
 this
 check now also for CR4.

 Finally, we have to set the shadow to the value L2 wanted to write
 originally.

 Signed-off-by: Jan Kiszka jan.kis...@siemens.com
 ---

 Found while making unrestricted guest mode working. Not sure what 
 impact
 the bugs had on current feature level, if any.

 For interested folks, I've pushed my nEPT environment here:

 git://git.kiszka.org/linux-kvm.git nept-hacking

  arch/x86/kvm/vmx.c |   49 
 ++---
  1 files changed, 30 insertions(+), 19 deletions(-)

 diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
 index 7cc566b..d1dac08 100644
 --- a/arch/x86/kvm/vmx.c
 +++ b/arch/x86/kvm/vmx.c
 @@ -4605,37 +4605,48 @@ vmx_patch_hypercall(struct kvm_vcpu *vcpu, 
 unsigned char *hypercall)
  /* called to set cr0 as appropriate for a mov-to-cr0 exit. */
  static int handle_set_cr0(struct kvm_vcpu *vcpu, unsigned long val)
  {
 -  if (to_vmx(vcpu)-nested.vmxon 
 -  ((val  VMXON_CR0_ALWAYSON) != VMXON_CR0_ALWAYSON))
 -  return 1;
 -
if (is_guest_mode(vcpu)) {
 -  /*
 -   * We get here when L2 changed cr0 in a way that did 
 not change
 -   * any of L1's shadowed bits (see 
 nested_vmx_exit_handled_cr),
 -   * but did change L0 shadowed bits. This can currently 
 happen
 -   * with the TS bit: L0 may want to leave TS on (for 
 lazy fpu
 -   * loading) while pretending to allow the guest to 
 change it.
 -   */
 Can't say I understand this patch yet, but it looks like the comment is
 still valid. Why have you removed it?

 L0 allows L1 or L2 at most to own TS, the rest is host-owned. I think
 the comment was always misleading.

 I do not see how it is misleading. For everything but TS we will not get
 here (if L1 is kvm). For TS we will get here if L1 allows L2 to change
 it, but L0 does not.

 For everything *but guest-owned* we will get here, thus for most CR0
 accesses (bit-wise, not regarding frequency).

 I do not see how. If bit is trapped by L1 we will not get here. We will
 do vmexit to L1 instead. nested_vmx_exit_handled_cr() check this condition.
 I am not arguing about you code (didn't grok it yet), but the comment
 still make sense to me.

 We get here when L2 changed cr0 in a way that did not change any of
 L1's shadowed bits (see nested_vmx_exit_handled_cr), but did change L0
 shadowed bits. That I can sign. But the rest about TS is just
 misleading as we trap _every_ change in L0 - except for TS under certain
 conditions. The old code was tested against TS only, that's what the
 comment witness.

 TS is just an example of how we can get here with KVM on KVM. Obviously
 other hypervisors may have different configuration. L2 may allow full
 guest access to CR0 and then each CR0 write by L2 will be handled here.
 Under what other condition we trap _every_ change in L0 - except for
 TS here?

On FPU activation:

cr0_guest_owned_bits = X86_CR0_TS;

And on FPU deactivation:

cr0_guest_owned_bits = 0;

 
 If you prefer, I'll leave part one in.

 Please do so. Without the comment it is not obvious why exit condition
 is not checked here. Still do not see why you object to TS part.

It describes a corner case in a way that suggests this is the only
reason why we get here.

Jan




signature.asc
Description: OpenPGP digital signature


Re: [PATCH] KVM: nVMX: Fix setting of CR0 and CR4 in guest mode

2013-03-04 Thread Gleb Natapov
On Mon, Mar 04, 2013 at 08:23:52PM +0100, Jan Kiszka wrote:
 On 2013-03-04 19:39, Gleb Natapov wrote:
  On Mon, Mar 04, 2013 at 07:08:08PM +0100, Jan Kiszka wrote:
  On 2013-03-04 18:56, Gleb Natapov wrote:
  On Mon, Mar 04, 2013 at 03:25:47PM +0100, Jan Kiszka wrote:
  On 2013-03-04 15:15, Gleb Natapov wrote:
  On Mon, Mar 04, 2013 at 03:09:51PM +0100, Jan Kiszka wrote:
  On 2013-03-04 14:22, Gleb Natapov wrote:
  On Thu, Feb 28, 2013 at 10:44:47AM +0100, Jan Kiszka wrote:
  The logic for calculating the value with which we call kvm_set_cr0/4 
  was
  broken (will definitely be visible with nested unrestricted guest 
  mode
  support). Also, we performed the check regarding CR0_ALWAYSON too 
  early
  when in guest mode.
 
  What really needs to be done on both CR0 and CR4 is to mask out 
  L1-owned
  bits and merge them in from GUEST_CR0/4. In contrast, arch.cr0/4 and
  arch.cr0/4_guest_owned_bits contain the mangled L0+L1 state and, 
  thus,
  are not suited as input.
 
  For both CRs, we can then apply the check against VMXON_CRx_ALWAYSON 
  and
  refuse the update if it fails. To be fully consistent, we implement 
  this
  check now also for CR4.
 
  Finally, we have to set the shadow to the value L2 wanted to write
  originally.
 
  Signed-off-by: Jan Kiszka jan.kis...@siemens.com
  ---
 
  Found while making unrestricted guest mode working. Not sure what 
  impact
  the bugs had on current feature level, if any.
 
  For interested folks, I've pushed my nEPT environment here:
 
  git://git.kiszka.org/linux-kvm.git nept-hacking
 
   arch/x86/kvm/vmx.c |   49 
  ++---
   1 files changed, 30 insertions(+), 19 deletions(-)
 
  diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
  index 7cc566b..d1dac08 100644
  --- a/arch/x86/kvm/vmx.c
  +++ b/arch/x86/kvm/vmx.c
  @@ -4605,37 +4605,48 @@ vmx_patch_hypercall(struct kvm_vcpu *vcpu, 
  unsigned char *hypercall)
   /* called to set cr0 as appropriate for a mov-to-cr0 exit. */
   static int handle_set_cr0(struct kvm_vcpu *vcpu, unsigned long val)
   {
  -if (to_vmx(vcpu)-nested.vmxon 
  -((val  VMXON_CR0_ALWAYSON) != VMXON_CR0_ALWAYSON))
  -return 1;
  -
   if (is_guest_mode(vcpu)) {
  -/*
  - * We get here when L2 changed cr0 in a way that did 
  not change
  - * any of L1's shadowed bits (see 
  nested_vmx_exit_handled_cr),
  - * but did change L0 shadowed bits. This can currently 
  happen
  - * with the TS bit: L0 may want to leave TS on (for 
  lazy fpu
  - * loading) while pretending to allow the guest to 
  change it.
  - */
  Can't say I understand this patch yet, but it looks like the comment 
  is
  still valid. Why have you removed it?
 
  L0 allows L1 or L2 at most to own TS, the rest is host-owned. I think
  the comment was always misleading.
 
  I do not see how it is misleading. For everything but TS we will not get
  here (if L1 is kvm). For TS we will get here if L1 allows L2 to change
  it, but L0 does not.
 
  For everything *but guest-owned* we will get here, thus for most CR0
  accesses (bit-wise, not regarding frequency).
 
  I do not see how. If bit is trapped by L1 we will not get here. We will
  do vmexit to L1 instead. nested_vmx_exit_handled_cr() check this 
  condition.
  I am not arguing about you code (didn't grok it yet), but the comment
  still make sense to me.
 
  We get here when L2 changed cr0 in a way that did not change any of
  L1's shadowed bits (see nested_vmx_exit_handled_cr), but did change L0
  shadowed bits. That I can sign. But the rest about TS is just
  misleading as we trap _every_ change in L0 - except for TS under certain
  conditions. The old code was tested against TS only, that's what the
  comment witness.
 
  TS is just an example of how we can get here with KVM on KVM. Obviously
  other hypervisors may have different configuration. L2 may allow full
  guest access to CR0 and then each CR0 write by L2 will be handled here.
  Under what other condition we trap _every_ change in L0 - except for
  TS here?
 
 On FPU activation:
 
 cr0_guest_owned_bits = X86_CR0_TS;
 
 And on FPU deactivation:
 
 cr0_guest_owned_bits = 0;
 
That's exactly TS case that comment explains. Note that
CR0_GUEST_HOST_MASK = ~cr0_guest_owned_bits.

  
  If you prefer, I'll leave part one in.
 
  Please do so. Without the comment it is not obvious why exit condition
  is not checked here. Still do not see why you object to TS part.
 
 It describes a corner case in a way that suggests this is the only
 reason why we get here.
 
For KVM on KVM it is.

--
Gleb.
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] KVM: nVMX: Fix setting of CR0 and CR4 in guest mode

2013-03-04 Thread Jan Kiszka
On 2013-03-04 21:00, Gleb Natapov wrote:
 On Mon, Mar 04, 2013 at 08:37:38PM +0100, Jan Kiszka wrote:
 On 2013-03-04 20:33, Gleb Natapov wrote:
 On Mon, Mar 04, 2013 at 08:23:52PM +0100, Jan Kiszka wrote:
 On 2013-03-04 19:39, Gleb Natapov wrote:
 On Mon, Mar 04, 2013 at 07:08:08PM +0100, Jan Kiszka wrote:
 On 2013-03-04 18:56, Gleb Natapov wrote:
 On Mon, Mar 04, 2013 at 03:25:47PM +0100, Jan Kiszka wrote:
 On 2013-03-04 15:15, Gleb Natapov wrote:
 On Mon, Mar 04, 2013 at 03:09:51PM +0100, Jan Kiszka wrote:
 On 2013-03-04 14:22, Gleb Natapov wrote:
 On Thu, Feb 28, 2013 at 10:44:47AM +0100, Jan Kiszka wrote:
 The logic for calculating the value with which we call 
 kvm_set_cr0/4 was
 broken (will definitely be visible with nested unrestricted guest 
 mode
 support). Also, we performed the check regarding CR0_ALWAYSON too 
 early
 when in guest mode.

 What really needs to be done on both CR0 and CR4 is to mask out 
 L1-owned
 bits and merge them in from GUEST_CR0/4. In contrast, arch.cr0/4 
 and
 arch.cr0/4_guest_owned_bits contain the mangled L0+L1 state and, 
 thus,
 are not suited as input.

 For both CRs, we can then apply the check against 
 VMXON_CRx_ALWAYSON and
 refuse the update if it fails. To be fully consistent, we 
 implement this
 check now also for CR4.

 Finally, we have to set the shadow to the value L2 wanted to write
 originally.

 Signed-off-by: Jan Kiszka jan.kis...@siemens.com
 ---

 Found while making unrestricted guest mode working. Not sure what 
 impact
 the bugs had on current feature level, if any.

 For interested folks, I've pushed my nEPT environment here:

 git://git.kiszka.org/linux-kvm.git nept-hacking

  arch/x86/kvm/vmx.c |   49 
 ++---
  1 files changed, 30 insertions(+), 19 deletions(-)

 diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
 index 7cc566b..d1dac08 100644
 --- a/arch/x86/kvm/vmx.c
 +++ b/arch/x86/kvm/vmx.c
 @@ -4605,37 +4605,48 @@ vmx_patch_hypercall(struct kvm_vcpu *vcpu, 
 unsigned char *hypercall)
  /* called to set cr0 as appropriate for a mov-to-cr0 exit. */
  static int handle_set_cr0(struct kvm_vcpu *vcpu, unsigned long 
 val)
  {
 -  if (to_vmx(vcpu)-nested.vmxon 
 -  ((val  VMXON_CR0_ALWAYSON) != VMXON_CR0_ALWAYSON))
 -  return 1;
 -
if (is_guest_mode(vcpu)) {
 -  /*
 -   * We get here when L2 changed cr0 in a way that did 
 not change
 -   * any of L1's shadowed bits (see 
 nested_vmx_exit_handled_cr),
 -   * but did change L0 shadowed bits. This can currently 
 happen
 -   * with the TS bit: L0 may want to leave TS on (for 
 lazy fpu
 -   * loading) while pretending to allow the guest to 
 change it.
 -   */
 Can't say I understand this patch yet, but it looks like the 
 comment is
 still valid. Why have you removed it?

 L0 allows L1 or L2 at most to own TS, the rest is host-owned. I think
 the comment was always misleading.

 I do not see how it is misleading. For everything but TS we will not 
 get
 here (if L1 is kvm). For TS we will get here if L1 allows L2 to change
 it, but L0 does not.

 For everything *but guest-owned* we will get here, thus for most CR0
 accesses (bit-wise, not regarding frequency).

 I do not see how. If bit is trapped by L1 we will not get here. We will
 do vmexit to L1 instead. nested_vmx_exit_handled_cr() checks this 
 condition.
 I am not arguing about your code (didn't grok it yet), but the comment
 still make sense to me.

 We get here when L2 changed cr0 in a way that did not change any of
 L1's shadowed bits (see nested_vmx_exit_handled_cr), but did change L0
 shadowed bits. That I can sign. But the rest about TS is just
 misleading as we trap _every_ change in L0 - except for TS under certain
 conditions. The old code was tested against TS only, that's what the
 comment witness.

 TS is just an example of how we can get here with KVM on KVM. Obviously
 other hypervisors may have different configuration. L2 may allow full
 guest access to CR0 and then each CR0 write by L2 will be handled here.
 Under what other condition we trap _every_ change in L0 - except for
 TS here?

 On FPU activation:

 cr0_guest_owned_bits = X86_CR0_TS;

 And on FPU deactivation:

 cr0_guest_owned_bits = 0;

 That's exactly TS case that comment explains. Note that
 CR0_GUEST_HOST_MASK = ~cr0_guest_owned_bits.

 Again, it's the inverse of what the comment suggest: we enter
 handle_set_cr0 for every change on CR0 that doesn't match the shadow -
 except TS was given to the guest by both L1 and L0 (or TS isn't changed
 as well).
  That doesn't make sense to me. I am not even sure what you are saying
 since you do not specify what shadow is matched. From the code I see
 that on CR0 exit to L0 from L2 we check if L2 tries to change CR0 bits
 that L1 claims to belong to it and do #vmexit to L1 if it is:
 
if (vmcs12-cr0_guest_host_mask  (val ^ vmcs12-cr0_read_shadow))
 return 1;
 
 We never reach 

Re: [PATCH] KVM: nVMX: Fix setting of CR0 and CR4 in guest mode

2013-03-04 Thread Gleb Natapov
On Mon, Mar 04, 2013 at 09:12:25PM +0100, Jan Kiszka wrote:
 On 2013-03-04 21:00, Gleb Natapov wrote:
  On Mon, Mar 04, 2013 at 08:37:38PM +0100, Jan Kiszka wrote:
  On 2013-03-04 20:33, Gleb Natapov wrote:
  On Mon, Mar 04, 2013 at 08:23:52PM +0100, Jan Kiszka wrote:
  On 2013-03-04 19:39, Gleb Natapov wrote:
  On Mon, Mar 04, 2013 at 07:08:08PM +0100, Jan Kiszka wrote:
  On 2013-03-04 18:56, Gleb Natapov wrote:
  On Mon, Mar 04, 2013 at 03:25:47PM +0100, Jan Kiszka wrote:
  On 2013-03-04 15:15, Gleb Natapov wrote:
  On Mon, Mar 04, 2013 at 03:09:51PM +0100, Jan Kiszka wrote:
  On 2013-03-04 14:22, Gleb Natapov wrote:
  On Thu, Feb 28, 2013 at 10:44:47AM +0100, Jan Kiszka wrote:
  The logic for calculating the value with which we call 
  kvm_set_cr0/4 was
  broken (will definitely be visible with nested unrestricted 
  guest mode
  support). Also, we performed the check regarding CR0_ALWAYSON 
  too early
  when in guest mode.
 
  What really needs to be done on both CR0 and CR4 is to mask out 
  L1-owned
  bits and merge them in from GUEST_CR0/4. In contrast, arch.cr0/4 
  and
  arch.cr0/4_guest_owned_bits contain the mangled L0+L1 state and, 
  thus,
  are not suited as input.
 
  For both CRs, we can then apply the check against 
  VMXON_CRx_ALWAYSON and
  refuse the update if it fails. To be fully consistent, we 
  implement this
  check now also for CR4.
 
  Finally, we have to set the shadow to the value L2 wanted to 
  write
  originally.
 
  Signed-off-by: Jan Kiszka jan.kis...@siemens.com
  ---
 
  Found while making unrestricted guest mode working. Not sure 
  what impact
  the bugs had on current feature level, if any.
 
  For interested folks, I've pushed my nEPT environment here:
 
  git://git.kiszka.org/linux-kvm.git nept-hacking
 
   arch/x86/kvm/vmx.c |   49 
  ++---
   1 files changed, 30 insertions(+), 19 deletions(-)
 
  diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
  index 7cc566b..d1dac08 100644
  --- a/arch/x86/kvm/vmx.c
  +++ b/arch/x86/kvm/vmx.c
  @@ -4605,37 +4605,48 @@ vmx_patch_hypercall(struct kvm_vcpu 
  *vcpu, unsigned char *hypercall)
   /* called to set cr0 as appropriate for a mov-to-cr0 exit. */
   static int handle_set_cr0(struct kvm_vcpu *vcpu, unsigned long 
  val)
   {
  -if (to_vmx(vcpu)-nested.vmxon 
  -((val  VMXON_CR0_ALWAYSON) != VMXON_CR0_ALWAYSON))
  -return 1;
  -
   if (is_guest_mode(vcpu)) {
  -/*
  - * We get here when L2 changed cr0 in a way 
  that did not change
  - * any of L1's shadowed bits (see 
  nested_vmx_exit_handled_cr),
  - * but did change L0 shadowed bits. This can 
  currently happen
  - * with the TS bit: L0 may want to leave TS on 
  (for lazy fpu
  - * loading) while pretending to allow the guest 
  to change it.
  - */
  Can't say I understand this patch yet, but it looks like the 
  comment is
  still valid. Why have you removed it?
 
  L0 allows L1 or L2 at most to own TS, the rest is host-owned. I 
  think
  the comment was always misleading.
 
  I do not see how it is misleading. For everything but TS we will 
  not get
  here (if L1 is kvm). For TS we will get here if L1 allows L2 to 
  change
  it, but L0 does not.
 
  For everything *but guest-owned* we will get here, thus for most CR0
  accesses (bit-wise, not regarding frequency).
 
  I do not see how. If bit is trapped by L1 we will not get here. We 
  will
   do vmexit to L1 instead. nested_vmx_exit_handled_cr() checks this 
  condition.
   I am not arguing about your code (didn't grok it yet), but the comment
  still make sense to me.
 
  We get here when L2 changed cr0 in a way that did not change any of
  L1's shadowed bits (see nested_vmx_exit_handled_cr), but did change L0
  shadowed bits. That I can sign. But the rest about TS is just
  misleading as we trap _every_ change in L0 - except for TS under 
  certain
  conditions. The old code was tested against TS only, that's what the
  comment witness.
 
  TS is just an example of how we can get here with KVM on KVM. Obviously
  other hypervisors may have different configuration. L2 may allow full
  guest access to CR0 and then each CR0 write by L2 will be handled here.
  Under what other condition we trap _every_ change in L0 - except for
  TS here?
 
  On FPU activation:
 
  cr0_guest_owned_bits = X86_CR0_TS;
 
  And on FPU deactivation:
 
  cr0_guest_owned_bits = 0;
 
  That's exactly TS case that comment explains. Note that
  CR0_GUEST_HOST_MASK = ~cr0_guest_owned_bits.
 
  Again, it's the inverse of what the comment suggest: we enter
  handle_set_cr0 for every change on CR0 that doesn't match the shadow -
  except TS was given to the guest by both L1 and L0 (or TS isn't changed
  as well).
   That doesn't make sense to me. I am not even sure what you are saying
  since you do not specify what shadow is 

Re: [PATCH] KVM: x86: Convert INIT and SIPI signals into synchronously handled requests

2013-03-04 Thread Jan Kiszka
On 2013-03-04 15:28, Paolo Bonzini wrote:
 Il 03/03/2013 21:21, Jan Kiszka ha scritto:
 From: Jan Kiszka jan.kis...@siemens.com

 A VCPU sending INIT or SIPI to some other VCPU races for setting the
 remote VCPU's mp_state. When we were unlucky, KVM_MP_STATE_INIT_RECEIVED
 was overwritten by kvm_emulate_halt and, thus, got lost.

 Fix this by raising requests on the sender side that will then be
 handled synchronously over the target VCPU context.

 Signed-off-by: Jan Kiszka jan.kis...@siemens.com
 ---

 Turned out to be simpler than expected. I'm no longer able to reproduce
 the race I saw before.

  arch/x86/kvm/lapic.c |9 -
  arch/x86/kvm/x86.c   |   16 +++-
  include/linux/kvm_host.h |2 ++
  3 files changed, 21 insertions(+), 6 deletions(-)

 diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
 index 02b51dd..be1e37a 100644
 --- a/arch/x86/kvm/lapic.c
 +++ b/arch/x86/kvm/lapic.c
 @@ -731,8 +731,7 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int 
 delivery_mode,
  case APIC_DM_INIT:
  if (!trig_mode || level) {
  result = 1;
 -vcpu-arch.mp_state = KVM_MP_STATE_INIT_RECEIVED;
 -kvm_make_request(KVM_REQ_EVENT, vcpu);
 +kvm_make_request(KVM_REQ_INIT, vcpu);
  kvm_vcpu_kick(vcpu);
  } else {
  apic_debug(Ignoring de-assert INIT to vcpu %d\n,
 @@ -743,11 +742,11 @@ static int __apic_accept_irq(struct kvm_lapic *apic, 
 int delivery_mode,
  case APIC_DM_STARTUP:
  apic_debug(SIPI to vcpu %d vector 0x%02x\n,
 vcpu-vcpu_id, vector);
 -if (vcpu-arch.mp_state == KVM_MP_STATE_INIT_RECEIVED) {
 +if (vcpu-arch.mp_state == KVM_MP_STATE_INIT_RECEIVED ||
 +test_bit(KVM_REQ_INIT, vcpu-requests)) {
  result = 1;
  vcpu-arch.sipi_vector = vector;
 -vcpu-arch.mp_state = KVM_MP_STATE_SIPI_RECEIVED;
 -kvm_make_request(KVM_REQ_EVENT, vcpu);
 +kvm_make_request(KVM_REQ_SIPI, vcpu);
  kvm_vcpu_kick(vcpu);
  }
  break;
 diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
 index d0cf737..8c8843c 100644
 --- a/arch/x86/kvm/x86.c
 +++ b/arch/x86/kvm/x86.c
 @@ -5641,6 +5641,18 @@ static void update_eoi_exitmap(struct kvm_vcpu *vcpu)
  kvm_x86_ops-load_eoi_exitmap(vcpu, eoi_exit_bitmap);
  }
  
 +static bool kvm_check_init_and_sipi(struct kvm_vcpu *vcpu)
 +{
 +if (kvm_check_request(KVM_REQ_INIT, vcpu))
 +vcpu-arch.mp_state = KVM_MP_STATE_INIT_RECEIVED;
 +if (kvm_check_request(KVM_REQ_SIPI, vcpu) 
 +vcpu-arch.mp_state == KVM_MP_STATE_INIT_RECEIVED) {
 +vcpu-arch.mp_state = KVM_MP_STATE_SIPI_RECEIVED;
 
 Do you need KVM_MP_STATE_SIPI_RECEIVED at all anymore?

Unfortunately, we cannot kill it as it was user-visible:

When a VCPU receives KVM_MP_STATE_SIPI_RECEIVED, it leaves __vcpu_run
with -EINTR and, thus, KVM_RUN. We actually return to userspace,
allowing it to see this mp_state and also migrate the guest in this state.

I could avoid this userspace exit (not sure what it is good for) but we
will have to keep the logic to accept and convert the state into
KVM_MP_STATE_RUNNABLE. So there is not much to simplify here, I'm afraid.

Jan



signature.asc
Description: OpenPGP digital signature


Re: [PATCH v2] KVM: x86: Convert INIT and SIPI signals into synchronously handled requests

2013-03-04 Thread Jan Kiszka
On 2013-03-04 22:41, Jan Kiszka wrote:
 From: Jan Kiszka jan.kis...@siemens.com
 
 A VCPU sending INIT or SIPI to some other VCPU races for setting the
 remote VCPU's mp_state. When we were unlucky, KVM_MP_STATE_INIT_RECEIVED
 was overwritten by kvm_emulate_halt and, thus, got lost.
 
 Fix this by raising requests on the sender side that will then be
 handled synchronously over the target VCPU context.
 
 Signed-off-by: Jan Kiszka jan.kis...@siemens.com
 ---
 
 Changes in v2:
  - check transition to INIT_RECEIVED in vcpu_enter_guest
  - removed return value of kvm_check_init_and_sipi - caller has to
check for relevant transition afterward
  - add write barrier after setting sipi_vector
 
  arch/x86/kvm/lapic.c |   11 ++-
  arch/x86/kvm/x86.c   |   15 +++
  include/linux/kvm_host.h |2 ++
  3 files changed, 23 insertions(+), 5 deletions(-)
 
 diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
 index 02b51dd..7986c9f 100644
 --- a/arch/x86/kvm/lapic.c
 +++ b/arch/x86/kvm/lapic.c
 @@ -731,8 +731,7 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int 
 delivery_mode,
   case APIC_DM_INIT:
   if (!trig_mode || level) {
   result = 1;
 - vcpu-arch.mp_state = KVM_MP_STATE_INIT_RECEIVED;
 - kvm_make_request(KVM_REQ_EVENT, vcpu);
 + kvm_make_request(KVM_REQ_INIT, vcpu);
   kvm_vcpu_kick(vcpu);
   } else {
   apic_debug(Ignoring de-assert INIT to vcpu %d\n,
 @@ -743,11 +742,13 @@ static int __apic_accept_irq(struct kvm_lapic *apic, 
 int delivery_mode,
   case APIC_DM_STARTUP:
   apic_debug(SIPI to vcpu %d vector 0x%02x\n,
  vcpu-vcpu_id, vector);
 - if (vcpu-arch.mp_state == KVM_MP_STATE_INIT_RECEIVED) {
 + if (vcpu-arch.mp_state == KVM_MP_STATE_INIT_RECEIVED ||
 + test_bit(KVM_REQ_INIT, vcpu-requests)) {
   result = 1;
   vcpu-arch.sipi_vector = vector;
 - vcpu-arch.mp_state = KVM_MP_STATE_SIPI_RECEIVED;
 - kvm_make_request(KVM_REQ_EVENT, vcpu);
 + /* make sure sipi_vector is visible for the receiver */
 + smp_wmb();
 + kvm_make_request(KVM_REQ_SIPI, vcpu);
   kvm_vcpu_kick(vcpu);
   }
   break;
 diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
 index d0cf737..0be04b9 100644
 --- a/arch/x86/kvm/x86.c
 +++ b/arch/x86/kvm/x86.c
 @@ -5641,6 +5641,15 @@ static void update_eoi_exitmap(struct kvm_vcpu *vcpu)
   kvm_x86_ops-load_eoi_exitmap(vcpu, eoi_exit_bitmap);
  }
  
 +static void kvm_check_init_and_sipi(struct kvm_vcpu *vcpu)
 +{
 + if (kvm_check_request(KVM_REQ_INIT, vcpu))
 + vcpu-arch.mp_state = KVM_MP_STATE_INIT_RECEIVED;

And here is a small race between clearing REQ_INIT and setting
INIT_RECEIVED. It can make the LAPIC drop the SIPI incorrectly. We need to
break up the test-and-clear, doing the clear after the mp_state update. Yeah...

Jan

 + if (kvm_check_request(KVM_REQ_SIPI, vcpu) 
 + vcpu-arch.mp_state == KVM_MP_STATE_INIT_RECEIVED)
 + vcpu-arch.mp_state = KVM_MP_STATE_SIPI_RECEIVED;
 +}
 +
  static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
  {
   int r;
 @@ -5649,6 +5658,11 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
   bool req_immediate_exit = 0;
  
   if (vcpu-requests) {
 + kvm_check_init_and_sipi(vcpu);
 + if (vcpu-arch.mp_state == KVM_MP_STATE_INIT_RECEIVED) {
 + r = 1;
 + goto out;
 + }
   if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu))
   kvm_mmu_unload(vcpu);
   if (kvm_check_request(KVM_REQ_MIGRATE_TIMER, vcpu))
 @@ -6977,6 +6991,7 @@ void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
  
  int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
  {
 + kvm_check_init_and_sipi(vcpu);
   return (vcpu-arch.mp_state == KVM_MP_STATE_RUNNABLE 
   !vcpu-arch.apf.halted)
   || !list_empty_careful(vcpu-async_pf.done)
 diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
 index 722cae7..1a191c9 100644
 --- a/include/linux/kvm_host.h
 +++ b/include/linux/kvm_host.h
 @@ -124,6 +124,8 @@ static inline bool is_error_page(struct page *page)
  #define KVM_REQ_MCLOCK_INPROGRESS 20
  #define KVM_REQ_EPR_EXIT  21
  #define KVM_REQ_EOIBITMAP 22
 +#define KVM_REQ_INIT  23
 +#define KVM_REQ_SIPI  24
  
  #define KVM_USERSPACE_IRQ_SOURCE_ID  0
  #define KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID 1
 




signature.asc
Description: OpenPGP digital signature


Re: virtio PCI on KVM without IO BARs

2013-03-04 Thread Marcelo Tosatti
On Thu, Feb 28, 2013 at 05:24:33PM +0200, Michael S. Tsirkin wrote:
 OK we talked about this a while ago, here's
 a summary and some proposals:
 At the moment, virtio PCI uses IO BARs for all accesses.
 
 The reason for IO use is the cost of different VM exit types
 of transactions and their emulation on KVM on x86
 (it would be trivial to use memory BARs on non x86 platforms
  if they don't have PIO).
 Example benchmark (cycles per transaction):
   (io access) outw 1737
   (memory access) movw 4341
 for comparison:
   (hypercall access): vmcall 1566
   (pv memory access) movw_fast 1817 (*explanation what this is below)
 
 This creates a problem if we want to make virtio devices
 proper PCI express devices with native hotplug support.
 This is because each hotpluggable PCI express device always has
 a PCI express port (port per device),
 where each port is represented by a PCI to PCI bridge.
 In turn, a PCI to PCI bridge claims a 4Kbyte aligned
 range of IO addresses. This means that we can have at
 most 15 such devices, this is a nasty limitation.
 
 Another problem with PIO is support for physical virtio devices,
 and nested virt: KVM currently programs all PIO accesses
 to cause vm exit, so using this device in a VM will be slow.
 
 So we really want to stop using IO BARs completely if at all possible,
 but looking at the table above, switching to memory BAR and movw for
 notifications will not work well.
 
 Possible solutions:
 1. hypercall instead of PIO
   basically add a hypercall that gets an MMIO address/data
   and does an MMIO write for us.
   We'll want some capability in the device to let guest know
   this is what it should do.
   Pros: even faster than PIO
   Cons: this won't help nested or assigned devices (won't hurt
 them either as it will be conditional on the capability above).
   Cons: need host kernel support, which then has to be maintained
 forever, even if intel speeds up MMIO exits.
 
 2. pv memory access
   There are two reasons that memory access is slower:
   - one is that it's handled as an EPT misconfiguration error
   so handled by cpu slow path
   - one is that we need to decode the x86 instruction in
   software, to calculate address/data for the access.
 
   We could agree that guests would use a specific instruction
   for virtio accesses, and fast-path it specifically.
   This is the pv memory access option above.
   Pros: helps assigned devices and nested virt
   Pros: easy to drop if hardware support is there
   Cons: a bit slower than IO
   Cons: need host kernel support
 
 3. hypervisor assigned IO address
   qemu can reserve IO addresses and assign to virtio devices.
   2 bytes per device (for notification and ISR access) will be
   enough. So we can reserve 4K and this gets us 2000 devices.
 From KVM perspective, nothing changes.
   We'll want some capability in the device to let guest know
   this is what it should do, and pass the io address.
   One way to reserve the addresses is by using the bridge.
   Pros: no need for host kernel support
   Pros: regular PIO so fast
   Cons: does not help assigned devices, breaks nested virt
 
 Simply counting pros/cons, option 3 seems best. It's also the
 easiest to implement.

Agree.

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v5 6/6] KVM: s390: Wire up ioeventfd.

2013-03-04 Thread Marcelo Tosatti
On Thu, Feb 28, 2013 at 12:33:21PM +0100, Cornelia Huck wrote:
 Enable ioeventfd support on s390 and hook up diagnose 500 virtio-ccw
 notifications.
 
 Signed-off-by: Cornelia Huck cornelia.h...@de.ibm.com
 ---
  arch/s390/kvm/Kconfig|  1 +
  arch/s390/kvm/Makefile   |  2 +-
  arch/s390/kvm/diag.c | 26 ++
  arch/s390/kvm/kvm-s390.c |  1 +
  4 files changed, 29 insertions(+), 1 deletion(-)
 
 diff --git a/arch/s390/kvm/Kconfig b/arch/s390/kvm/Kconfig
 index b58dd86..3c43e30 100644
 --- a/arch/s390/kvm/Kconfig
 +++ b/arch/s390/kvm/Kconfig
 @@ -22,6 +22,7 @@ config KVM
   select PREEMPT_NOTIFIERS
   select ANON_INODES
   select HAVE_KVM_CPU_RELAX_INTERCEPT
 + select HAVE_KVM_EVENTFD
   ---help---
 Support hosting paravirtualized guest machines using the SIE
 virtualization capability on the mainframe. This should work
 diff --git a/arch/s390/kvm/Makefile b/arch/s390/kvm/Makefile
 index 3975722..8fe9d65 100644
 --- a/arch/s390/kvm/Makefile
 +++ b/arch/s390/kvm/Makefile
 @@ -6,7 +6,7 @@
  # it under the terms of the GNU General Public License (version 2 only)
  # as published by the Free Software Foundation.
  
 -common-objs = $(addprefix ../../../virt/kvm/, kvm_main.o)
 +common-objs = $(addprefix ../../../virt/kvm/, kvm_main.o eventfd.o)
  
  ccflags-y := -Ivirt/kvm -Iarch/s390/kvm
  
 diff --git a/arch/s390/kvm/diag.c b/arch/s390/kvm/diag.c
 index a390687..1c01a99 100644
 --- a/arch/s390/kvm/diag.c
 +++ b/arch/s390/kvm/diag.c
 @@ -13,6 +13,7 @@
  
  #include linux/kvm.h
  #include linux/kvm_host.h
 +#include asm/virtio-ccw.h
  #include kvm-s390.h
  #include trace.h
  #include trace-s390.h
 @@ -104,6 +105,29 @@ static int __diag_ipl_functions(struct kvm_vcpu *vcpu)
   return -EREMOTE;
  }
  
 +static int __diag_virtio_hypercall(struct kvm_vcpu *vcpu)
 +{
 + int ret, idx;
 +
 + /* No virtio-ccw notification? Get out quickly. */
 + if (!vcpu-kvm-arch.css_support ||
 + (vcpu-run-s.regs.gprs[1] != KVM_S390_VIRTIO_CCW_NOTIFY))
 + return -EOPNOTSUPP;
 +
 + idx = srcu_read_lock(vcpu-kvm-srcu);
 + /*
 +  * The layout is as follows:
 +  * - gpr 2 contains the subchannel id (passed as addr)
 +  * - gpr 3 contains the virtqueue index (passed as datamatch)
 +  */
 + ret = kvm_io_bus_write(vcpu-kvm, KVM_VIRTIO_CCW_NOTIFY_BUS,
 + vcpu-run-s.regs.gprs[2],
 + 8, vcpu-run-s.regs.gprs[3]);
 + srcu_read_unlock(vcpu-kvm-srcu, idx);
 + /* kvm_io_bus_write returns -EOPNOTSUPP if it found no match. */
 + return ret  0 ? ret : 0;
 +}
 +

What about the cookie?

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] arch/x86/kvm: beautify source code for __u32 irq which is never 0

2013-03-04 Thread Chen Gang
于 2013年02月27日 22:08, Gleb Natapov 写道:
 On Wed, Feb 27, 2013 at 11:33:25AM +0800, Chen Gang wrote:
  
irp-irq is __u32 which is never  0.
  
  Signed-off-by: Chen Gang gang.c...@asianux.com
 Applied, thanks.
 

  thank you, too.

  :-)

-- 
Chen Gang

Asianux Corporation
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: in-kernel interrupt controller steering

2013-03-04 Thread Scott Wood

On 03/04/2013 04:20:47 PM, Alexander Graf wrote:

Howdy,

We just sat down to discuss the proposed XICS and MPIC interfaces and  
how we can take bits of each and create an interface that works for  
everyone. In this, it feels like we came to some conclusions. Some of  
which we already reached earlier, but forgot in between :).


I hope I didn't forget too many pieces. Scott, Paul and Stuart,  
please add whatever you find missing in here.


It looks about right.

1) We need to set the generic interrupt type of the system before we  
create vcpus.


This is a new ioctl that sets the overall system interrupt controller  
type to a specific model. This used so that when we create vcpus, we  
can create the appended local interrupt controller state without  
the actual interrupt controller device available yet. It is also used  
later to switch between interrupt controller implementations.


This interrupt type is write once and frozen after the first vcpu got  
created.


Who is going to write up this patch?

2) Interrupt controllers (XICS / MPIC) get created by the device  
create api


Getting and setting state of an interrupt controller also happens  
through this. Getting and setting state from vcpus happens through  
ONE_REG. Injecting interrupt happens through the normal irqchip ioctl  
(we probably need to encode the target device id in there somehow).


This fits in nicely with a model where the interrupt controller is a  
proper QOM device in QEMU, since we can create it long after vcpus  
have been created.



3) We open code interrupt controller distinction

There is no need for function pointers. We just switch() based on the  
type that gets set in the initial ioctl to determine which code to  
call. The retrieval of the irq type happens through a static inline  
function in a header that can return a constant number for  
configurations that don't support multiple in-kernel irqchips.



4) The device attribute API has separate groups that target different  
use cases


Paul needs live migration, so he will implement device attributes  
that enable him to do live migration.
Scott doesn't implement live migration, so his MPIC attribute groups  
are solely for debugging purposes today.



5) There is no need for atomic device control accessors today.

Live migration happens with vcpus stopped, so we don't need to be  
atomic in the kernel - user space interface.



6) The device attribute API will keep read and write (get / set)  
accessors.


There is no specific need for a generic command ioctl.


Gleb, is this OK?  A bidirectional command accessor could be added  
later if a need arises.


Will attributes still be renamed to commands, even if the get/set  
approach is retained?



7) Interrupt line connections to vcpus are implicit

We don't explicitly mark which in-kernel irqchip interrupt line goes  
to which vcpu. This is done implicitly. If we see a need for it, we  
create a new irqchip device type that allows us to explicitly  
configure vcpu connections.


Are there any changes needed to the device control api patch (just  
patch 1/6, not the rest of the patchset), besides Christoffer's request  
to tone down one of the comments, and whatever the response is to the  
questions in #6?


Should we add a size field in kvm_device, both for error checking and  
to assist tools such as strace?


-Scott
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


buildbot failure in qemu-kvm on default_x86_64_out_of_tree

2013-03-04 Thread qemu-kvm
The Buildbot has detected a new failure on builder default_x86_64_out_of_tree 
while building qemu-kvm.
Full details are available at:
 
http://buildbot.b1-systems.de/qemu-kvm/builders/default_x86_64_out_of_tree/builds/1433

Buildbot URL: http://buildbot.b1-systems.de/qemu-kvm/

Buildslave for this Build: b1_qemu_kvm_1

Build Reason: The Nightly scheduler named 'nightly_default' triggered this build
Build Source Stamp: [branch master] HEAD
Blamelist: 

BUILD FAILED: failed compile

sincerely,
 -The Buildbot



buildbot failure in qemu-kvm on default_i386_debian_5_0

2013-03-04 Thread qemu-kvm
The Buildbot has detected a new failure on builder default_i386_debian_5_0 
while building qemu-kvm.
Full details are available at:
 
http://buildbot.b1-systems.de/qemu-kvm/builders/default_i386_debian_5_0/builds/1494

Buildbot URL: http://buildbot.b1-systems.de/qemu-kvm/

Buildslave for this Build: b1_qemu_kvm_2

Build Reason: The Nightly scheduler named 'nightly_default' triggered this build
Build Source Stamp: [branch master] HEAD
Blamelist: 

BUILD FAILED: failed compile

sincerely,
 -The Buildbot



buildbot failure in qemu-kvm on default_i386_out_of_tree

2013-03-04 Thread qemu-kvm
The Buildbot has detected a new failure on builder default_i386_out_of_tree 
while building qemu-kvm.
Full details are available at:
 
http://buildbot.b1-systems.de/qemu-kvm/builders/default_i386_out_of_tree/builds/1431

Buildbot URL: http://buildbot.b1-systems.de/qemu-kvm/

Buildslave for this Build: b1_qemu_kvm_2

Build Reason: The Nightly scheduler named 'nightly_default' triggered this build
Build Source Stamp: [branch master] HEAD
Blamelist: 

BUILD FAILED: failed compile

sincerely,
 -The Buildbot



Re: [PATCH v13 1/8] save/load cpu runstate

2013-03-04 Thread Hu Tao
On Mon, Mar 04, 2013 at 10:30:48AM +0100, Paolo Bonzini wrote:
 Il 28/02/2013 13:13, Hu Tao ha scritto:
  This patch enables preservation of cpu runstate during save/load vm.
  So when a vm is restored from snapshot, the cpu runstate is restored,
  too.
 
 I don't think this feature is worth breaking backwards migration
 compatibility.  It is usually handled at a higher-level (management,
 like libvirt).

If a guest panic happens during migration, the runstate will still be running
on the destination host without this patch. But it is indeed a problem to break
backwards migration compatibility.

 
 Please make this a separate patch.

Sure.

 
 Paolo
 
  See following example:
  
  # save two vms: one is running, the other is paused
  (qemu) info status
  VM status: running
  (qemu) savevm running
  (qemu) stop
  (qemu) info status
  VM status: paused
  (qemu) savevm paused
  
  # restore the one running
  (qemu) info status
  VM status: paused
  (qemu) loadvm running
  (qemu) info status
  VM status: running
  
  # restore the one paused
  (qemu) loadvm paused
  (qemu) info status
  VM status: paused
  (qemu) cont
  (qemu)info status
  VM status: running
  
  Signed-off-by: Hu Tao hu...@cn.fujitsu.com
  ---
   include/sysemu/sysemu.h |  2 ++
   migration.c |  6 +-
   monitor.c   |  5 ++---
   savevm.c|  1 +
   vl.c| 34 ++
   5 files changed, 40 insertions(+), 8 deletions(-)
  
  diff --git a/include/sysemu/sysemu.h b/include/sysemu/sysemu.h
  index b19ec95..f121213 100644
  --- a/include/sysemu/sysemu.h
  +++ b/include/sysemu/sysemu.h
  @@ -19,6 +19,8 @@ extern uint8_t qemu_uuid[];
   int qemu_uuid_parse(const char *str, uint8_t *uuid);
   #define UUID_FMT 
  %02hhx%02hhx%02hhx%02hhx-%02hhx%02hhx-%02hhx%02hhx-%02hhx%02hhx-%02hhx%02hhx%02hhx%02hhx%02hhx%02hhx
   
  +void save_run_state(void);
  +void load_run_state(void);
   bool runstate_check(RunState state);
   void runstate_set(RunState new_state);
   int runstate_is_running(void);
  diff --git a/migration.c b/migration.c
  index 11725ae..c29830e 100644
  --- a/migration.c
  +++ b/migration.c
  @@ -107,11 +107,7 @@ static void process_incoming_migration_co(void *opaque)
   /* Make sure all file formats flush their mutable metadata */
   bdrv_invalidate_cache_all();
   
  -if (autostart) {
  -vm_start();
  -} else {
  -runstate_set(RUN_STATE_PAUSED);
  -}
  +load_run_state();
   }
   
   void process_incoming_migration(QEMUFile *f)
  diff --git a/monitor.c b/monitor.c
  index 32a6e74..bf974b4 100644
  --- a/monitor.c
  +++ b/monitor.c
  @@ -2059,13 +2059,12 @@ void qmp_closefd(const char *fdname, Error **errp)
   
   static void do_loadvm(Monitor *mon, const QDict *qdict)
   {
  -int saved_vm_running  = runstate_is_running();
   const char *name = qdict_get_str(qdict, name);
   
   vm_stop(RUN_STATE_RESTORE_VM);
   
  -if (load_vmstate(name) == 0  saved_vm_running) {
  -vm_start();
  +if (load_vmstate(name) == 0) {
  +load_run_state();
   }
   }
   
  diff --git a/savevm.c b/savevm.c
  index a8a53ef..aa631eb 100644
  --- a/savevm.c
  +++ b/savevm.c
  @@ -2143,6 +2143,7 @@ void do_savevm(Monitor *mon, const QDict *qdict)
   }
   
   saved_vm_running = runstate_is_running();
  +save_run_state();
   vm_stop(RUN_STATE_SAVE_VM);
   
   memset(sn, 0, sizeof(*sn));
  diff --git a/vl.c b/vl.c
  index febd2ea..7991f2e 100644
  --- a/vl.c
  +++ b/vl.c
  @@ -523,6 +523,7 @@ static int default_driver_check(QemuOpts *opts, void 
  *opaque)
   /* QEMU state */
   
   static RunState current_run_state = RUN_STATE_PRELAUNCH;
  +static RunState saved_run_state = RUN_STATE_RUNNING;
   
   typedef struct {
   RunState from;
  @@ -546,6 +547,7 @@ static const RunStateTransition 
  runstate_transitions_def[] = {
   { RUN_STATE_PAUSED, RUN_STATE_FINISH_MIGRATE },
   
   { RUN_STATE_POSTMIGRATE, RUN_STATE_RUNNING },
  +{ RUN_STATE_POSTMIGRATE, RUN_STATE_PAUSED },
   { RUN_STATE_POSTMIGRATE, RUN_STATE_FINISH_MIGRATE },
   
   { RUN_STATE_PRELAUNCH, RUN_STATE_RUNNING },
  @@ -556,6 +558,7 @@ static const RunStateTransition 
  runstate_transitions_def[] = {
   { RUN_STATE_FINISH_MIGRATE, RUN_STATE_POSTMIGRATE },
   
   { RUN_STATE_RESTORE_VM, RUN_STATE_RUNNING },
  +{ RUN_STATE_RESTORE_VM, RUN_STATE_PAUSED },
   
   { RUN_STATE_RUNNING, RUN_STATE_DEBUG },
   { RUN_STATE_RUNNING, RUN_STATE_INTERNAL_ERROR },
  @@ -585,11 +588,39 @@ static const RunStateTransition 
  runstate_transitions_def[] = {
   
   static bool runstate_valid_transitions[RUN_STATE_MAX][RUN_STATE_MAX];
   
  +void save_run_state(void)
  +{
  +saved_run_state = current_run_state;
  +}
  +
  +void load_run_state(void)
  +{
  +if (saved_run_state == RUN_STATE_RUNNING) {
  +vm_start();
  +} else if (!runstate_check(saved_run_state)) {
  +

buildbot failure in qemu-kvm on disable_kvm_i386_out_of_tree

2013-03-04 Thread qemu-kvm
The Buildbot has detected a new failure on builder disable_kvm_i386_out_of_tree 
while building qemu-kvm.
Full details are available at:
 
http://buildbot.b1-systems.de/qemu-kvm/builders/disable_kvm_i386_out_of_tree/builds/1431

Buildbot URL: http://buildbot.b1-systems.de/qemu-kvm/

Buildslave for this Build: b1_qemu_kvm_2

Build Reason: The Nightly scheduler named 'nightly_disable_kvm' triggered this 
build
Build Source Stamp: [branch master] HEAD
Blamelist: 

BUILD FAILED: failed compile

sincerely,
 -The Buildbot



buildbot failure in qemu-kvm on disable_kvm_x86_64_out_of_tree

2013-03-04 Thread qemu-kvm
The Buildbot has detected a new failure on builder 
disable_kvm_x86_64_out_of_tree while building qemu-kvm.
Full details are available at:
 
http://buildbot.b1-systems.de/qemu-kvm/builders/disable_kvm_x86_64_out_of_tree/builds/1431

Buildbot URL: http://buildbot.b1-systems.de/qemu-kvm/

Buildslave for this Build: b1_qemu_kvm_1

Build Reason: The Nightly scheduler named 'nightly_disable_kvm' triggered this 
build
Build Source Stamp: [branch master] HEAD
Blamelist: 

BUILD FAILED: failed compile

sincerely,
 -The Buildbot



buildbot failure in qemu-kvm on disable_kvm_x86_64_debian_5_0

2013-03-04 Thread qemu-kvm
The Buildbot has detected a new failure on builder 
disable_kvm_x86_64_debian_5_0 while building qemu-kvm.
Full details are available at:
 
http://buildbot.b1-systems.de/qemu-kvm/builders/disable_kvm_x86_64_debian_5_0/builds/1482

Buildbot URL: http://buildbot.b1-systems.de/qemu-kvm/

Buildslave for this Build: b1_qemu_kvm_1

Build Reason: The Nightly scheduler named 'nightly_disable_kvm' triggered this 
build
Build Source Stamp: [branch master] HEAD
Blamelist: 

BUILD FAILED: failed compile

sincerely,
 -The Buildbot

N�r��yb�X��ǧv�^�)޺{.n�+h����ܨ}���Ơz�j:+v���zZ+��+zf���h���~i���z��w���?��)ߢf

buildbot failure in qemu-kvm on disable_kvm_i386_debian_5_0

2013-03-04 Thread qemu-kvm
The Buildbot has detected a new failure on builder disable_kvm_i386_debian_5_0 
while building qemu-kvm.
Full details are available at:
 
http://buildbot.b1-systems.de/qemu-kvm/builders/disable_kvm_i386_debian_5_0/builds/1483

Buildbot URL: http://buildbot.b1-systems.de/qemu-kvm/

Buildslave for this Build: b1_qemu_kvm_2

Build Reason: The Nightly scheduler named 'nightly_disable_kvm' triggered this 
build
Build Source Stamp: [branch master] HEAD
Blamelist: 

BUILD FAILED: failed compile

sincerely,
 -The Buildbot



Re: [PATCH v13 4/8] add a new runstate: RUN_STATE_GUEST_PANICKED

2013-03-04 Thread Hu Tao
On Mon, Mar 04, 2013 at 10:40:15AM +0100, Paolo Bonzini wrote:
 Il 28/02/2013 13:13, Hu Tao ha scritto:
  The guest will be in this state when it is panicked.
  
  Signed-off-by: Wen Congyang we...@cn.fujitsu.com
  Signed-off-by: Hu Tao hu...@cn.fujitsu.com
  ---
   migration.c  |  1 +
   qapi-schema.json |  6 +-
   qmp.c|  3 ++-
   vl.c | 11 ++-
   4 files changed, 18 insertions(+), 3 deletions(-)
  
  diff --git a/migration.c b/migration.c
  index c29830e..fa17b82 100644
  --- a/migration.c
  +++ b/migration.c
  @@ -698,6 +698,7 @@ static void *buffered_file_thread(void *opaque)
   int64_t start_time, end_time;
   
   DPRINTF(done iterating\n);
  +save_run_state();
   start_time = qemu_get_clock_ms(rt_clock);
   qemu_system_wakeup_request(QEMU_WAKEUP_REASON_OTHER);
   if (old_vm_running) {
  diff --git a/qapi-schema.json b/qapi-schema.json
  index 28b070f..8f1d138 100644
  --- a/qapi-schema.json
  +++ b/qapi-schema.json
  @@ -174,11 +174,15 @@
   # @suspended: guest is suspended (ACPI S3)
   #
   # @watchdog: the watchdog action is configured to pause and has been 
  triggered
  +#
  +# @guest-panicked: the panicked action is configured to pause and has been
  +# triggered.
   ##
   { 'enum': 'RunState',
 'data': [ 'debug', 'inmigrate', 'internal-error', 'io-error', 'paused',
   'postmigrate', 'prelaunch', 'finish-migrate', 'restore-vm',
  -'running', 'save-vm', 'shutdown', 'suspended', 'watchdog' ] }
  +'running', 'save-vm', 'shutdown', 'suspended', 'watchdog',
  +'guest-panicked' ] }
   
   ##
   # @SnapshotInfo
  diff --git a/qmp.c b/qmp.c
  index 5f1bed1..f5027f6 100644
  --- a/qmp.c
  +++ b/qmp.c
  @@ -150,7 +150,8 @@ void qmp_cont(Error **errp)
   Error *local_err = NULL;
   
   if (runstate_check(RUN_STATE_INTERNAL_ERROR) ||
  -   runstate_check(RUN_STATE_SHUTDOWN)) {
  +runstate_check(RUN_STATE_SHUTDOWN) ||
  +runstate_check(RUN_STATE_GUEST_PANICKED)) {
   error_set(errp, QERR_RESET_REQUIRED);
   return;
   } else if (runstate_check(RUN_STATE_SUSPENDED)) {
  diff --git a/vl.c b/vl.c
  index 3d08e1a..51d4922 100644
  --- a/vl.c
  +++ b/vl.c
  @@ -536,6 +536,7 @@ static const RunStateTransition 
  runstate_transitions_def[] = {
   
   { RUN_STATE_INMIGRATE, RUN_STATE_RUNNING },
   { RUN_STATE_INMIGRATE, RUN_STATE_PAUSED },
  +{ RUN_STATE_INMIGRATE, RUN_STATE_GUEST_PANICKED },
 
 Is this a consequence of the first patch?

Yes.

 
   { RUN_STATE_INTERNAL_ERROR, RUN_STATE_RUNNING },
   { RUN_STATE_INTERNAL_ERROR, RUN_STATE_FINISH_MIGRATE },
  @@ -549,6 +550,7 @@ static const RunStateTransition 
  runstate_transitions_def[] = {
   { RUN_STATE_POSTMIGRATE, RUN_STATE_RUNNING },
   { RUN_STATE_POSTMIGRATE, RUN_STATE_PAUSED },
   { RUN_STATE_POSTMIGRATE, RUN_STATE_FINISH_MIGRATE },
  +{ RUN_STATE_POSTMIGRATE, RUN_STATE_GUEST_PANICKED },
 
 Impossible.  GUEST_PANICKED requires an instruction to be executed in
 the guest, so it should first go to RUNNING.
 
   { RUN_STATE_PRELAUNCH, RUN_STATE_RUNNING },
   { RUN_STATE_PRELAUNCH, RUN_STATE_FINISH_MIGRATE },
  @@ -559,6 +561,7 @@ static const RunStateTransition 
  runstate_transitions_def[] = {
   
   { RUN_STATE_RESTORE_VM, RUN_STATE_RUNNING },
   { RUN_STATE_RESTORE_VM, RUN_STATE_PAUSED },
  +{ RUN_STATE_RESTORE_VM, RUN_STATE_GUEST_PANICKED },
 
 Is it also for the first patch?

Yes.

 
   { RUN_STATE_RUNNING, RUN_STATE_DEBUG },
   { RUN_STATE_RUNNING, RUN_STATE_INTERNAL_ERROR },
  @@ -569,6 +572,7 @@ static const RunStateTransition 
  runstate_transitions_def[] = {
   { RUN_STATE_RUNNING, RUN_STATE_SAVE_VM },
   { RUN_STATE_RUNNING, RUN_STATE_SHUTDOWN },
   { RUN_STATE_RUNNING, RUN_STATE_WATCHDOG },
  +{ RUN_STATE_RUNNING, RUN_STATE_GUEST_PANICKED },
 
 This one is obviously ok.
 
   { RUN_STATE_SAVE_VM, RUN_STATE_RUNNING },
   
  @@ -583,6 +587,10 @@ static const RunStateTransition 
  runstate_transitions_def[] = {
   { RUN_STATE_WATCHDOG, RUN_STATE_RUNNING },
   { RUN_STATE_WATCHDOG, RUN_STATE_FINISH_MIGRATE },
   
  +{ RUN_STATE_GUEST_PANICKED, RUN_STATE_RUNNING },
  +{ RUN_STATE_GUEST_PANICKED, RUN_STATE_PAUSED },
  +{ RUN_STATE_GUEST_PANICKED, RUN_STATE_FINISH_MIGRATE },
 
 Like SHUTDOWN, it should go first to PAUSED and then to RUNNING.  A
 GUEST_PANICKED - RUNNING transition is not possible.  You're seeing it
 because you lack the addition of GUEST_PANICKED here:
 
 if (runstate_check(RUN_STATE_INTERNAL_ERROR) ||
 runstate_check(RUN_STATE_SHUTDOWN)) {
 runstate_set(RUN_STATE_PAUSED);
 }
 
 I think you should first move the INTERNAL_ERROR || SHUTDOWN checks to a
 separate function, so that you can then add GUEST_PANICKED.

Will 

 if 

[PATCH] ARM: KVM: add support for minimal host vs guest profiling

2013-03-04 Thread Marc Zyngier
In order to be able to correctly profile what is happening on the
host, we need to be able to identify when we're running on the guest,
and log these events differently.

Perf offers a simple way to register callbacks into KVM. Mimic what
x86 does and enjoy being able to profile your KVM host.

Signed-off-by: Marc Zyngier marc.zyng...@arm.com
---
This patch is on top of the pre-arm64 rework series posted earlier.

 arch/arm/include/asm/kvm_host.h |  3 ++
 arch/arm/kvm/Makefile   |  2 +-
 arch/arm/kvm/arm.c  |  4 +++
 arch/arm/kvm/perf.c | 68 +
 4 files changed, 76 insertions(+), 1 deletion(-)
 create mode 100644 arch/arm/kvm/perf.c

diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
index 0c4e643..78813b8 100644
--- a/arch/arm/include/asm/kvm_host.h
+++ b/arch/arm/include/asm/kvm_host.h
@@ -207,4 +207,7 @@ static inline void __cpu_init_hyp_mode(unsigned long long 
pgd_ptr,
kvm_call_hyp((void *)pgd_low, pgd_high, hyp_stack_ptr, vector_ptr);
 }
 
+int kvm_perf_init(void);
+int kvm_perf_teardown(void);
+
 #endif /* __ARM_KVM_HOST_H__ */
diff --git a/arch/arm/kvm/Makefile b/arch/arm/kvm/Makefile
index 8dc5e76..53c5ed8 100644
--- a/arch/arm/kvm/Makefile
+++ b/arch/arm/kvm/Makefile
@@ -18,6 +18,6 @@ kvm-arm-y = $(addprefix ../../../virt/kvm/, kvm_main.o 
coalesced_mmio.o)
 
 obj-y += kvm-arm.o init.o interrupts.o
 obj-y += arm.o handle_exit.o guest.o mmu.o emulate.o reset.o
-obj-y += coproc.o coproc_a15.o mmio.o psci.o
+obj-y += coproc.o coproc_a15.o mmio.o psci.o perf.o
 obj-$(CONFIG_KVM_ARM_VGIC) += vgic.o
 obj-$(CONFIG_KVM_ARM_TIMER) += arch_timer.o
diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index c10a45f..e821c37 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -928,7 +928,10 @@ static int init_hyp_mode(void)
if (err)
goto out_free_mappings;
 
+   kvm_perf_init();
+
kvm_info(Hyp mode initialized successfully\n);
+
return 0;
 out_free_vfp:
free_percpu(kvm_host_vfp_state);
@@ -972,6 +975,7 @@ out_err:
 /* NOP: Compiling as a module not supported */
 void kvm_arch_exit(void)
 {
+   kvm_perf_teardown();
 }
 
 static int arm_init(void)
diff --git a/arch/arm/kvm/perf.c b/arch/arm/kvm/perf.c
new file mode 100644
index 000..1a3849d
--- /dev/null
+++ b/arch/arm/kvm/perf.c
@@ -0,0 +1,68 @@
+/*
+ * Based on the x86 implementation.
+ *
+ * Copyright (C) 2012 ARM Ltd.
+ * Author: Marc Zyngier marc.zyng...@arm.com
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see http://www.gnu.org/licenses/.
+ */
+
+#include linux/perf_event.h
+#include linux/kvm_host.h
+
+#include asm/kvm_emulate.h
+
+static int kvm_is_in_guest(void)
+{
+return kvm_arm_get_running_vcpu() != NULL;
+}
+
+static int kvm_is_user_mode(void)
+{
+   struct kvm_vcpu *vcpu;
+
+   vcpu = kvm_arm_get_running_vcpu();
+
+   if (vcpu)
+   return !vcpu_mode_priv(vcpu);
+
+   return 0;
+}
+
+static unsigned long kvm_get_guest_ip(void)
+{
+   struct kvm_vcpu *vcpu;
+
+   vcpu = kvm_arm_get_running_vcpu();
+
+   if (vcpu)
+   return *vcpu_pc(vcpu);
+
+   return 0;
+}
+
+static struct perf_guest_info_callbacks kvm_guest_cbs = {
+   .is_in_guest= kvm_is_in_guest,
+   .is_user_mode   = kvm_is_user_mode,
+   .get_guest_ip   = kvm_get_guest_ip,
+};
+
+int kvm_perf_init(void)
+{
+   return perf_register_guest_info_callbacks(kvm_guest_cbs);
+}
+
+int kvm_perf_teardown(void)
+{
+   return perf_unregister_guest_info_callbacks(kvm_guest_cbs);
+}
-- 
1.7.12.4

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 28/29] ARM: KVM: change kvm_tlb_flush_vmid to kvm_tlb_flush_vmid_ipa

2013-03-04 Thread Marc Zyngier
v8 is capable of invalidating Stage-2 by IPA, but v7 is not.
Change kvm_tlb_flush_vmid() to take an IPA parameter, which is
then ignored by the invalidation code (and nuke the whole TLB
as it always did).

This allows v8 to implement a more optimized strategy.

Signed-off-by: Marc Zyngier marc.zyng...@arm.com
---
 arch/arm/include/asm/kvm_asm.h | 2 +-
 arch/arm/kvm/interrupts.S  | 9 ++---
 arch/arm/kvm/mmu.c | 8 
 3 files changed, 11 insertions(+), 8 deletions(-)

diff --git a/arch/arm/include/asm/kvm_asm.h b/arch/arm/include/asm/kvm_asm.h
index e4956f4..18d5032 100644
--- a/arch/arm/include/asm/kvm_asm.h
+++ b/arch/arm/include/asm/kvm_asm.h
@@ -75,7 +75,7 @@ extern char __kvm_hyp_code_end[];
 extern void __kvm_tlb_flush_vmid(struct kvm *kvm);
 
 extern void __kvm_flush_vm_context(void);
-extern void __kvm_tlb_flush_vmid(struct kvm *kvm);
+extern void __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa);
 
 extern int __kvm_vcpu_run(struct kvm_vcpu *vcpu);
 #endif
diff --git a/arch/arm/kvm/interrupts.S b/arch/arm/kvm/interrupts.S
index 8ca87ab..941aabc 100644
--- a/arch/arm/kvm/interrupts.S
+++ b/arch/arm/kvm/interrupts.S
@@ -35,15 +35,18 @@ __kvm_hyp_code_start:
 /
  * Flush per-VMID TLBs
  *
- * void __kvm_tlb_flush_vmid(struct kvm *kvm);
+ * void __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa);
  *
  * We rely on the hardware to broadcast the TLB invalidation to all CPUs
  * inside the inner-shareable domain (which is the case for all v7
  * implementations).  If we come across a non-IS SMP implementation, we'll
  * have to use an IPI based mechanism. Until then, we stick to the simple
  * hardware assisted version.
+ *
+ * As v7 does not support flushing per IPA, just nuke the whole TLB
+ * instead, ignoring the ipa value.
  */
-ENTRY(__kvm_tlb_flush_vmid)
+ENTRY(__kvm_tlb_flush_vmid_ipa)
push{r2, r3}
 
add r0, r0, #KVM_VTTBR
@@ -60,7 +63,7 @@ ENTRY(__kvm_tlb_flush_vmid)
 
pop {r2, r3}
bx  lr
-ENDPROC(__kvm_tlb_flush_vmid)
+ENDPROC(__kvm_tlb_flush_vmid_ipa)
 
 /
  * Flush TLBs and instruction caches of all CPUs inside the inner-shareable
diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c
index b694f58..2f12e40 100644
--- a/arch/arm/kvm/mmu.c
+++ b/arch/arm/kvm/mmu.c
@@ -34,9 +34,9 @@ extern char  __hyp_idmap_text_start[], __hyp_idmap_text_end[];
 
 static DEFINE_MUTEX(kvm_hyp_pgd_mutex);
 
-static void kvm_tlb_flush_vmid(struct kvm *kvm)
+static void kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa)
 {
-   kvm_call_hyp(__kvm_tlb_flush_vmid, kvm);
+   kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, kvm, ipa);
 }
 
 static int mmu_topup_memory_cache(struct kvm_mmu_memory_cache *cache,
@@ -457,7 +457,7 @@ static int stage2_set_pte(struct kvm *kvm, struct 
kvm_mmu_memory_cache *cache,
old_pte = *pte;
kvm_set_pte(pte, *new_pte);
if (pte_present(old_pte))
-   kvm_tlb_flush_vmid(kvm);
+   kvm_tlb_flush_vmid_ipa(kvm, addr);
else
get_page(virt_to_page(pte));
 
@@ -674,7 +674,7 @@ static void handle_hva_to_gpa(struct kvm *kvm,
 static void kvm_unmap_hva_handler(struct kvm *kvm, gpa_t gpa, void *data)
 {
unmap_stage2_range(kvm, gpa, PAGE_SIZE);
-   kvm_tlb_flush_vmid(kvm);
+   kvm_tlb_flush_vmid_ipa(kvm, gpa);
 }
 
 int kvm_unmap_hva(struct kvm *kvm, unsigned long hva)
-- 
1.7.12.4

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 25/29] ARM: KVM: fix fault_ipa computing

2013-03-04 Thread Marc Zyngier
The ARM ARM says that HPFAR reports bits [39:12] of the faulting
IPA, and we need to complement it with the bottom 12 bits of the
faulting VA.

This is always 12 bits, irrespective of the page size. Makes it
clearer in the code.

Signed-off-by: Marc Zyngier marc.zyng...@arm.com
---
 arch/arm/kvm/mmu.c | 9 +++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c
index 71d15bc..f601e6f 100644
--- a/arch/arm/kvm/mmu.c
+++ b/arch/arm/kvm/mmu.c
@@ -614,8 +614,13 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct 
kvm_run *run)
goto out_unlock;
}
 
-   /* Adjust page offset */
-   fault_ipa |= kvm_vcpu_get_hfar(vcpu) & ~PAGE_MASK;
+   /*
+* The IPA is reported as [MAX:12], so we need to
+* complement it with the bottom 12 bits from the
+* faulting VA. This is always 12 bits, irrespective
+* of the page size.
+*/
+   fault_ipa |= kvm_vcpu_get_hfar(vcpu) & ((1 << 12) - 1);
ret = io_mem_abort(vcpu, run, fault_ipa);
goto out_unlock;
}
-- 
1.7.12.4

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 21/29] ARM: KVM: allow HYP mappings to be at an offset from kernel mappings

2013-03-04 Thread Marc Zyngier
arm64 cannot represent the kernel VAs in HYP mode, because of the lack
of TTBR1 at EL2. A way to cope with this situation is to have HYP VAs
to be an offset from the kernel VAs.

Introduce macros to convert a kernel VA to a HYP VA, make the HYP
mapping functions use these conversion macros. Also change the
documentation to reflect the existence of the offset.

On ARM, where we can have an identity mapping between kernel and HYP,
the macros are without any effect.

Signed-off-by: Marc Zyngier marc.zyng...@arm.com
---
 arch/arm/include/asm/kvm_mmu.h |  8 
 arch/arm/kvm/mmu.c | 43 ++
 2 files changed, 35 insertions(+), 16 deletions(-)

diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h
index ac78493..3c71a1d 100644
--- a/arch/arm/include/asm/kvm_mmu.h
+++ b/arch/arm/include/asm/kvm_mmu.h
@@ -22,6 +22,14 @@
 #include asm/cacheflush.h
 #include asm/pgalloc.h
 
+/*
+ * We directly use the kernel VA for the HYP, as we can directly share
+ * the mapping (HTTBR covers TTBR1).
+ */
+#define HYP_PAGE_OFFSET_MASK   (~0UL)
+#define HYP_PAGE_OFFSETPAGE_OFFSET
+#define KERN_TO_HYP(kva)   (kva)
+
 int create_hyp_mappings(void *from, void *to);
 int create_hyp_io_mappings(void *from, void *to, phys_addr_t);
 void free_hyp_pmds(void);
diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c
index 6b4ea18..ead6b16 100644
--- a/arch/arm/kvm/mmu.c
+++ b/arch/arm/kvm/mmu.c
@@ -101,14 +101,15 @@ void free_hyp_pmds(void)
 
mutex_lock(kvm_hyp_pgd_mutex);
for (addr = PAGE_OFFSET; addr != 0; addr += PGDIR_SIZE) {
-   pgd = hyp_pgd + pgd_index(addr);
-   pud = pud_offset(pgd, addr);
+   unsigned long hyp_addr = KERN_TO_HYP(addr);
+   pgd = hyp_pgd + pgd_index(hyp_addr);
+   pud = pud_offset(pgd, hyp_addr);
 
if (pud_none(*pud))
continue;
BUG_ON(pud_bad(*pud));
 
-   pmd = pmd_offset(pud, addr);
+   pmd = pmd_offset(pud, hyp_addr);
free_ptes(pmd, addr);
pmd_free(NULL, pmd);
pud_clear(pud);
@@ -124,7 +125,9 @@ static void create_hyp_pte_mappings(pmd_t *pmd, unsigned 
long start,
struct page *page;
 
	for (addr = start & PAGE_MASK; addr < end; addr += PAGE_SIZE) {
-   pte = pte_offset_kernel(pmd, addr);
+   unsigned long hyp_addr = KERN_TO_HYP(addr);
+
+   pte = pte_offset_kernel(pmd, hyp_addr);
BUG_ON(!virt_addr_valid(addr));
page = virt_to_page(addr);
kvm_set_pte(pte, mk_pte(page, PAGE_HYP));
@@ -139,7 +142,9 @@ static void create_hyp_io_pte_mappings(pmd_t *pmd, unsigned 
long start,
unsigned long addr;
 
	for (addr = start & PAGE_MASK; addr < end; addr += PAGE_SIZE) {
-   pte = pte_offset_kernel(pmd, addr);
+   unsigned long hyp_addr = KERN_TO_HYP(addr);
+
+   pte = pte_offset_kernel(pmd, hyp_addr);
BUG_ON(pfn_valid(*pfn_base));
kvm_set_pte(pte, pfn_pte(*pfn_base, PAGE_HYP_DEVICE));
(*pfn_base)++;
@@ -154,12 +159,13 @@ static int create_hyp_pmd_mappings(pud_t *pud, unsigned 
long start,
unsigned long addr, next;
 
for (addr = start; addr  end; addr = next) {
-   pmd = pmd_offset(pud, addr);
+   unsigned long hyp_addr = KERN_TO_HYP(addr);
+   pmd = pmd_offset(pud, hyp_addr);
 
BUG_ON(pmd_sect(*pmd));
 
if (pmd_none(*pmd)) {
-   pte = pte_alloc_one_kernel(NULL, addr);
+   pte = pte_alloc_one_kernel(NULL, hyp_addr);
if (!pte) {
kvm_err(Cannot allocate Hyp pte\n);
return -ENOMEM;
@@ -200,11 +206,12 @@ static int __create_hyp_mappings(void *from, void *to, 
unsigned long *pfn_base)
 
mutex_lock(kvm_hyp_pgd_mutex);
for (addr = start; addr  end; addr = next) {
-   pgd = hyp_pgd + pgd_index(addr);
-   pud = pud_offset(pgd, addr);
+   unsigned long hyp_addr = KERN_TO_HYP(addr);
+   pgd = hyp_pgd + pgd_index(hyp_addr);
+   pud = pud_offset(pgd, hyp_addr);
 
if (pud_none_or_clear_bad(pud)) {
-   pmd = pmd_alloc_one(NULL, addr);
+   pmd = pmd_alloc_one(NULL, hyp_addr);
if (!pmd) {
kvm_err(Cannot allocate Hyp pmd\n);
err = -ENOMEM;
@@ -224,12 +231,13 @@ out:
 }
 
 /**
- * create_hyp_mappings - map a kernel virtual address range in Hyp mode
+ * create_hyp_mappings - duplicate a kernel virtual address range in Hyp mode
  * @from:  The virtual kernel start address of the range
  * @to:The virtual kernel end address 

[PATCH 18/29] ARM: KVM: remove superfluous include from kvm_vgic.h

2013-03-04 Thread Marc Zyngier
Signed-off-by: Marc Zyngier marc.zyng...@arm.com
---
 arch/arm/include/asm/kvm_vgic.h | 1 -
 1 file changed, 1 deletion(-)

diff --git a/arch/arm/include/asm/kvm_vgic.h b/arch/arm/include/asm/kvm_vgic.h
index ab97207..343744e 100644
--- a/arch/arm/include/asm/kvm_vgic.h
+++ b/arch/arm/include/asm/kvm_vgic.h
@@ -21,7 +21,6 @@
 
 #include linux/kernel.h
 #include linux/kvm.h
-#include linux/kvm_host.h
 #include linux/irqreturn.h
 #include linux/spinlock.h
 #include linux/types.h
-- 
1.7.12.4

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 17/29] ARM: KVM: abstract most MMU operations

2013-03-04 Thread Marc Zyngier
Move low level MMU-related operations to kvm_mmu.h. This makes
the MMU code reusable by the arm64 port.

Signed-off-by: Marc Zyngier marc.zyng...@arm.com
---
 arch/arm/include/asm/kvm_mmu.h | 58 ++
 arch/arm/kvm/mmu.c | 58 +-
 2 files changed, 70 insertions(+), 46 deletions(-)

diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h
index 421a20b..ac78493 100644
--- a/arch/arm/include/asm/kvm_mmu.h
+++ b/arch/arm/include/asm/kvm_mmu.h
@@ -19,6 +19,9 @@
 #ifndef __ARM_KVM_MMU_H__
 #define __ARM_KVM_MMU_H__
 
+#include asm/cacheflush.h
+#include asm/pgalloc.h
+
 int create_hyp_mappings(void *from, void *to);
 int create_hyp_io_mappings(void *from, void *to, phys_addr_t);
 void free_hyp_pmds(void);
@@ -36,6 +39,16 @@ phys_addr_t kvm_mmu_get_httbr(void);
 int kvm_mmu_init(void);
 void kvm_clear_hyp_idmap(void);
 
+static inline void kvm_set_pte(pte_t *pte, pte_t new_pte)
+{
+   pte_val(*pte) = new_pte;
+   /*
+* flush_pmd_entry just takes a void pointer and cleans the necessary
+* cache entries, so we can reuse the function for ptes.
+*/
+   flush_pmd_entry(pte);
+}
+
 static inline bool kvm_is_write_fault(unsigned long hsr)
 {
	unsigned long hsr_ec = hsr >> HSR_EC_SHIFT;
@@ -47,4 +60,49 @@ static inline bool kvm_is_write_fault(unsigned long hsr)
return true;
 }
 
+static inline void kvm_clean_pgd(pgd_t *pgd)
+{
+   clean_dcache_area(pgd, PTRS_PER_S2_PGD * sizeof(pgd_t));
+}
+
+static inline void kvm_clean_pmd_entry(pmd_t *pmd)
+{
+   clean_pmd_entry(pmd);
+}
+
+static inline void kvm_clean_pte(pte_t *pte)
+{
+   clean_pte_table(pte);
+}
+
+static inline void kvm_set_s2pte_writable(pte_t *pte)
+{
+   pte_val(*pte) |= L_PTE_S2_RDWR;
+}
+
+struct kvm;
+
+static inline void coherent_icache_guest_page(struct kvm *kvm, gfn_t gfn)
+{
+   /*
+* If we are going to insert an instruction page and the icache is
+* either VIPT or PIPT, there is a potential problem where the host
+* (or another VM) may have used the same page as this guest, and we
+* read incorrect data from the icache.  If we're using a PIPT cache,
+* we can invalidate just that page, but if we are using a VIPT cache
+* we need to invalidate the entire icache - damn shame - as written
+* in the ARM ARM (DDI 0406C.b - Page B3-1393).
+*
+* VIVT caches are tagged using both the ASID and the VMID and doesn't
+* need any kind of flushing (DDI 0406C.b - Page B3-1392).
+*/
+   if (icache_is_pipt()) {
+   unsigned long hva = gfn_to_hva(kvm, gfn);
+   __cpuc_coherent_user_range(hva, hva + PAGE_SIZE);
+   } else if (!icache_is_vivt_asid_tagged()) {
+   /* any kind of VIPT cache */
+   __flush_icache_all();
+   }
+}
+
 #endif /* __ARM_KVM_MMU_H__ */
diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c
index 8e9047a..6b4ea18 100644
--- a/arch/arm/kvm/mmu.c
+++ b/arch/arm/kvm/mmu.c
@@ -28,8 +28,6 @@
 #include asm/kvm_mmio.h
 #include asm/kvm_asm.h
 #include asm/kvm_emulate.h
-#include asm/mach/map.h
-#include trace/events/kvm.h
 
 #include trace.h
 
@@ -42,16 +40,6 @@ static void kvm_tlb_flush_vmid(struct kvm *kvm)
kvm_call_hyp(__kvm_tlb_flush_vmid, kvm);
 }
 
-static void kvm_set_pte(pte_t *pte, pte_t new_pte)
-{
-   pte_val(*pte) = new_pte;
-   /*
-* flush_pmd_entry just takes a void pointer and cleans the necessary
-* cache entries, so we can reuse the function for ptes.
-*/
-   flush_pmd_entry(pte);
-}
-
 static int mmu_topup_memory_cache(struct kvm_mmu_memory_cache *cache,
  int min, int max)
 {
@@ -290,7 +278,7 @@ int kvm_alloc_stage2_pgd(struct kvm *kvm)
VM_BUG_ON((unsigned long)pgd  (S2_PGD_SIZE - 1));
 
memset(pgd, 0, PTRS_PER_S2_PGD * sizeof(pgd_t));
-   clean_dcache_area(pgd, PTRS_PER_S2_PGD * sizeof(pgd_t));
+   kvm_clean_pgd(pgd);
	kvm->arch.pgd = pgd;
 
return 0;
@@ -422,22 +410,22 @@ static int stage2_set_pte(struct kvm *kvm, struct 
kvm_mmu_memory_cache *cache,
return 0; /* ignore calls from kvm_set_spte_hva */
pmd = mmu_memory_cache_alloc(cache);
pud_populate(NULL, pud, pmd);
-   pmd += pmd_index(addr);
get_page(virt_to_page(pud));
-   } else
-   pmd = pmd_offset(pud, addr);
+   }
+
+   pmd = pmd_offset(pud, addr);
 
/* Create 2nd stage page table mapping - Level 2 */
if (pmd_none(*pmd)) {
if (!cache)
return 0; /* ignore calls from kvm_set_spte_hva */
pte = mmu_memory_cache_alloc(cache);
-   clean_pte_table(pte);
+   kvm_clean_pte(pte);
pmd_populate_kernel(NULL, pmd, pte);
-   

[PATCH 09/29] ARM: KVM: abstract SAS decoding away

2013-03-04 Thread Marc Zyngier
Signed-off-by: Marc Zyngier marc.zyng...@arm.com
---
 arch/arm/include/asm/kvm_emulate.h | 15 +++
 arch/arm/kvm/mmio.c| 17 +++--
 2 files changed, 18 insertions(+), 14 deletions(-)

diff --git a/arch/arm/include/asm/kvm_emulate.h 
b/arch/arm/include/asm/kvm_emulate.h
index c27d9c9..2cbb694 100644
--- a/arch/arm/include/asm/kvm_emulate.h
+++ b/arch/arm/include/asm/kvm_emulate.h
@@ -120,4 +120,19 @@ static inline bool kvm_vcpu_dabt_iss1tw(struct kvm_vcpu 
*vcpu)
	return (kvm_vcpu_get_hsr(vcpu) >> 7) & 1;
 }
 
+static inline int kvm_vcpu_dabt_get_as(struct kvm_vcpu *vcpu)
+{
+   switch ((kvm_vcpu_get_hsr(vcpu) >> 22) & 0x3) {
+   case 0:
+   return 1;
+   case 1:
+   return 2;
+   case 2:
+   return 4;
+   default:
+   kvm_err(Hardware is weird: SAS 0b11 is reserved\n);
+   return -EFAULT;
+   }
+}
+
 #endif /* __ARM_KVM_EMULATE_H__ */
diff --git a/arch/arm/kvm/mmio.c b/arch/arm/kvm/mmio.c
index 6495c1c..cd33a7e 100644
--- a/arch/arm/kvm/mmio.c
+++ b/arch/arm/kvm/mmio.c
@@ -77,20 +77,9 @@ static int decode_hsr(struct kvm_vcpu *vcpu, phys_addr_t 
fault_ipa,
return 1;
}
 
-   switch ((kvm_vcpu_get_hsr(vcpu) >> 22) & 0x3) {
-   case 0:
-   len = 1;
-   break;
-   case 1:
-   len = 2;
-   break;
-   case 2:
-   len = 4;
-   break;
-   default:
-   kvm_err(Hardware is weird: SAS 0b11 is reserved\n);
-   return -EFAULT;
-   }
+   len = kvm_vcpu_dabt_get_as(vcpu);
+   if (len < 0)
+   return len; /* Unlikely... */
 
is_write = kvm_vcpu_dabt_iswrite(vcpu);
sign_extend = kvm_vcpu_dabt_issext(vcpu);
-- 
1.7.12.4

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 03/29] ARM: KVM: abstract HSR_ISV away

2013-03-04 Thread Marc Zyngier
Signed-off-by: Marc Zyngier marc.zyng...@arm.com
---
 arch/arm/include/asm/kvm_emulate.h | 5 +
 arch/arm/kvm/mmio.c| 2 +-
 2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/arch/arm/include/asm/kvm_emulate.h 
b/arch/arm/include/asm/kvm_emulate.h
index 3c01988..9531535 100644
--- a/arch/arm/include/asm/kvm_emulate.h
+++ b/arch/arm/include/asm/kvm_emulate.h
@@ -90,4 +90,9 @@ static inline unsigned long kvm_vcpu_get_hyp_pc(struct 
kvm_vcpu *vcpu)
	return vcpu->arch.fault.hyp_pc;
 }
 
+static inline bool kvm_vcpu_dabt_isvalid(struct kvm_vcpu *vcpu)
+{
+   return !!(kvm_vcpu_get_hsr(vcpu) & HSR_ISV);
+}
+
 #endif /* __ARM_KVM_EMULATE_H__ */
diff --git a/arch/arm/kvm/mmio.c b/arch/arm/kvm/mmio.c
index ce63f39..41f96e9 100644
--- a/arch/arm/kvm/mmio.c
+++ b/arch/arm/kvm/mmio.c
@@ -130,7 +130,7 @@ int io_mem_abort(struct kvm_vcpu *vcpu, struct kvm_run *run,
 * space do its magic.
 */
 
-   if (kvm_vcpu_get_hsr(vcpu) & HSR_ISV) {
+   if (kvm_vcpu_dabt_isvalid(vcpu)) {
ret = decode_hsr(vcpu, fault_ipa, mmio);
if (ret)
return ret;
-- 
1.7.12.4

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 15/29] ARM: KVM: move exit handler selection to a separate file

2013-03-04 Thread Marc Zyngier
The exit handler selection code cannot be shared with arm64
(two different modes, more exception classes...).

Move it to a separate file (handle_exit.c).

Signed-off-by: Marc Zyngier marc.zyng...@arm.com
---
 arch/arm/include/asm/kvm_host.h |   3 +
 arch/arm/kvm/Makefile   |   2 +-
 arch/arm/kvm/arm.c  | 113 
 arch/arm/kvm/handle_exit.c  | 140 
 4 files changed, 144 insertions(+), 114 deletions(-)
 create mode 100644 arch/arm/kvm/handle_exit.c

diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
index eb836e6..24f457a 100644
--- a/arch/arm/include/asm/kvm_host.h
+++ b/arch/arm/include/asm/kvm_host.h
@@ -183,4 +183,7 @@ struct kvm_one_reg;
 int kvm_arm_coproc_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *);
 int kvm_arm_coproc_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *);
 
+int handle_exit(struct kvm_vcpu *vcpu, struct kvm_run *run,
+   int exception_index);
+
 #endif /* __ARM_KVM_HOST_H__ */
diff --git a/arch/arm/kvm/Makefile b/arch/arm/kvm/Makefile
index fc96ce6..8dc5e76 100644
--- a/arch/arm/kvm/Makefile
+++ b/arch/arm/kvm/Makefile
@@ -17,7 +17,7 @@ AFLAGS_interrupts.o := -Wa,-march=armv7-a$(plus_virt)
 kvm-arm-y = $(addprefix ../../../virt/kvm/, kvm_main.o coalesced_mmio.o)
 
 obj-y += kvm-arm.o init.o interrupts.o
-obj-y += arm.o guest.o mmu.o emulate.o reset.o
+obj-y += arm.o handle_exit.o guest.o mmu.o emulate.o reset.o
 obj-y += coproc.o coproc_a15.o mmio.o psci.o
 obj-$(CONFIG_KVM_ARM_VGIC) += vgic.o
 obj-$(CONFIG_KVM_ARM_TIMER) += arch_timer.o
diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index 93aaba6..de783ee 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -30,7 +30,6 @@
 #define CREATE_TRACE_POINTS
 #include trace.h
 
-#include asm/unified.h
 #include asm/uaccess.h
 #include asm/ptrace.h
 #include asm/mman.h
@@ -480,118 +479,6 @@ static void update_vttbr(struct kvm *kvm)
spin_unlock(kvm_vmid_lock);
 }
 
-static int handle_svc_hyp(struct kvm_vcpu *vcpu, struct kvm_run *run)
-{
-   /* SVC called from Hyp mode should never get here */
-   kvm_debug(SVC called from Hyp mode shouldn't go here\n);
-   BUG();
-   return -EINVAL; /* Squash warning */
-}
-
-static int handle_hvc(struct kvm_vcpu *vcpu, struct kvm_run *run)
-{
-   trace_kvm_hvc(*vcpu_pc(vcpu), *vcpu_reg(vcpu, 0),
- vcpu-arch.hsr  HSR_HVC_IMM_MASK);
-
-   if (kvm_psci_call(vcpu))
-   return 1;
-
-   kvm_inject_undefined(vcpu);
-   return 1;
-}
-
-static int handle_smc(struct kvm_vcpu *vcpu, struct kvm_run *run)
-{
-   if (kvm_psci_call(vcpu))
-   return 1;
-
-   kvm_inject_undefined(vcpu);
-   return 1;
-}
-
-static int handle_pabt_hyp(struct kvm_vcpu *vcpu, struct kvm_run *run)
-{
-   /* The hypervisor should never cause aborts */
-   kvm_err(Prefetch Abort taken from Hyp mode at %#08lx (HSR: %#08x)\n,
-   kvm_vcpu_get_hfar(vcpu), kvm_vcpu_get_hsr(vcpu));
-   return -EFAULT;
-}
-
-static int handle_dabt_hyp(struct kvm_vcpu *vcpu, struct kvm_run *run)
-{
-   /* This is either an error in the ws. code or an external abort */
-   kvm_err(Data Abort taken from Hyp mode at %#08lx (HSR: %#08x)\n,
-   kvm_vcpu_get_hfar(vcpu), kvm_vcpu_get_hsr(vcpu));
-   return -EFAULT;
-}
-
-typedef int (*exit_handle_fn)(struct kvm_vcpu *, struct kvm_run *);
-static exit_handle_fn arm_exit_handlers[] = {
-   [HSR_EC_WFI]= kvm_handle_wfi,
-   [HSR_EC_CP15_32]= kvm_handle_cp15_32,
-   [HSR_EC_CP15_64]= kvm_handle_cp15_64,
-   [HSR_EC_CP14_MR]= kvm_handle_cp14_access,
-   [HSR_EC_CP14_LS]= kvm_handle_cp14_load_store,
-   [HSR_EC_CP14_64]= kvm_handle_cp14_access,
-   [HSR_EC_CP_0_13]= kvm_handle_cp_0_13_access,
-   [HSR_EC_CP10_ID]= kvm_handle_cp10_id,
-   [HSR_EC_SVC_HYP]= handle_svc_hyp,
-   [HSR_EC_HVC]= handle_hvc,
-   [HSR_EC_SMC]= handle_smc,
-   [HSR_EC_IABT]   = kvm_handle_guest_abort,
-   [HSR_EC_IABT_HYP]   = handle_pabt_hyp,
-   [HSR_EC_DABT]   = kvm_handle_guest_abort,
-   [HSR_EC_DABT_HYP]   = handle_dabt_hyp,
-};
-
-/*
- * Return > 0 to return to guest, < 0 on error, 0 (and set exit_reason) on
- * proper exit to QEMU.
- */
-static int handle_exit(struct kvm_vcpu *vcpu, struct kvm_run *run,
-  int exception_index)
-{
-   unsigned long hsr_ec;
-
-   switch (exception_index) {
-   case ARM_EXCEPTION_IRQ:
-   return 1;
-   case ARM_EXCEPTION_UNDEFINED:
-   kvm_err(Undefined exception in Hyp mode at: %#08lx\n,
-   kvm_vcpu_get_hyp_pc(vcpu));
-   BUG();
-   panic(KVM: Hypervisor undefined exception!\n);
-   case ARM_EXCEPTION_DATA_ABORT:
-   case 

[PATCH 10/29] ARM: KVM: abstract IL decoding away

2013-03-04 Thread Marc Zyngier
Signed-off-by: Marc Zyngier marc.zyng...@arm.com
---
 arch/arm/include/asm/kvm_emulate.h | 6 ++
 arch/arm/kvm/arm.c | 3 +--
 arch/arm/kvm/coproc.c  | 2 +-
 arch/arm/kvm/mmio.c| 2 +-
 4 files changed, 9 insertions(+), 4 deletions(-)

diff --git a/arch/arm/include/asm/kvm_emulate.h 
b/arch/arm/include/asm/kvm_emulate.h
index 2cbb694..bb1a25c 100644
--- a/arch/arm/include/asm/kvm_emulate.h
+++ b/arch/arm/include/asm/kvm_emulate.h
@@ -135,4 +135,10 @@ static inline int kvm_vcpu_dabt_get_as(struct kvm_vcpu 
*vcpu)
}
 }
 
+/* This one is not specific to Data Abort */
+static inline bool kvm_vcpu_trap_il_is32bit(struct kvm_vcpu *vcpu)
+{
+   return !!((kvm_vcpu_get_hsr(vcpu)  25)  1);
+}
+
 #endif /* __ARM_KVM_EMULATE_H__ */
diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index 6ba934b..e49a687 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -624,8 +624,7 @@ static int handle_exit(struct kvm_vcpu *vcpu, struct 
kvm_run *run,
 * that fail their condition code check
 */
if (!kvm_condition_valid(vcpu)) {
-   bool is_wide = kvm_vcpu_get_hsr(vcpu)  HSR_IL;
-   kvm_skip_instr(vcpu, is_wide);
+   kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu));
return 1;
}
 
diff --git a/arch/arm/kvm/coproc.c b/arch/arm/kvm/coproc.c
index b305916..94eee8b 100644
--- a/arch/arm/kvm/coproc.c
+++ b/arch/arm/kvm/coproc.c
@@ -293,7 +293,7 @@ static int emulate_cp15(struct kvm_vcpu *vcpu,
 
if (likely(r-access(vcpu, params, r))) {
/* Skip instruction, since it was emulated */
-   kvm_skip_instr(vcpu, (kvm_vcpu_get_hsr(vcpu)  25)  
1);
+   kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu));
return 1;
}
/* If access function fails, it should complain. */
diff --git a/arch/arm/kvm/mmio.c b/arch/arm/kvm/mmio.c
index cd33a7e..9bd471a 100644
--- a/arch/arm/kvm/mmio.c
+++ b/arch/arm/kvm/mmio.c
@@ -101,7 +101,7 @@ static int decode_hsr(struct kvm_vcpu *vcpu, phys_addr_t 
fault_ipa,
 * The MMIO instruction is emulated and should not be re-executed
 * in the guest.
 */
-   kvm_skip_instr(vcpu, (kvm_vcpu_get_hsr(vcpu)  25)  1);
+   kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu));
return 0;
 }
 
-- 
1.7.12.4

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 24/29] ARM: KVM: move kvm_target_cpu to guest.c

2013-03-04 Thread Marc Zyngier
guest.c already contains some target-specific checks. Let's move
kvm_target_cpu() over there so arm.c is mostly target agnostic.

Signed-off-by: Marc Zyngier marc.zyng...@arm.com
---
 arch/arm/kvm/arm.c   | 17 -
 arch/arm/kvm/guest.c | 17 +
 2 files changed, 17 insertions(+), 17 deletions(-)

diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index f053049..c10a45f 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -33,7 +33,6 @@
 #include asm/uaccess.h
 #include asm/ptrace.h
 #include asm/mman.h
-#include asm/cputype.h
 #include asm/tlbflush.h
 #include asm/cacheflush.h
 #include asm/virt.h
@@ -301,22 +300,6 @@ int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
return 0;
 }
 
-int __attribute_const__ kvm_target_cpu(void)
-{
-   unsigned long implementor = read_cpuid_implementor();
-   unsigned long part_number = read_cpuid_part_number();
-
-   if (implementor != ARM_CPU_IMP_ARM)
-   return -EINVAL;
-
-   switch (part_number) {
-   case ARM_CPU_PART_CORTEX_A15:
-   return KVM_ARM_TARGET_CORTEX_A15;
-   default:
-   return -EINVAL;
-   }
-}
-
 int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
 {
int ret;
diff --git a/arch/arm/kvm/guest.c b/arch/arm/kvm/guest.c
index 2339d96..152d036 100644
--- a/arch/arm/kvm/guest.c
+++ b/arch/arm/kvm/guest.c
@@ -22,6 +22,7 @@
 #include linux/module.h
 #include linux/vmalloc.h
 #include linux/fs.h
+#include asm/cputype.h
 #include asm/uaccess.h
 #include asm/kvm.h
 #include asm/kvm_asm.h
@@ -180,6 +181,22 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
return -EINVAL;
 }
 
+int __attribute_const__ kvm_target_cpu(void)
+{
+   unsigned long implementor = read_cpuid_implementor();
+   unsigned long part_number = read_cpuid_part_number();
+
+   if (implementor != ARM_CPU_IMP_ARM)
+   return -EINVAL;
+
+   switch (part_number) {
+   case ARM_CPU_PART_CORTEX_A15:
+   return KVM_ARM_TARGET_CORTEX_A15;
+   default:
+   return -EINVAL;
+   }
+}
+
 int kvm_vcpu_set_target(struct kvm_vcpu *vcpu,
const struct kvm_vcpu_init *init)
 {
-- 
1.7.12.4

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 29/29] ARM: KVM: Fix length of mmio access

2013-03-04 Thread Marc Zyngier
Instead of hardcoding the maximum MMIO access to be 4 bytes,
compare it to sizeof(unsigned long), which will do the
right thing on both 32 and 64bit systems.

Same thing for sign extension.

Signed-off-by: Marc Zyngier marc.zyng...@arm.com
---
 arch/arm/kvm/mmio.c | 7 ---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/arch/arm/kvm/mmio.c b/arch/arm/kvm/mmio.c
index 9bd471a..bd1bbcd 100644
--- a/arch/arm/kvm/mmio.c
+++ b/arch/arm/kvm/mmio.c
@@ -39,10 +39,10 @@ int kvm_handle_mmio_return(struct kvm_vcpu *vcpu, struct 
kvm_run *run)
 
if (!run-mmio.is_write) {
dest = vcpu_reg(vcpu, vcpu-arch.mmio_decode.rt);
-   memset(dest, 0, sizeof(int));
+   *dest = 0;
 
len = run-mmio.len;
-   if (len  4)
+   if (len  sizeof(unsigned long))
return -EINVAL;
 
memcpy(dest, run-mmio.data, len);
@@ -50,7 +50,8 @@ int kvm_handle_mmio_return(struct kvm_vcpu *vcpu, struct 
kvm_run *run)
trace_kvm_mmio(KVM_TRACE_MMIO_READ, len, run-mmio.phys_addr,
*((u64 *)run-mmio.data));
 
-   if (vcpu-arch.mmio_decode.sign_extend  len  4) {
+   if (vcpu-arch.mmio_decode.sign_extend 
+   len  sizeof(unsigned long)) {
mask = 1U  ((len * 8) - 1);
*dest = (*dest ^ mask) - mask;
}
-- 
1.7.12.4

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 22/29] ARM: KVM: fix address validation for HYP mappings

2013-03-04 Thread Marc Zyngier
__create_hyp_mappings() performs some kind of address validation before
creating the mapping, by verifying that the start address is above
PAGE_OFFSET.

This check is not completely correct for kernel memory (the upper
boundary has to be checked as well so we do not end up with highmem
pages), and wrong for IO mappings (the mapping must exist in the vmalloc
region).

Fix this by using the proper predicates (virt_addr_valid and
is_vmalloc_addr), which also work correctly on ARM64 (where the vmalloc
region is below PAGE_OFFSET).

Also change the BUG_ON() into a less aggressive error return.

Signed-off-by: Marc Zyngier marc.zyng...@arm.com
---
 arch/arm/kvm/mmu.c | 9 +++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c
index ead6b16..ec14269 100644
--- a/arch/arm/kvm/mmu.c
+++ b/arch/arm/kvm/mmu.c
@@ -200,8 +200,13 @@ static int __create_hyp_mappings(void *from, void *to, 
unsigned long *pfn_base)
unsigned long addr, next;
int err = 0;
 
-   BUG_ON(start  end);
-   if (start  PAGE_OFFSET)
+   if (start = end)
+   return -EINVAL;
+   /* Check for a valid kernel memory mapping */
+   if (!pfn_base  (!virt_addr_valid(from) || !virt_addr_valid(to - 1)))
+   return -EINVAL;
+   /* Check for a valid kernel IO mapping */
+   if (pfn_base  (!is_vmalloc_addr(from) || !is_vmalloc_addr(to - 1)))
return -EINVAL;
 
mutex_lock(kvm_hyp_pgd_mutex);
-- 
1.7.12.4

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 19/29] ARM: KVM: move hyp init to kvm_host.h

2013-03-04 Thread Marc Zyngier
Make the split of the pgd_ptr an implementation specific thing
by moving the init call to an inline function.

Signed-off-by: Marc Zyngier marc.zyng...@arm.com
---
 arch/arm/include/asm/kvm_host.h | 19 +++
 arch/arm/kvm/arm.c  | 12 +---
 2 files changed, 20 insertions(+), 11 deletions(-)

diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
index 24f457a..f00a557 100644
--- a/arch/arm/include/asm/kvm_host.h
+++ b/arch/arm/include/asm/kvm_host.h
@@ -186,4 +186,23 @@ int kvm_arm_coproc_set_reg(struct kvm_vcpu *vcpu, const 
struct kvm_one_reg *);
 int handle_exit(struct kvm_vcpu *vcpu, struct kvm_run *run,
int exception_index);
 
+static inline void __cpu_init_hyp_mode(unsigned long long pgd_ptr,
+  unsigned long hyp_stack_ptr,
+  unsigned long vector_ptr)
+{
+   unsigned long pgd_low, pgd_high;
+
+   pgd_low = (pgd_ptr  ((1ULL  32) - 1));
+   pgd_high = (pgd_ptr  32ULL);
+
+   /*
+* Call initialization code, and switch to the full blown
+* HYP code. The init code doesn't need to preserve these registers as
+* r1-r3 and r12 are already callee save according to the AAPCS.
+* Note that we slightly misuse the prototype by casting the pgd_low to
+* a void *.
+*/
+   kvm_call_hyp((void *)pgd_low, pgd_high, hyp_stack_ptr, vector_ptr);
+}
+
 #endif /* __ARM_KVM_HOST_H__ */
diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index de783ee..3c7c50a 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -813,7 +813,6 @@ long kvm_arch_vm_ioctl(struct file *filp,
 static void cpu_init_hyp_mode(void *vector)
 {
unsigned long long pgd_ptr;
-   unsigned long pgd_low, pgd_high;
unsigned long hyp_stack_ptr;
unsigned long stack_page;
unsigned long vector_ptr;
@@ -822,20 +821,11 @@ static void cpu_init_hyp_mode(void *vector)
__hyp_set_vectors((unsigned long)vector);
 
pgd_ptr = (unsigned long long)kvm_mmu_get_httbr();
-   pgd_low = (pgd_ptr  ((1ULL  32) - 1));
-   pgd_high = (pgd_ptr  32ULL);
stack_page = __get_cpu_var(kvm_arm_hyp_stack_page);
hyp_stack_ptr = stack_page + PAGE_SIZE;
vector_ptr = (unsigned long)__kvm_hyp_vector;
 
-   /*
-* Call initialization code, and switch to the full blown
-* HYP code. The init code doesn't need to preserve these registers as
-* r1-r3 and r12 are already callee save according to the AAPCS.
-* Note that we slightly misuse the prototype by casting the pgd_low to
-* a void *.
-*/
-   kvm_call_hyp((void *)pgd_low, pgd_high, hyp_stack_ptr, vector_ptr);
+   __cpu_init_hyp_mode(pgd_ptr, hyp_stack_ptr, vector_ptr);
 }
 
 /**
-- 
1.7.12.4

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 13/29] ARM: KVM: abstract HSR_EC_IABT away

2013-03-04 Thread Marc Zyngier
Signed-off-by: Marc Zyngier marc.zyng...@arm.com
---
 arch/arm/include/asm/kvm_emulate.h | 5 +
 arch/arm/kvm/mmu.c | 8 +++-
 2 files changed, 8 insertions(+), 5 deletions(-)

diff --git a/arch/arm/include/asm/kvm_emulate.h 
b/arch/arm/include/asm/kvm_emulate.h
index 75e54ba..2f5cc48 100644
--- a/arch/arm/include/asm/kvm_emulate.h
+++ b/arch/arm/include/asm/kvm_emulate.h
@@ -146,6 +146,11 @@ static inline u8 kvm_vcpu_trap_get_class(struct kvm_vcpu 
*vcpu)
return kvm_vcpu_get_hsr(vcpu)  HSR_EC_SHIFT;
 }
 
+static inline bool kvm_vcpu_trap_is_iabt(struct kvm_vcpu *vcpu)
+{
+   return kvm_vcpu_trap_get_class(vcpu) == HSR_EC_IABT;
+}
+
 static inline u8 kvm_vcpu_trap_get_fault(struct kvm_vcpu *vcpu)
 {
return kvm_vcpu_get_hsr(vcpu)  HSR_FSC_TYPE;
diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c
index e00f28d..8e9047a 100644
--- a/arch/arm/kvm/mmu.c
+++ b/arch/arm/kvm/mmu.c
@@ -585,7 +585,6 @@ out_unlock:
  */
 int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run)
 {
-   unsigned long hsr_ec;
unsigned long fault_status;
phys_addr_t fault_ipa;
struct kvm_memory_slot *memslot;
@@ -593,8 +592,7 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct 
kvm_run *run)
gfn_t gfn;
int ret, idx;
 
-   hsr_ec = kvm_vcpu_trap_get_class(vcpu);
-   is_iabt = (hsr_ec == HSR_EC_IABT);
+   is_iabt = kvm_vcpu_trap_is_iabt(vcpu);
fault_ipa = kvm_vcpu_get_fault_ipa(vcpu);
 
trace_kvm_guest_fault(*vcpu_pc(vcpu), kvm_vcpu_get_hsr(vcpu),
@@ -603,8 +601,8 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct 
kvm_run *run)
/* Check the stage-2 fault is trans. fault or write fault */
fault_status = kvm_vcpu_trap_get_fault(vcpu);
if (fault_status != FSC_FAULT  fault_status != FSC_PERM) {
-   kvm_err(Unsupported fault status: EC=%#lx DFCS=%#lx\n,
-   hsr_ec, fault_status);
+   kvm_err(Unsupported fault status: EC=%#x DFCS=%#lx\n,
+   kvm_vcpu_trap_get_class(vcpu), fault_status);
return -EFAULT;
}
 
-- 
1.7.12.4

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 00/29] ARM: KVM: pre-arm64 KVM/arm rework

2013-03-04 Thread Marc Zyngier
This patch series is reworking KVM/arm in order to prepare the code
to be shared with the upcoming KVM/arm64.

Nothing major here, just a lot of accessors, small cleanups and fixes
to make the code useable on arm64.

This code has been tested on VE-TC2 and arm64 models. As always,
comments are most welcome.

Marc Zyngier (29):
  ARM: KVM: convert GP registers from u32 to unsigned long
  ARM: KVM: abstract fault register accesses
  ARM: KVM: abstract HSR_ISV away
  ARM: KVM: abstract HSR_WNR away
  ARM: KVM: abstract HSR_SSE away
  ARM: KVM: abstract HSR_SRT_{MASK,SHIFT} away
  ARM: KVM: abstract external abort detection away
  ARM: KVM: abstract S1TW abort detection away
  ARM: KVM: abstract SAS decoding away
  ARM: KVM: abstract IL decoding away
  ARM: KVM: abstract exception class decoding away
  ARM: KVM: abstract fault decoding away
  ARM: KVM: abstract HSR_EC_IABT away
  ARM: KVM: move kvm_condition_valid to emulate.c
  ARM: KVM: move exit handler selection to a separate file
  ARM: KVM: move kvm_handle_wfi to handle_exit.c
  ARM: KVM: abstract most MMU operations
  ARM: KVM: remove superfluous include from kvm_vgic.h
  ARM: KVM: move hyp init to kvm_host.h
  ARM: KVM: use kvm_kernel_vfp_t as an abstract type for VFP containers
  ARM: KVM: allow HYP mappings to be at an offset from kernel mappings
  ARM: KVM: fix address validation for HYP mappings
  ARM: KVM: sanitize freeing of HYP page tables
  ARM: KVM: move kvm_target_cpu to guest.c
  ARM: KVM: fix fault_ipa computing
  ARM: KVM: vgic: decouple alignment restriction from page size
  ARM: KVM: move include of asm/idmap.h to kvm_mmu.h
  ARM: KVM: change kvm_tlb_flush_vmid to kvm_tlb_flush_vmid_ipa
  ARM: KVM: Fix length of mmio access

 arch/arm/include/asm/kvm_asm.h |   2 +-
 arch/arm/include/asm/kvm_emulate.h | 101 +--
 arch/arm/include/asm/kvm_host.h|  42 ++--
 arch/arm/include/asm/kvm_mmu.h |  67 +
 arch/arm/include/asm/kvm_vgic.h|   1 -
 arch/arm/include/uapi/asm/kvm.h|  12 +--
 arch/arm/kernel/asm-offsets.c  |   8 +-
 arch/arm/kvm/Makefile  |   2 +-
 arch/arm/kvm/arm.c | 194 +
 arch/arm/kvm/coproc.c  |  28 +++---
 arch/arm/kvm/coproc.h  |   4 +-
 arch/arm/kvm/emulate.c |  75 +-
 arch/arm/kvm/guest.c   |  17 
 arch/arm/kvm/handle_exit.c | 159 ++
 arch/arm/kvm/interrupts.S  |   9 +-
 arch/arm/kvm/mmio.c|  46 -
 arch/arm/kvm/mmu.c | 184 +--
 arch/arm/kvm/vgic.c|   2 +-
 18 files changed, 568 insertions(+), 385 deletions(-)
 create mode 100644 arch/arm/kvm/handle_exit.c

-- 
1.7.12.4

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 06/29] ARM: KVM: abstract HSR_SRT_{MASK,SHIFT} away

2013-03-04 Thread Marc Zyngier
Signed-off-by: Marc Zyngier marc.zyng...@arm.com
---
 arch/arm/include/asm/kvm_emulate.h | 5 +
 arch/arm/kvm/mmio.c| 2 +-
 2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/arch/arm/include/asm/kvm_emulate.h 
b/arch/arm/include/asm/kvm_emulate.h
index 2a077bc..603f5ea 100644
--- a/arch/arm/include/asm/kvm_emulate.h
+++ b/arch/arm/include/asm/kvm_emulate.h
@@ -105,4 +105,9 @@ static inline bool kvm_vcpu_dabt_issext(struct kvm_vcpu 
*vcpu)
return !!(kvm_vcpu_get_hsr(vcpu)  HSR_SSE);
 }
 
+static inline int kvm_vcpu_dabt_get_rd(struct kvm_vcpu *vcpu)
+{
+   return (kvm_vcpu_get_hsr(vcpu)  HSR_SRT_MASK)  HSR_SRT_SHIFT;
+}
+
 #endif /* __ARM_KVM_EMULATE_H__ */
diff --git a/arch/arm/kvm/mmio.c b/arch/arm/kvm/mmio.c
index 058029c..586063d 100644
--- a/arch/arm/kvm/mmio.c
+++ b/arch/arm/kvm/mmio.c
@@ -94,7 +94,7 @@ static int decode_hsr(struct kvm_vcpu *vcpu, phys_addr_t 
fault_ipa,
 
is_write = kvm_vcpu_dabt_iswrite(vcpu);
sign_extend = kvm_vcpu_dabt_issext(vcpu);
-   rt = (kvm_vcpu_get_hsr(vcpu)  HSR_SRT_MASK)  HSR_SRT_SHIFT;
+   rt = kvm_vcpu_dabt_get_rd(vcpu);
 
if (kvm_vcpu_reg_is_pc(vcpu, rt)) {
/* IO memory trying to read/write pc */
-- 
1.7.12.4

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 27/29] ARM: KVM: move include of asm/idmap.h to kvm_mmu.h

2013-03-04 Thread Marc Zyngier
Since the arm64 code doesn't have a global asm/idmap.h file, move
the inclusion to asm/kvm_mmu.h.

Signed-off-by: Marc Zyngier marc.zyng...@arm.com
---
 arch/arm/include/asm/kvm_mmu.h | 1 +
 arch/arm/kvm/mmu.c | 1 -
 2 files changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h
index 3c71a1d..970f3b5 100644
--- a/arch/arm/include/asm/kvm_mmu.h
+++ b/arch/arm/include/asm/kvm_mmu.h
@@ -21,6 +21,7 @@
 
 #include asm/cacheflush.h
 #include asm/pgalloc.h
+#include asm/idmap.h
 
 /*
  * We directly use the kernel VA for the HYP, as we can directly share
diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c
index f601e6f..b694f58 100644
--- a/arch/arm/kvm/mmu.c
+++ b/arch/arm/kvm/mmu.c
@@ -20,7 +20,6 @@
 #include linux/kvm_host.h
 #include linux/io.h
 #include trace/events/kvm.h
-#include asm/idmap.h
 #include asm/pgalloc.h
 #include asm/cacheflush.h
 #include asm/kvm_arm.h
-- 
1.7.12.4

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 23/29] ARM: KVM: sanitize freeing of HYP page tables

2013-03-04 Thread Marc Zyngier
Instead of trying to free everything from PAGE_OFFSET to the
top of memory, use the virt_addr_valid macro to check the
upper limit.

Also do the same for the vmalloc region where the IO mappings
are allocated.

Signed-off-by: Marc Zyngier marc.zyng...@arm.com
---
 arch/arm/kvm/mmu.c | 44 ++--
 1 file changed, 26 insertions(+), 18 deletions(-)

diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c
index ec14269..71d15bc 100644
--- a/arch/arm/kvm/mmu.c
+++ b/arch/arm/kvm/mmu.c
@@ -86,34 +86,42 @@ static void free_ptes(pmd_t *pmd, unsigned long addr)
}
 }
 
+static void free_hyp_pgd_entry(unsigned long addr)
+{
+   pgd_t *pgd;
+   pud_t *pud;
+   pmd_t *pmd;
+   unsigned long hyp_addr = KERN_TO_HYP(addr);
+
+   pgd = hyp_pgd + pgd_index(hyp_addr);
+   pud = pud_offset(pgd, hyp_addr);
+
+   if (pud_none(*pud))
+   return;
+   BUG_ON(pud_bad(*pud));
+
+   pmd = pmd_offset(pud, hyp_addr);
+   free_ptes(pmd, addr);
+   pmd_free(NULL, pmd);
+   pud_clear(pud);
+}
+
 /**
  * free_hyp_pmds - free a Hyp-mode level-2 tables and child level-3 tables
  *
  * Assumes this is a page table used strictly in Hyp-mode and therefore 
contains
- * only mappings in the kernel memory area, which is above PAGE_OFFSET.
+ * either mappings in the kernel memory area (above PAGE_OFFSET), or
+ * device mappings in the vmalloc range (from VMALLOC_START to VMALLOC_END).
  */
 void free_hyp_pmds(void)
 {
-   pgd_t *pgd;
-   pud_t *pud;
-   pmd_t *pmd;
unsigned long addr;
 
mutex_lock(kvm_hyp_pgd_mutex);
-   for (addr = PAGE_OFFSET; addr != 0; addr += PGDIR_SIZE) {
-   unsigned long hyp_addr = KERN_TO_HYP(addr);
-   pgd = hyp_pgd + pgd_index(hyp_addr);
-   pud = pud_offset(pgd, hyp_addr);
-
-   if (pud_none(*pud))
-   continue;
-   BUG_ON(pud_bad(*pud));
-
-   pmd = pmd_offset(pud, hyp_addr);
-   free_ptes(pmd, addr);
-   pmd_free(NULL, pmd);
-   pud_clear(pud);
-   }
+   for (addr = PAGE_OFFSET; virt_addr_valid(addr); addr += PGDIR_SIZE)
+   free_hyp_pgd_entry(addr);
+   for (addr = VMALLOC_START; is_vmalloc_addr((void*)addr); addr += 
PGDIR_SIZE)
+   free_hyp_pgd_entry(addr);
mutex_unlock(kvm_hyp_pgd_mutex);
 }
 
-- 
1.7.12.4

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 14/29] ARM: KVM: move kvm_condition_valid to emulate.c

2013-03-04 Thread Marc Zyngier
This is really hardware emulation, and as such it better be with
its little friends.

Signed-off-by: Marc Zyngier marc.zyng...@arm.com
---
 arch/arm/include/asm/kvm_emulate.h |  1 +
 arch/arm/kvm/arm.c | 45 --
 arch/arm/kvm/emulate.c | 45 ++
 3 files changed, 46 insertions(+), 45 deletions(-)

diff --git a/arch/arm/include/asm/kvm_emulate.h 
b/arch/arm/include/asm/kvm_emulate.h
index 2f5cc48..88c43df 100644
--- a/arch/arm/include/asm/kvm_emulate.h
+++ b/arch/arm/include/asm/kvm_emulate.h
@@ -28,6 +28,7 @@ unsigned long *vcpu_reg(struct kvm_vcpu *vcpu, u8 reg_num);
 unsigned long *vcpu_spsr(struct kvm_vcpu *vcpu);
 
 int kvm_handle_wfi(struct kvm_vcpu *vcpu, struct kvm_run *run);
+bool kvm_condition_valid(struct kvm_vcpu *vcpu);
 void kvm_skip_instr(struct kvm_vcpu *vcpu, bool is_wide_instr);
 void kvm_inject_undefined(struct kvm_vcpu *vcpu);
 void kvm_inject_dabt(struct kvm_vcpu *vcpu, unsigned long addr);
diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index 4e83d57..93aaba6 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -44,7 +44,6 @@
 #include asm/kvm_emulate.h
 #include asm/kvm_coproc.h
 #include asm/kvm_psci.h
-#include asm/opcodes.h
 
 #ifdef REQUIRES_VIRT
 __asm__(.arch_extension   virt);
@@ -546,50 +545,6 @@ static exit_handle_fn arm_exit_handlers[] = {
 };
 
 /*
- * A conditional instruction is allowed to trap, even though it
- * wouldn't be executed.  So let's re-implement the hardware, in
- * software!
- */
-static bool kvm_condition_valid(struct kvm_vcpu *vcpu)
-{
-   unsigned long cpsr, cond, insn;
-
-   /*
-* Exception Code 0 can only happen if we set HCR.TGE to 1, to
-* catch undefined instructions, and then we won't get past
-* the arm_exit_handlers test anyway.
-*/
-   BUG_ON(!kvm_vcpu_trap_get_class(vcpu));
-
-   /* Top two bits non-zero?  Unconditional. */
-   if (kvm_vcpu_get_hsr(vcpu)  30)
-   return true;
-
-   cpsr = *vcpu_cpsr(vcpu);
-
-   /* Is condition field valid? */
-   if ((kvm_vcpu_get_hsr(vcpu)  HSR_CV)  HSR_CV_SHIFT)
-   cond = (kvm_vcpu_get_hsr(vcpu)  HSR_COND)  HSR_COND_SHIFT;
-   else {
-   /* This can happen in Thumb mode: examine IT state. */
-   unsigned long it;
-
-   it = ((cpsr  8)  0xFC) | ((cpsr  25)  0x3);
-
-   /* it == 0 = unconditional. */
-   if (it == 0)
-   return true;
-
-   /* The cond for this insn works out as the top 4 bits. */
-   cond = (it  4);
-   }
-
-   /* Shift makes it look like an ARM-mode instruction */
-   insn = cond  28;
-   return arm_check_condition(insn, cpsr) != ARM_OPCODE_CONDTEST_FAIL;
-}
-
-/*
 * Return > 0 to return to guest, < 0 on error, 0 (and set exit_reason) on
  * proper exit to QEMU.
  */
diff --git a/arch/arm/kvm/emulate.c b/arch/arm/kvm/emulate.c
index d3094eb..04dbac6 100644
--- a/arch/arm/kvm/emulate.c
+++ b/arch/arm/kvm/emulate.c
@@ -20,6 +20,7 @@
 #include linux/kvm_host.h
 #include asm/kvm_arm.h
 #include asm/kvm_emulate.h
+#include asm/opcodes.h
 #include trace/events/kvm.h
 
 #include trace.h
@@ -176,6 +177,50 @@ int kvm_handle_wfi(struct kvm_vcpu *vcpu, struct kvm_run 
*run)
return 1;
 }
 
+/*
+ * A conditional instruction is allowed to trap, even though it
+ * wouldn't be executed.  So let's re-implement the hardware, in
+ * software!
+ */
+bool kvm_condition_valid(struct kvm_vcpu *vcpu)
+{
+   unsigned long cpsr, cond, insn;
+
+   /*
+* Exception Code 0 can only happen if we set HCR.TGE to 1, to
+* catch undefined instructions, and then we won't get past
+* the arm_exit_handlers test anyway.
+*/
+   BUG_ON(!kvm_vcpu_trap_get_class(vcpu));
+
+   /* Top two bits non-zero?  Unconditional. */
+   if (kvm_vcpu_get_hsr(vcpu)  30)
+   return true;
+
+   cpsr = *vcpu_cpsr(vcpu);
+
+   /* Is condition field valid? */
+   if ((kvm_vcpu_get_hsr(vcpu)  HSR_CV)  HSR_CV_SHIFT)
+   cond = (kvm_vcpu_get_hsr(vcpu)  HSR_COND)  HSR_COND_SHIFT;
+   else {
+   /* This can happen in Thumb mode: examine IT state. */
+   unsigned long it;
+
+   it = ((cpsr  8)  0xFC) | ((cpsr  25)  0x3);
+
+   /* it == 0 = unconditional. */
+   if (it == 0)
+   return true;
+
+   /* The cond for this insn works out as the top 4 bits. */
+   cond = (it  4);
+   }
+
+   /* Shift makes it look like an ARM-mode instruction */
+   insn = cond  28;
+   return arm_check_condition(insn, cpsr) != ARM_OPCODE_CONDTEST_FAIL;
+}
+
 /**
  * adjust_itstate - adjust ITSTATE when emulating instructions in IT-block
  * @vcpu:  The VCPU pointer
-- 
1.7.12.4

--
To unsubscribe from this list: send the line unsubscribe kvm in

[PATCH 08/29] ARM: KVM: abstract S1TW abort detection away

2013-03-04 Thread Marc Zyngier
Signed-off-by: Marc Zyngier marc.zyng...@arm.com
---
 arch/arm/include/asm/kvm_emulate.h | 5 +
 arch/arm/kvm/mmio.c| 2 +-
 2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/arch/arm/include/asm/kvm_emulate.h 
b/arch/arm/include/asm/kvm_emulate.h
index 90e92b4..c27d9c9 100644
--- a/arch/arm/include/asm/kvm_emulate.h
+++ b/arch/arm/include/asm/kvm_emulate.h
@@ -115,4 +115,9 @@ static inline bool kvm_vcpu_dabt_isextabt(struct kvm_vcpu 
*vcpu)
return (kvm_vcpu_get_hsr(vcpu)  8)  1;
 }
 
+static inline bool kvm_vcpu_dabt_iss1tw(struct kvm_vcpu *vcpu)
+{
+   return (kvm_vcpu_get_hsr(vcpu)  7)  1;
+}
+
 #endif /* __ARM_KVM_EMULATE_H__ */
diff --git a/arch/arm/kvm/mmio.c b/arch/arm/kvm/mmio.c
index e4682a3..6495c1c 100644
--- a/arch/arm/kvm/mmio.c
+++ b/arch/arm/kvm/mmio.c
@@ -71,7 +71,7 @@ static int decode_hsr(struct kvm_vcpu *vcpu, phys_addr_t 
fault_ipa,
return 1;
}
 
-   if ((kvm_vcpu_get_hsr(vcpu)  7)  1) {
+   if (kvm_vcpu_dabt_iss1tw(vcpu)) {
/* page table accesses IO mem: tell guest to fix its TTBR */
kvm_inject_dabt(vcpu, kvm_vcpu_get_hfar(vcpu));
return 1;
-- 
1.7.12.4

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 01/29] ARM: KVM: convert GP registers from u32 to unsigned long

2013-03-04 Thread Marc Zyngier
On 32bit ARM, unsigned long is guaranteed to be a 32bit quantity.
On 64bit ARM, it is a 64bit quantity.

In order to be able to share code between the two architectures,
convert the registers to be unsigned long, so the core code can
be oblivious of the change.

Signed-off-by: Marc Zyngier marc.zyng...@arm.com
---
 arch/arm/include/asm/kvm_emulate.h | 12 ++--
 arch/arm/include/uapi/asm/kvm.h| 12 ++--
 arch/arm/kvm/coproc.c  |  4 ++--
 arch/arm/kvm/coproc.h  |  4 ++--
 arch/arm/kvm/emulate.c | 22 +++---
 arch/arm/kvm/mmio.c|  2 +-
 6 files changed, 28 insertions(+), 28 deletions(-)

diff --git a/arch/arm/include/asm/kvm_emulate.h 
b/arch/arm/include/asm/kvm_emulate.h
index fd61199..510488a 100644
--- a/arch/arm/include/asm/kvm_emulate.h
+++ b/arch/arm/include/asm/kvm_emulate.h
@@ -23,8 +23,8 @@
 #include asm/kvm_asm.h
 #include asm/kvm_mmio.h
 
-u32 *vcpu_reg(struct kvm_vcpu *vcpu, u8 reg_num);
-u32 *vcpu_spsr(struct kvm_vcpu *vcpu);
+unsigned long *vcpu_reg(struct kvm_vcpu *vcpu, u8 reg_num);
+unsigned long *vcpu_spsr(struct kvm_vcpu *vcpu);
 
 int kvm_handle_wfi(struct kvm_vcpu *vcpu, struct kvm_run *run);
 void kvm_skip_instr(struct kvm_vcpu *vcpu, bool is_wide_instr);
@@ -37,14 +37,14 @@ static inline bool vcpu_mode_is_32bit(struct kvm_vcpu *vcpu)
return 1;
 }
 
-static inline u32 *vcpu_pc(struct kvm_vcpu *vcpu)
+static inline unsigned long *vcpu_pc(struct kvm_vcpu *vcpu)
 {
-   return (u32 *)vcpu-arch.regs.usr_regs.ARM_pc;
+   return vcpu-arch.regs.usr_regs.ARM_pc;
 }
 
-static inline u32 *vcpu_cpsr(struct kvm_vcpu *vcpu)
+static inline unsigned long *vcpu_cpsr(struct kvm_vcpu *vcpu)
 {
-   return (u32 *)vcpu-arch.regs.usr_regs.ARM_cpsr;
+   return vcpu-arch.regs.usr_regs.ARM_cpsr;
 }
 
 static inline void vcpu_set_thumb(struct kvm_vcpu *vcpu)
diff --git a/arch/arm/include/uapi/asm/kvm.h b/arch/arm/include/uapi/asm/kvm.h
index 023bfeb..c1ee007 100644
--- a/arch/arm/include/uapi/asm/kvm.h
+++ b/arch/arm/include/uapi/asm/kvm.h
@@ -53,12 +53,12 @@
 #define KVM_ARM_FIQ_spsr   fiq_regs[7]
 
 struct kvm_regs {
-   struct pt_regs usr_regs;/* R0_usr - R14_usr, PC, CPSR */
-   __u32 svc_regs[3];  /* SP_svc, LR_svc, SPSR_svc */
-   __u32 abt_regs[3];  /* SP_abt, LR_abt, SPSR_abt */
-   __u32 und_regs[3];  /* SP_und, LR_und, SPSR_und */
-   __u32 irq_regs[3];  /* SP_irq, LR_irq, SPSR_irq */
-   __u32 fiq_regs[8];  /* R8_fiq - R14_fiq, SPSR_fiq */
+   struct pt_regs usr_regs;/* R0_usr - R14_usr, PC, CPSR */
+   unsigned long svc_regs[3];  /* SP_svc, LR_svc, SPSR_svc */
+   unsigned long abt_regs[3];  /* SP_abt, LR_abt, SPSR_abt */
+   unsigned long und_regs[3];  /* SP_und, LR_und, SPSR_und */
+   unsigned long irq_regs[3];  /* SP_irq, LR_irq, SPSR_irq */
+   unsigned long fiq_regs[8];  /* R8_fiq - R14_fiq, SPSR_fiq */
 };
 
 /* Supported Processor Types */
diff --git a/arch/arm/kvm/coproc.c b/arch/arm/kvm/coproc.c
index 4ea9a98..38e76bc 100644
--- a/arch/arm/kvm/coproc.c
+++ b/arch/arm/kvm/coproc.c
@@ -76,7 +76,7 @@ static bool access_dcsw(struct kvm_vcpu *vcpu,
const struct coproc_params *p,
const struct coproc_reg *r)
 {
-   u32 val;
+   unsigned long val;
int cpu;
 
cpu = get_cpu();
@@ -298,7 +298,7 @@ static int emulate_cp15(struct kvm_vcpu *vcpu,
}
/* If access function fails, it should complain. */
} else {
-   kvm_err(Unsupported guest CP15 access at: %08x\n,
+   kvm_err(Unsupported guest CP15 access at: %08lx\n,
*vcpu_pc(vcpu));
print_cp_instr(params);
}
diff --git a/arch/arm/kvm/coproc.h b/arch/arm/kvm/coproc.h
index 992adfa..b7301d3 100644
--- a/arch/arm/kvm/coproc.h
+++ b/arch/arm/kvm/coproc.h
@@ -84,7 +84,7 @@ static inline bool read_zero(struct kvm_vcpu *vcpu,
 static inline bool write_to_read_only(struct kvm_vcpu *vcpu,
  const struct coproc_params *params)
 {
-   kvm_debug(CP15 write to read-only register at: %08x\n,
+   kvm_debug(CP15 write to read-only register at: %08lx\n,
  *vcpu_pc(vcpu));
print_cp_instr(params);
return false;
@@ -93,7 +93,7 @@ static inline bool write_to_read_only(struct kvm_vcpu *vcpu,
 static inline bool read_from_write_only(struct kvm_vcpu *vcpu,
const struct coproc_params *params)
 {
-   kvm_debug(CP15 read to write-only register at: %08x\n,
+   kvm_debug(CP15 read to write-only register at: %08lx\n,
  *vcpu_pc(vcpu));
print_cp_instr(params);
return false;
diff --git a/arch/arm/kvm/emulate.c b/arch/arm/kvm/emulate.c
index d61450a..d3094eb 100644
--- a/arch/arm/kvm/emulate.c
+++ b/arch/arm/kvm/emulate.c
@@ -109,10 +109,10 @@ static 

[PATCH 05/29] ARM: KVM: abstract HSR_SSE away

2013-03-04 Thread Marc Zyngier
Signed-off-by: Marc Zyngier marc.zyng...@arm.com
---
 arch/arm/include/asm/kvm_emulate.h | 5 +
 arch/arm/kvm/mmio.c| 2 +-
 2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/arch/arm/include/asm/kvm_emulate.h 
b/arch/arm/include/asm/kvm_emulate.h
index 236fadd..2a077bc 100644
--- a/arch/arm/include/asm/kvm_emulate.h
+++ b/arch/arm/include/asm/kvm_emulate.h
@@ -100,4 +100,9 @@ static inline bool kvm_vcpu_dabt_iswrite(struct kvm_vcpu 
*vcpu)
return !!(kvm_vcpu_get_hsr(vcpu)  HSR_WNR);
 }
 
+static inline bool kvm_vcpu_dabt_issext(struct kvm_vcpu *vcpu)
+{
+   return !!(kvm_vcpu_get_hsr(vcpu)  HSR_SSE);
+}
+
 #endif /* __ARM_KVM_EMULATE_H__ */
diff --git a/arch/arm/kvm/mmio.c b/arch/arm/kvm/mmio.c
index 7d58cb5..058029c 100644
--- a/arch/arm/kvm/mmio.c
+++ b/arch/arm/kvm/mmio.c
@@ -93,7 +93,7 @@ static int decode_hsr(struct kvm_vcpu *vcpu, phys_addr_t 
fault_ipa,
}
 
is_write = kvm_vcpu_dabt_iswrite(vcpu);
-   sign_extend = kvm_vcpu_get_hsr(vcpu)  HSR_SSE;
+   sign_extend = kvm_vcpu_dabt_issext(vcpu);
rt = (kvm_vcpu_get_hsr(vcpu)  HSR_SRT_MASK)  HSR_SRT_SHIFT;
 
if (kvm_vcpu_reg_is_pc(vcpu, rt)) {
-- 
1.7.12.4

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v13 2/8] start vm after resetting it

2013-03-04 Thread Hu Tao
On Thu, Feb 28, 2013 at 02:23:42PM +0100, Jan Kiszka wrote:
 On 2013-02-28 13:13, Hu Tao wrote:
  From: Wen Congyang we...@cn.fujitsu.com
  
  The guest should run after resetting it, but it does not run if its
  old state is RUN_STATE_INTERNAL_ERROR or RUN_STATE_PAUSED.
  
  We don't set runstate to RUN_STATE_PAUSED when resetting the guest,
  so the runstate will be changed from RUN_STATE_INTERNAL_ERROR or
  RUN_STATE_PAUSED to RUN_STATE_RUNNING(not RUN_STATE_PAUSED).
 
 I just wonder what will happen if I interrupted the guest via gdb and
 then issue monitor system_reset, also via gdb - common pattern if you
 set a breakpoint on some BUG() or fault handler and then want to restart
 the guest. Will the guest continue then while gdb thinks it is still
 stopped? Likely, we do not differentiate between gdb-initiated stops and
 the rest. Could you clarify?

Guest won't continue unless issue gdb continue. Anyway, I'll separate
this patch, as Paolo requested.
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v13 2/8] start vm after resetting it

2013-03-04 Thread Hu Tao
On Mon, Mar 04, 2013 at 10:32:17AM +0100, Paolo Bonzini wrote:
 Il 28/02/2013 13:13, Hu Tao ha scritto:
  From: Wen Congyang we...@cn.fujitsu.com
  
  The guest should run after resetting it, but it does not run if its
  old state is RUN_STATE_INTERNAL_ERROR or RUN_STATE_PAUSED.
  
  We don't set runstate to RUN_STATE_PAUSED when resetting the guest,
  so the runstate will be changed from RUN_STATE_INTERNAL_ERROR or
  RUN_STATE_PAUSED to RUN_STATE_RUNNING(not RUN_STATE_PAUSED).
 
 This is also debatable.  In particular, restarting an INTERNAL_ERROR
 guest makes it harder to inspect the state at the time of the failure.
 
 INTERNAL_ERROR should never happen, let's separate this patch too.

Sure.

 
 Paolo
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 01/29] arm64: KVM: define HYP and Stage-2 translation page flags

2013-03-04 Thread Marc Zyngier
Add HYP and S2 page flags, for both normal and device memory.

Signed-off-by: Marc Zyngier marc.zyng...@arm.com
---
 arch/arm64/include/asm/pgtable-hwdef.h | 13 +
 arch/arm64/include/asm/pgtable.h   | 13 +
 arch/arm64/mm/mmu.c|  6 +-
 3 files changed, 31 insertions(+), 1 deletion(-)

diff --git a/arch/arm64/include/asm/pgtable-hwdef.h 
b/arch/arm64/include/asm/pgtable-hwdef.h
index 75fd13d..acb4ee5 100644
--- a/arch/arm64/include/asm/pgtable-hwdef.h
+++ b/arch/arm64/include/asm/pgtable-hwdef.h
@@ -35,6 +35,7 @@
 /*
  * Section
  */
+#define PMD_SECT_USER  (_AT(pteval_t, 1)  6) /* AP[1] */
 #define PMD_SECT_S (_AT(pmdval_t, 3)  8)
 #define PMD_SECT_AF(_AT(pmdval_t, 1)  10)
 #define PMD_SECT_NG(_AT(pmdval_t, 1)  11)
@@ -68,6 +69,18 @@
 #define PTE_ATTRINDX_MASK  (_AT(pteval_t, 7)  2)
 
 /*
+ * 2nd stage PTE definitions
+ */
+#define PTE_S2_RDONLY   (_AT(pteval_t, 1)  6)   /* HAP[1]   */
+#define PTE_S2_RDWR (_AT(pteval_t, 2)  6)   /* HAP[2:1] */
+
+/*
+ * EL2/HYP PTE/PMD definitions
+ */
+#define PMD_HYPPMD_SECT_USER
+#define PTE_HYPPTE_USER
+
+/*
  * 40-bit physical address supported.
  */
 #define PHYS_MASK_SHIFT(40)
diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index e333a24..11c608a 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -60,6 +60,7 @@ extern void __pgd_error(const char *file, int line, unsigned 
long val);
 #define _PAGE_DEFAULT  PTE_TYPE_PAGE | PTE_AF
 
 extern pgprot_t pgprot_default;
+extern pgprot_tpgprot_device;
 
 #define __pgprot_modify(prot,mask,bits) \
__pgprot((pgprot_val(prot)  ~(mask)) | (bits))
@@ -76,6 +77,12 @@ extern pgprot_t pgprot_default;
 #define PAGE_KERNEL_MOD_PROT(pgprot_default, PTE_PXN | PTE_UXN | 
PTE_DIRTY)
 #define PAGE_KERNEL_EXEC   _MOD_PROT(pgprot_default, PTE_UXN | PTE_DIRTY)
 
+#define PAGE_HYP   _MOD_PROT(pgprot_default, PTE_HYP)
+#define PAGE_HYP_DEVICE_MOD_PROT(pgprot_device, PTE_HYP)
+
+#define PAGE_S2_MOD_PROT(pgprot_default, PTE_USER | 
PTE_S2_RDONLY)
+#define PAGE_S2_DEVICE _MOD_PROT(pgprot_device, PTE_USER | PTE_S2_RDWR)
+
 #define __PAGE_NONE__pgprot(((_PAGE_DEFAULT)  ~PTE_TYPE_MASK) | 
PTE_PROT_NONE)
 #define __PAGE_SHARED  __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | 
PTE_PXN | PTE_UXN)
 #define __PAGE_SHARED_EXEC __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | 
PTE_PXN)
@@ -197,6 +204,12 @@ extern pgprot_t phys_mem_access_prot(struct file *file, 
unsigned long pfn,
 
 #define pmd_bad(pmd)   (!(pmd_val(pmd)  2))
 
+#define pmd_table(pmd) ((pmd_val(pmd)  PMD_TYPE_MASK) == \
+PMD_TYPE_TABLE)
+#define pmd_sect(pmd)  ((pmd_val(pmd)  PMD_TYPE_MASK) == \
+PMD_TYPE_SECT)
+
+
 static inline void set_pmd(pmd_t *pmdp, pmd_t pmd)
 {
*pmdp = pmd;
diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index 224b44a..df03aea 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -44,6 +44,7 @@ struct page *empty_zero_page;
 EXPORT_SYMBOL(empty_zero_page);
 
 pgprot_t pgprot_default;
+pgprot_t pgprot_device;
 EXPORT_SYMBOL(pgprot_default);
 
 static pmdval_t prot_sect_kernel;
@@ -127,10 +128,11 @@ early_param(cachepolicy, early_cachepolicy);
  */
 static void __init init_mem_pgprot(void)
 {
-   pteval_t default_pgprot;
+   pteval_t default_pgprot, device_pgprot;
int i;
 
default_pgprot = PTE_ATTRINDX(MT_NORMAL);
+   device_pgprot = PTE_ATTRINDX(MT_DEVICE_nGnRE) | PTE_PXN | PTE_UXN;
prot_sect_kernel = PMD_TYPE_SECT | PMD_SECT_AF | 
PMD_ATTRINDX(MT_NORMAL);
 
 #ifdef CONFIG_SMP
@@ -138,6 +140,7 @@ static void __init init_mem_pgprot(void)
 * Mark memory with the shared attribute for SMP systems
 */
default_pgprot |= PTE_SHARED;
+   device_pgprot |= PTE_SHARED;
prot_sect_kernel |= PMD_SECT_S;
 #endif
 
@@ -147,6 +150,7 @@ static void __init init_mem_pgprot(void)
}
 
pgprot_default = __pgprot(PTE_TYPE_PAGE | PTE_AF | default_pgprot);
+   pgprot_device = __pgprot(PTE_TYPE_PAGE | PTE_AF | device_pgprot);
 }
 
 pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
-- 
1.7.12.4

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 00/29] Port of KVM to arm64

2013-03-04 Thread Marc Zyngier
This series contains the implementation of KVM for arm64. It depends
on the pre-arm64 rework series I posted earlier, as well as on the
tiny perf patch sent just after.

The code is unsurprisingly extremely similar to the KVM/arm code, and
a lot of it is actually shared with the 32bit version. Some of the
include files are duplicated though (I'm definitely willing to fix
that).

In terms of features:
- Support for 4k and 64k pages
- Support for 32bit and 64bit guests
- PSCI support for SMP booting

As we do not have a 64bit QEMU port, it has been tested using kvmtool
(support has already been merged).

Marc Zyngier (29):
  arm64: KVM: define HYP and Stage-2 translation page flags
  arm64: KVM: HYP mode idmap support
  arm64: KVM: EL2 register definitions
  arm64: KVM: system register definitions for 64bit guests
  arm64: KVM: Basic ESR_EL2 helpers and vcpu register access
  arm64: KVM: fault injection into a guest
  arm64: KVM: architecture specific MMU backend
  arm64: KVM: user space interface
  arm64: KVM: system register handling
  arm64: KVM: Cortex-A57 specific system registers handling
  arm64: KVM: virtual CPU reset
  arm64: KVM: kvm_arch and kvm_vcpu_arch definitions
  arm64: KVM: MMIO access backend
  arm64: KVM: guest one-reg interface
  arm64: KVM: hypervisor initialization code
  arm64: KVM: HYP mode world switch implementation
  arm64: KVM: Exit handling
  arm64: KVM: Plug the VGIC
  arm64: KVM: Plug the arch timer
  arm64: KVM: PSCI implementation
  arm64: KVM: Build system integration
  arm64: KVM: define 32bit specific registers
  arm64: KVM: 32bit GP register access
  arm64: KVM: 32bit conditional execution emulation
  arm64: KVM: 32bit handling of coprocessor traps
  arm64: KVM: 32bit coprocessor access for Cortex-A57
  arm64: KVM: 32bit specific register world switch
  arm64: KVM: 32bit guest fault injection
  arm64: KVM: enable initialization of a 32bit vcpu

 arch/arm/kvm/arch_timer.c   |1 +
 arch/arm64/Kconfig  |2 +
 arch/arm64/Makefile |2 +-
 arch/arm64/include/asm/kvm_arch_timer.h |   58 ++
 arch/arm64/include/asm/kvm_arm.h|  243 +++
 arch/arm64/include/asm/kvm_asm.h|  104 +++
 arch/arm64/include/asm/kvm_coproc.h |   56 ++
 arch/arm64/include/asm/kvm_emulate.h|  181 +
 arch/arm64/include/asm/kvm_host.h   |  192 ++
 arch/arm64/include/asm/kvm_mmio.h   |   59 ++
 arch/arm64/include/asm/kvm_mmu.h|  126 
 arch/arm64/include/asm/kvm_psci.h   |   23 +
 arch/arm64/include/asm/kvm_vgic.h   |  156 +
 arch/arm64/include/asm/pgtable-hwdef.h  |   13 +
 arch/arm64/include/asm/pgtable.h|   13 +
 arch/arm64/include/uapi/asm/kvm.h   |  190 ++
 arch/arm64/kernel/asm-offsets.c |   33 +
 arch/arm64/kernel/vmlinux.lds.S |   10 +
 arch/arm64/kvm/Kconfig  |   59 ++
 arch/arm64/kvm/Makefile |   18 +
 arch/arm64/kvm/emulate.c|  154 +
 arch/arm64/kvm/guest.c  |  246 +++
 arch/arm64/kvm/handle_exit.c|  124 
 arch/arm64/kvm/hyp-init.S   |   89 +++
 arch/arm64/kvm/hyp.S|  826 +++
 arch/arm64/kvm/idmap.c  |  141 
 arch/arm64/kvm/idmap.h  |8 +
 arch/arm64/kvm/inject_fault.c   |  194 ++
 arch/arm64/kvm/regmap.c |  168 +
 arch/arm64/kvm/reset.c  |   83 +++
 arch/arm64/kvm/sys_regs.c   | 1113 +++
 arch/arm64/kvm/sys_regs.h   |  141 
 arch/arm64/kvm/sys_regs_a57.c   |  118 
 arch/arm64/mm/mmu.c |6 +-
 include/uapi/linux/kvm.h|1 +
 35 files changed, 4949 insertions(+), 2 deletions(-)
 create mode 100644 arch/arm64/include/asm/kvm_arch_timer.h
 create mode 100644 arch/arm64/include/asm/kvm_arm.h
 create mode 100644 arch/arm64/include/asm/kvm_asm.h
 create mode 100644 arch/arm64/include/asm/kvm_coproc.h
 create mode 100644 arch/arm64/include/asm/kvm_emulate.h
 create mode 100644 arch/arm64/include/asm/kvm_host.h
 create mode 100644 arch/arm64/include/asm/kvm_mmio.h
 create mode 100644 arch/arm64/include/asm/kvm_mmu.h
 create mode 100644 arch/arm64/include/asm/kvm_psci.h
 create mode 100644 arch/arm64/include/asm/kvm_vgic.h
 create mode 100644 arch/arm64/include/uapi/asm/kvm.h
 create mode 100644 arch/arm64/kvm/Kconfig
 create mode 100644 arch/arm64/kvm/Makefile
 create mode 100644 arch/arm64/kvm/emulate.c
 create mode 100644 arch/arm64/kvm/guest.c
 create mode 100644 arch/arm64/kvm/handle_exit.c
 create mode 100644 arch/arm64/kvm/hyp-init.S
 create mode 100644 arch/arm64/kvm/hyp.S
 create mode 100644 arch/arm64/kvm/idmap.c
 create mode 100644 arch/arm64/kvm/idmap.h
 create mode 100644 arch/arm64/kvm/inject_fault.c
 create mode 100644 arch/arm64/kvm/regmap.c
 create mode 100644 arch/arm64/kvm/reset.c
 create mode 100644 

[PATCH 02/29] arm64: KVM: HYP mode idmap support

2013-03-04 Thread Marc Zyngier
Add the necessary infrastructure for identity-mapped HYP page
tables. Idmap-ed code must be in the .hyp.idmap.text linker
section.

The rest of the HYP ends up in .hyp.text.

Signed-off-by: Marc Zyngier marc.zyng...@arm.com
---
 arch/arm64/kernel/vmlinux.lds.S |  10 +++
 arch/arm64/kvm/idmap.c  | 141 
 arch/arm64/kvm/idmap.h  |   8 +++
 3 files changed, 159 insertions(+)
 create mode 100644 arch/arm64/kvm/idmap.c
 create mode 100644 arch/arm64/kvm/idmap.h

diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S
index 3fae2be..51b87c3 100644
--- a/arch/arm64/kernel/vmlinux.lds.S
+++ b/arch/arm64/kernel/vmlinux.lds.S
@@ -17,6 +17,15 @@ ENTRY(stext)
 
 jiffies = jiffies_64;
 
+#define HYPERVISOR_TEXT\
+   ALIGN_FUNCTION();   \
+   VMLINUX_SYMBOL(__hyp_idmap_text_start) = .; \
+   *(.hyp.idmap.text)  \
+   VMLINUX_SYMBOL(__hyp_idmap_text_end) = .;   \
+   VMLINUX_SYMBOL(__hyp_text_start) = .;   \
+   *(.hyp.text)\
+   VMLINUX_SYMBOL(__hyp_text_end) = .;
+
 SECTIONS
 {
/*
@@ -49,6 +58,7 @@ SECTIONS
TEXT_TEXT
SCHED_TEXT
LOCK_TEXT
+   HYPERVISOR_TEXT
*(.fixup)
*(.gnu.warning)
. = ALIGN(16);
diff --git a/arch/arm64/kvm/idmap.c b/arch/arm64/kvm/idmap.c
new file mode 100644
index 000..68a55d4
--- /dev/null
+++ b/arch/arm64/kvm/idmap.c
@@ -0,0 +1,141 @@
+/*
+ * Copyright (C) 2012 - ARM Ltd
+ * Author: Marc Zyngier marc.zyng...@arm.com
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see http://www.gnu.org/licenses/.
+ */
+
+#include linux/module.h
+#include linux/kernel.h
+#include linux/slab.h
+
+#include asm/cputype.h
+#include asm/pgalloc.h
+#include asm/pgtable.h
+#include asm/sections.h
+#include asm/virt.h
+
+#include idmap.h
+
+pgd_t *hyp_pgd;
+
+/*
+ * We always use a 2-level mapping for hyp-idmap:
+ * - Section mapped for 4kB pages
+ * - Page mapped for 64kB pages
+ */
+#ifdef CONFIG_ARM64_64K_PAGES
+static void idmap_add_pte(pmd_t *pmd, unsigned long addr, unsigned long end)
+{
+   struct page *page;
+   pte_t *pte;
+   unsigned long next;
+
+   if (pmd_none(*pmd)) {
+   pte = pte_alloc_one_kernel(NULL, addr);
+   if (!pte) {
+   pr_warning(Failed to allocate identity pte.\n);
+   return;
+   }
+   pmd_populate_kernel(NULL, pmd, pte);
+   }
+
+   pte = pte_offset_kernel(pmd, addr);
+
+   do {
+   page = phys_to_page(addr);
+   next = (addr  PAGE_MASK) + PAGE_SIZE;
+   set_pte(pte, mk_pte(page, PAGE_HYP));
+   } while (pte++, addr = next, addr  end);
+}
+#else
+#define HYP_SECT_PROT  (PMD_TYPE_SECT | PMD_SECT_AF | \
+PMD_ATTRINDX(MT_NORMAL) | PMD_HYP)
+
+/*
+ * For 4kB pages, we use a section to perform the identity mapping,
+ * hence the direct call to __pmd_populate().
+ */
+static void idmap_add_pte(pmd_t *pmd, unsigned long addr, unsigned long end)
+{
+   __pmd_populate(pmd, addr  PMD_MASK, HYP_SECT_PROT);
+}
+#endif
+
+static void idmap_add_pmd(pud_t *pud, unsigned long addr, unsigned long end)
+{
+   pmd_t *pmd;
+   unsigned long next;
+
+   if (pud_none_or_clear_bad(pud)) {
+   pmd = pmd_alloc_one(NULL, addr);
+   if (!pmd) {
+   pr_warning(Failed to allocate identity pmd.\n);
+   return;
+   }
+   pud_populate(NULL, pud, pmd);
+   }
+
+   pmd = pmd_offset(pud, addr);
+
+   do {
+   next = pmd_addr_end(addr, end);
+   idmap_add_pte(pmd, addr, next);
+   } while (pmd++, addr = next, addr != end);
+}
+
+static void idmap_add_pud(pgd_t *pgd, unsigned long addr, unsigned long end)
+{
+   pud_t *pud = pud_offset(pgd, addr);
+   unsigned long next;
+
+   do {
+   next = pud_addr_end(addr, end);
+   idmap_add_pmd(pud, addr, next);
+   } while (pud++, addr = next, addr != end);
+}
+
+extern char  __hyp_idmap_text_start[], __hyp_idmap_text_end[];
+
+static int __init hyp_idmap_setup(void)
+{
+   unsigned long addr, end;
+   

[PATCH 03/29] arm64: KVM: EL2 register definitions

2013-03-04 Thread Marc Zyngier
Define all the useful bitfields for EL2 registers.

Signed-off-by: Marc Zyngier marc.zyng...@arm.com
---
 arch/arm64/include/asm/kvm_arm.h | 243 +++
 1 file changed, 243 insertions(+)
 create mode 100644 arch/arm64/include/asm/kvm_arm.h

diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h
new file mode 100644
index 000..6561507
--- /dev/null
+++ b/arch/arm64/include/asm/kvm_arm.h
@@ -0,0 +1,243 @@
+/*
+ * Copyright (C) 2012 - ARM Ltd
+ * Author: Marc Zyngier marc.zyng...@arm.com
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see http://www.gnu.org/licenses/.
+ */
+
+#ifndef __ARM64_KVM_ARM_H__
+#define __ARM64_KVM_ARM_H__
+
+#include asm/types.h
+
+/* Hyp Configuration Register (HCR) bits */
+#define HCR_ID (1  33)
+#define HCR_CD (1  32)
+#define HCR_RW_SHIFT   31
+#define HCR_RW (1  HCR_RW_SHIFT)
+#define HCR_TRVM   (1  30)
+#define HCR_HCD(1  29)
+#define HCR_TDZ(1  28)
+#define HCR_TGE(1  27)
+#define HCR_TVM(1  26)
+#define HCR_TTLB   (1  25)
+#define HCR_TPU(1  24)
+#define HCR_TPC(1  23)
+#define HCR_TSW(1  22)
+#define HCR_TAC(1  21)
+#define HCR_TIDCP  (1  20)
+#define HCR_TSC(1  19)
+#define HCR_TID3   (1  18)
+#define HCR_TID2   (1  17)
+#define HCR_TID1   (1  16)
+#define HCR_TID0   (1  15)
+#define HCR_TWE(1  14)
+#define HCR_TWI(1  13)
+#define HCR_DC (1  12)
+#define HCR_BSU(3  10)
+#define HCR_BSU_IS (1  10)
+#define HCR_FB (1  9)
+#define HCR_VA (1  8)
+#define HCR_VI (1  7)
+#define HCR_VF (1  6)
+#define HCR_AMO(1  5)
+#define HCR_IMO(1  4)
+#define HCR_FMO(1  3)
+#define HCR_PTW(1  2)
+#define HCR_SWIO   (1  1)
+#define HCR_VM (1)
+
+/*
+ * The bits we set in HCR:
+ * RW: 64bit by default, can be overridden for 32bit VMs
+ * TAC:Trap ACTLR
+ * TSC:Trap SMC
+ * TSW:Trap cache operations by set/way
+ * TWI:Trap WFI
+ * TIDCP:  Trap L2CTLR/L2ECTLR
+ * BSU_IS: Upgrade barriers to the inner shareable domain
+ * FB: Force broadcast of all maintenance operations
+ * AMO:Override CPSR.A and enable signaling with VA
+ * IMO:Override CPSR.I and enable signaling with VI
+ * FMO:Override CPSR.F and enable signaling with VF
+ * SWIO:   Turn set/way invalidates into set/way clean+invalidate
+ */
+#define HCR_GUEST_FLAGS (HCR_TSC | HCR_TSW | HCR_TWI | HCR_VM | HCR_BSU_IS | \
+HCR_FB | HCR_TAC | HCR_AMO | HCR_IMO | HCR_FMO | \
+HCR_SWIO | HCR_TIDCP | HCR_RW)
+#define HCR_VIRT_EXCP_MASK (HCR_VA | HCR_VI | HCR_VF)
+
+/* Hyp System Control Register (SCTLR_EL2) bits */
+#define SCTLR_EL2_EE   (1  25)
+#define SCTLR_EL2_WXN  (1  19)
+#define SCTLR_EL2_I(1  12)
+#define SCTLR_EL2_SA   (1  3)
+#define SCTLR_EL2_C(1  2)
+#define SCTLR_EL2_A(1  1)
+#define SCTLR_EL2_M1
+#define SCTLR_EL2_FLAGS(SCTLR_EL2_M | SCTLR_EL2_A | SCTLR_EL2_C |  
\
+SCTLR_EL2_SA | SCTLR_EL2_I)
+
+/* TCR_EL2 Registers bits */
+#define TCR_EL2_TBI(1  20)
+#define TCR_EL2_PS (7  16)
+#define TCR_EL2_PS_40B (2  16)
+#define TCR_EL2_TG0(1  14)
+#define TCR_EL2_SH0(3  12)
+#define TCR_EL2_ORGN0  (3  10)
+#define TCR_EL2_IRGN0  (3  8)
+#define TCR_EL2_T0SZ   0x3f
+#define TCR_EL2_MASK   (TCR_EL2_TG0 | TCR_EL2_SH0 | \
+TCR_EL2_ORGN0 | TCR_EL2_IRGN0 | TCR_EL2_T0SZ)
+
+#define TCR_EL2_FLAGS  (TCR_EL2_PS_40B)
+
+/* VTCR_EL2 Registers bits */
+#define VTCR_EL2_PS_MASK   (7  16)
+#define VTCR_EL2_PS_40B(2  16)
+#define VTCR_EL2_TG0_MASK  (1  14)
+#define VTCR_EL2_TG0_4K(0  14)
+#define VTCR_EL2_TG0_64K   (1  14)
+#define VTCR_EL2_SH0_MASK  (3  12)
+#define VTCR_EL2_SH0_INNER (3  12)
+#define VTCR_EL2_ORGN0_MASK(3  10)
+#define VTCR_EL2_ORGN0_WBWA(3  10)
+#define VTCR_EL2_IRGN0_MASK(3  8)
+#define VTCR_EL2_IRGN0_WBWA(3  8)
+#define VTCR_EL2_SL0_MASK  (3  6)
+#define VTCR_EL2_SL0_LVL1  (1  6)
+#define VTCR_EL2_T0SZ_MASK 0x3f
+#define VTCR_EL2_T0SZ_40B  24
+
+#ifdef 

[PATCH 05/29] arm64: KVM: Basic ESR_EL2 helpers and vcpu register access

2013-03-04 Thread Marc Zyngier
Implements helpers for dealing with the EL2 syndrome register as
well as accessing the vcpu registers.

Signed-off-by: Marc Zyngier marc.zyng...@arm.com
---
 arch/arm64/include/asm/kvm_emulate.h | 159 +++
 1 file changed, 159 insertions(+)
 create mode 100644 arch/arm64/include/asm/kvm_emulate.h

diff --git a/arch/arm64/include/asm/kvm_emulate.h 
b/arch/arm64/include/asm/kvm_emulate.h
new file mode 100644
index 000..16a343b
--- /dev/null
+++ b/arch/arm64/include/asm/kvm_emulate.h
@@ -0,0 +1,159 @@
+/*
+ * Copyright (C) 2012 - ARM Ltd
+ * Author: Marc Zyngier marc.zyng...@arm.com
+ *
+ * Derived from arch/arm/include/kvm_emulate.h
+ * Copyright (C) 2012 - Virtual Open Systems and Columbia University
+ * Author: Christoffer Dall c.d...@virtualopensystems.com
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see http://www.gnu.org/licenses/.
+ */
+
+#ifndef __ARM64_KVM_EMULATE_H__
+#define __ARM64_KVM_EMULATE_H__
+
+#include linux/kvm_host.h
+#include asm/kvm_asm.h
+#include asm/kvm_arm.h
+#include asm/kvm_mmio.h
+#include asm/ptrace.h
+
+void kvm_inject_undefined(struct kvm_vcpu *vcpu);
+void kvm_inject_dabt(struct kvm_vcpu *vcpu, unsigned long addr);
+void kvm_inject_pabt(struct kvm_vcpu *vcpu, unsigned long addr);
+
+static inline unsigned long *vcpu_pc(struct kvm_vcpu *vcpu)
+{
+   return (unsigned long *)vcpu-arch.regs.regs.pc;
+}
+
+static inline unsigned long *vcpu_cpsr(struct kvm_vcpu *vcpu)
+{
+   return (unsigned long *)vcpu-arch.regs.regs.pstate;
+}
+
+static inline bool vcpu_mode_is_32bit(struct kvm_vcpu *vcpu)
+{
+   return false;   /* 32bit? Bahhh... */
+}
+
+static inline bool kvm_condition_valid(struct kvm_vcpu *vcpu)
+{
+   return true;/* No conditionals on arm64 */
+}
+
+static inline void kvm_skip_instr(struct kvm_vcpu *vcpu, bool is_wide_instr)
+{
+   *vcpu_pc(vcpu) += 4;
+}
+
+static inline void vcpu_set_thumb(struct kvm_vcpu *vcpu)
+{
+}
+
+static inline unsigned long *vcpu_reg(struct kvm_vcpu *vcpu, u8 reg_num)
+{
+   return (unsigned long *)vcpu-arch.regs.regs.regs[reg_num];
+
+}
+
+/* Get vcpu SPSR for current mode */
+static inline unsigned long *vcpu_spsr(struct kvm_vcpu *vcpu)
+{
+   return vcpu-arch.regs.spsr[KVM_SPSR_EL1];
+}
+
+static inline bool kvm_vcpu_reg_is_pc(struct kvm_vcpu *vcpu, int reg)
+{
+   return false;
+}
+
+static inline bool vcpu_mode_priv(struct kvm_vcpu *vcpu)
+{
+   u32 mode = *vcpu_cpsr(vcpu)  PSR_MODE_MASK;
+
+   return mode != PSR_MODE_EL0t;
+}
+
+static inline u32 kvm_vcpu_get_hsr(struct kvm_vcpu *vcpu)
+{
+   return vcpu-arch.fault.esr_el2;
+}
+
+static inline unsigned long kvm_vcpu_get_hfar(struct kvm_vcpu *vcpu)
+{
+   return vcpu-arch.fault.far_el2;
+}
+
+static inline phys_addr_t kvm_vcpu_get_fault_ipa(struct kvm_vcpu *vcpu)
+{
+   return ((phys_addr_t)vcpu-arch.fault.hpfar_el2  HPFAR_MASK)  8;
+}
+
+static inline bool kvm_vcpu_dabt_isvalid(struct kvm_vcpu *vcpu)
+{
+   return !!(kvm_vcpu_get_hsr(vcpu)  ESR_EL2_ISV);
+}
+
+static inline bool kvm_vcpu_dabt_iswrite(struct kvm_vcpu *vcpu)
+{
+   return !!(kvm_vcpu_get_hsr(vcpu)  ESR_EL2_WNR);
+}
+
+static inline bool kvm_vcpu_dabt_issext(struct kvm_vcpu *vcpu)
+{
+   return !!(kvm_vcpu_get_hsr(vcpu)  ESR_EL2_SSE);
+}
+
+static inline int kvm_vcpu_dabt_get_rd(struct kvm_vcpu *vcpu)
+{
+   return (kvm_vcpu_get_hsr(vcpu)  ESR_EL2_SRT_MASK)  ESR_EL2_SRT_SHIFT;
+}
+
+static inline bool kvm_vcpu_dabt_isextabt(struct kvm_vcpu *vcpu)
+{
+   return !!(kvm_vcpu_get_hsr(vcpu)  ESR_EL2_EA);
+}
+
+static inline bool kvm_vcpu_dabt_iss1tw(struct kvm_vcpu *vcpu)
+{
+   return !!(kvm_vcpu_get_hsr(vcpu)  ESR_EL2_S1PTW);
+}
+
+static inline int kvm_vcpu_dabt_get_as(struct kvm_vcpu *vcpu)
+{
+   return 1  ((kvm_vcpu_get_hsr(vcpu)  ESR_EL2_SAS)  
ESR_EL2_SAS_SHIFT);
+}
+
+/* This one is not specific to Data Abort */
+static inline bool kvm_vcpu_trap_il_is32bit(struct kvm_vcpu *vcpu)
+{
+   return !!(kvm_vcpu_get_hsr(vcpu)  ESR_EL2_IL);
+}
+
+static inline u8 kvm_vcpu_trap_get_class(struct kvm_vcpu *vcpu)
+{
+   return kvm_vcpu_get_hsr(vcpu)  ESR_EL2_EC_SHIFT;
+}
+
+static inline bool kvm_vcpu_trap_is_iabt(struct kvm_vcpu *vcpu)
+{
+   return kvm_vcpu_trap_get_class(vcpu) == ESR_EL2_EC_IABT;
+}
+
+static inline u8 kvm_vcpu_trap_get_fault(struct kvm_vcpu *vcpu)
+{
+   return kvm_vcpu_get_hsr(vcpu)  ESR_EL2_FSC_TYPE;
+}
+
+#endif /* __ARM64_KVM_EMULATE_H__ */
-- 
1.7.12.4

--
To unsubscribe 

[PATCH 06/29] arm64: KVM: fault injection into a guest

2013-03-04 Thread Marc Zyngier
Implement the injection of a fault (undefined, data abort or
prefetch abort) into a 64bit guest.

Signed-off-by: Marc Zyngier marc.zyng...@arm.com
---
 arch/arm64/kvm/inject_fault.c | 117 ++
 1 file changed, 117 insertions(+)
 create mode 100644 arch/arm64/kvm/inject_fault.c

diff --git a/arch/arm64/kvm/inject_fault.c b/arch/arm64/kvm/inject_fault.c
new file mode 100644
index 000..80b245f
--- /dev/null
+++ b/arch/arm64/kvm/inject_fault.c
@@ -0,0 +1,117 @@
+/*
+ * Fault injection for 64bit guests.
+ *
+ * Copyright (C) 2012 - ARM Ltd
+ * Author: Marc Zyngier marc.zyng...@arm.com
+ *
+ * Based on arch/arm/kvm/emulate.c
+ * Copyright (C) 2012 - Virtual Open Systems and Columbia University
+ * Author: Christoffer Dall c.d...@virtualopensystems.com
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see http://www.gnu.org/licenses/.
+ */
+
+#include linux/kvm_host.h
+#include asm/kvm_emulate.h
+
+static void inject_abt64(struct kvm_vcpu *vcpu, bool is_iabt, unsigned long 
addr)
+{
+   unsigned long cpsr = *vcpu_cpsr(vcpu);
+   int is_aarch32;
+   u32 esr = 0;
+
+   is_aarch32 = vcpu_mode_is_32bit(vcpu);
+
+   *vcpu_spsr(vcpu) = cpsr;
+   vcpu-arch.regs.elr_el1 = *vcpu_pc(vcpu);
+
+   *vcpu_cpsr(vcpu) = PSR_MODE_EL1h | PSR_A_BIT | PSR_F_BIT | PSR_I_BIT;
+   *vcpu_pc(vcpu) = vcpu-arch.sys_regs[VBAR_EL1] + 0x200;
+
+   vcpu-arch.sys_regs[FAR_EL1] = addr;
+
+   /*
+* Build an {i,d}abort, depending on the level and the
+* instruction set. Report an external synchronous abort.
+*/
+   if (kvm_vcpu_trap_il_is32bit(vcpu))
+   esr |= (1  25);
+
+   if (is_aarch32 || (cpsr  PSR_MODE_MASK) == PSR_MODE_EL0t)
+   esr |= (0x20  26);
+   else
+   esr |= (0x21  26);
+
+   if (!is_iabt)
+   esr |= (1  28);
+
+   vcpu-arch.sys_regs[ESR_EL1] = esr | 0x10;
+}
+
+static void inject_undef64(struct kvm_vcpu *vcpu)
+{
+   unsigned long cpsr = *vcpu_cpsr(vcpu);
+   u32 esr = 0;
+
+   *vcpu_spsr(vcpu) = cpsr;
+   vcpu-arch.regs.elr_el1 = *vcpu_pc(vcpu);
+
+   *vcpu_cpsr(vcpu) = PSR_MODE_EL1h | PSR_F_BIT | PSR_I_BIT;
+   *vcpu_pc(vcpu) = vcpu-arch.sys_regs[VBAR_EL1] + 0x200;
+
+   /*
+* Build an unknown exception, depending on the instruction
+* set.
+*/
+   if (kvm_vcpu_trap_il_is32bit(vcpu))
+   esr |= (1  25);
+
+   vcpu-arch.sys_regs[ESR_EL1] = esr;
+}
+
+/**
+ * kvm_inject_dabt - inject a data abort into the guest
+ * @vcpu: The VCPU to receive the undefined exception
+ * @addr: The address to report in the DFAR
+ *
+ * It is assumed that this code is called from the VCPU thread and that the
+ * VCPU therefore is not currently executing guest code.
+ */
+void kvm_inject_dabt(struct kvm_vcpu *vcpu, unsigned long addr)
+{
+   inject_abt64(vcpu, false, addr);
+}
+
+/**
+ * kvm_inject_pabt - inject a prefetch abort into the guest
+ * @vcpu: The VCPU to receive the undefined exception
+ * @addr: The address to report in the DFAR
+ *
+ * It is assumed that this code is called from the VCPU thread and that the
+ * VCPU therefore is not currently executing guest code.
+ */
+void kvm_inject_pabt(struct kvm_vcpu *vcpu, unsigned long addr)
+{
+   inject_abt64(vcpu, true, addr);
+}
+
+/**
+ * kvm_inject_undefined - inject an undefined instruction into the guest
+ *
+ * It is assumed that this code is called from the VCPU thread and that the
+ * VCPU therefore is not currently executing guest code.
+ */
+void kvm_inject_undefined(struct kvm_vcpu *vcpu)
+{
+   inject_undef64(vcpu);
+}
-- 
1.7.12.4

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 09/29] arm64: KVM: system register handling

2013-03-04 Thread Marc Zyngier
Provide 64bit system register handling, modeled after the cp15
handling for ARM.

Signed-off-by: Marc Zyngier marc.zyng...@arm.com
---
 arch/arm64/include/asm/kvm_coproc.h |  51 ++
 arch/arm64/include/uapi/asm/kvm.h   |  56 +++
 arch/arm64/kvm/sys_regs.c   | 962 
 arch/arm64/kvm/sys_regs.h   | 141 ++
 include/uapi/linux/kvm.h|   1 +
 5 files changed, 1211 insertions(+)
 create mode 100644 arch/arm64/include/asm/kvm_coproc.h
 create mode 100644 arch/arm64/kvm/sys_regs.c
 create mode 100644 arch/arm64/kvm/sys_regs.h

diff --git a/arch/arm64/include/asm/kvm_coproc.h 
b/arch/arm64/include/asm/kvm_coproc.h
new file mode 100644
index 000..e791894
--- /dev/null
+++ b/arch/arm64/include/asm/kvm_coproc.h
@@ -0,0 +1,51 @@
+/*
+ * Copyright (C) 2012 - ARM Ltd
+ * Author: Marc Zyngier marc.zyng...@arm.com
+ *
+ * Derived from arch/arm/include/asm/kvm_coproc.h
+ * Copyright (C) 2012 Rusty Russell IBM Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see http://www.gnu.org/licenses/.
+ */
+
+#ifndef __ARM64_KVM_COPROC_H__
+#define __ARM64_KVM_COPROC_H__
+
+#include <linux/kvm_host.h>
+
+void kvm_reset_sys_regs(struct kvm_vcpu *vcpu);
+
+struct kvm_sys_reg_table {
+   const struct sys_reg_desc *table;
+   size_t num;
+};
+
+struct kvm_sys_reg_target_table {
+   unsigned target;
+   struct kvm_sys_reg_table table64;
+};
+
+void kvm_register_target_sys_reg_table(struct kvm_sys_reg_target_table *table);
+
+int kvm_handle_sys_reg(struct kvm_vcpu *vcpu, struct kvm_run *run);
+
+#define kvm_coproc_table_init kvm_sys_reg_table_init
+void kvm_sys_reg_table_init(void);
+
+struct kvm_one_reg;
+int kvm_arm_copy_sys_reg_indices(struct kvm_vcpu *vcpu, u64 __user *uindices);
+int kvm_arm_sys_reg_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *);
+int kvm_arm_sys_reg_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *);
+unsigned long kvm_arm_num_sys_reg_descs(struct kvm_vcpu *vcpu);
+
+#endif /* __ARM64_KVM_COPROC_H__ */
diff --git a/arch/arm64/include/uapi/asm/kvm.h 
b/arch/arm64/include/uapi/asm/kvm.h
index f5525f1..fffeb11 100644
--- a/arch/arm64/include/uapi/asm/kvm.h
+++ b/arch/arm64/include/uapi/asm/kvm.h
@@ -87,6 +87,62 @@ struct kvm_sync_regs {
 struct kvm_arch_memory_slot {
 };
 
+/* If you need to interpret the index values, here is the key: */
+#define KVM_REG_ARM_COPROC_MASK0x0FFF
+#define KVM_REG_ARM_COPROC_SHIFT   16
+#define KVM_REG_ARM_32_OPC2_MASK   0x0007
+#define KVM_REG_ARM_32_OPC2_SHIFT  0
+#define KVM_REG_ARM_OPC1_MASK  0x0078
+#define KVM_REG_ARM_OPC1_SHIFT 3
+#define KVM_REG_ARM_CRM_MASK   0x0780
+#define KVM_REG_ARM_CRM_SHIFT  7
+#define KVM_REG_ARM_32_CRN_MASK0x7800
+#define KVM_REG_ARM_32_CRN_SHIFT   11
+
+/* Normal registers are mapped as coprocessor 16. */
+#define KVM_REG_ARM_CORE   (0x0010 << KVM_REG_ARM_COPROC_SHIFT)
+#define KVM_REG_ARM_CORE_REG(name) (offsetof(struct kvm_regs, name) / 
sizeof(unsigned long))
+
+/* Some registers need more space to represent values. */
+#define KVM_REG_ARM_DEMUX  (0x0011 << KVM_REG_ARM_COPROC_SHIFT)
+#define KVM_REG_ARM_DEMUX_ID_MASK  0xFF00
+#define KVM_REG_ARM_DEMUX_ID_SHIFT 8
+#define KVM_REG_ARM_DEMUX_ID_CCSIDR(0x00 << KVM_REG_ARM_DEMUX_ID_SHIFT)
+#define KVM_REG_ARM_DEMUX_VAL_MASK 0x00FF
+#define KVM_REG_ARM_DEMUX_VAL_SHIFT0
+
+/* VFP registers: we could overload CP10 like ARM does, but that's ugly. */
+#define KVM_REG_ARM_VFP(0x0012 << KVM_REG_ARM_COPROC_SHIFT)
+#define KVM_REG_ARM_VFP_MASK   0x
+#define KVM_REG_ARM_VFP_BASE_REG   0x0
+#define KVM_REG_ARM_VFP_FPSID  0x1000
+#define KVM_REG_ARM_VFP_FPSCR  0x1001
+#define KVM_REG_ARM_VFP_MVFR1  0x1006
+#define KVM_REG_ARM_VFP_MVFR0  0x1007
+#define KVM_REG_ARM_VFP_FPEXC  0x1008
+#define KVM_REG_ARM_VFP_FPINST 0x1009
+#define KVM_REG_ARM_VFP_FPINST20x100A
+
+/* AArch64 system registers */
+#define KVM_REG_ARM64_SYSREG   (0x0013 << KVM_REG_ARM_COPROC_SHIFT)
+#define KVM_REG_ARM64_SYSREG_OP0_MASK  0xc000
+#define KVM_REG_ARM64_SYSREG_OP0_SHIFT 14
+#define KVM_REG_ARM64_SYSREG_OP1_MASK  0x3800
+#define KVM_REG_ARM64_SYSREG_OP1_SHIFT 11
+#define 

[PATCH 08/29] arm64: KVM: user space interface

2013-03-04 Thread Marc Zyngier
Provide the kvm.h file that defines the user space visible
interface.

Signed-off-by: Marc Zyngier marc.zyng...@arm.com
---
 arch/arm64/include/uapi/asm/kvm.h | 112 ++
 1 file changed, 112 insertions(+)
 create mode 100644 arch/arm64/include/uapi/asm/kvm.h

diff --git a/arch/arm64/include/uapi/asm/kvm.h 
b/arch/arm64/include/uapi/asm/kvm.h
new file mode 100644
index 000..f5525f1
--- /dev/null
+++ b/arch/arm64/include/uapi/asm/kvm.h
@@ -0,0 +1,112 @@
+/*
+ * Copyright (C) 2012 - ARM Ltd
+ * Author: Marc Zyngier marc.zyng...@arm.com
+ *
+ * Derived from arch/arm/include/uapi/asm/kvm.h:
+ * Copyright (C) 2012 - Virtual Open Systems and Columbia University
+ * Author: Christoffer Dall c.d...@virtualopensystems.com
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see http://www.gnu.org/licenses/.
+ */
+
+#ifndef __ARM_KVM_H__
+#define __ARM_KVM_H__
+
+#define KVM_SPSR_EL1   0
+#define KVM_NR_SPSR1
+
+#ifndef __ASSEMBLY__
+#include asm/types.h
+#include asm/ptrace.h
+
+#define __KVM_HAVE_GUEST_DEBUG
+#define __KVM_HAVE_IRQ_LINE
+
+#define KVM_REG_SIZE(id)   \
+   (1U << (((id) & KVM_REG_SIZE_MASK) >> KVM_REG_SIZE_SHIFT))
+
+struct kvm_regs {
+   struct user_pt_regs regs;   /* sp = sp_el0 */
+
+   unsigned long   sp_el1;
+   unsigned long   elr_el1;
+
+   unsigned long   spsr[KVM_NR_SPSR];
+};
+
+/* Supported Processor Types */
+#define KVM_ARM_TARGET_CORTEX_A57  0
+#define KVM_ARM_NUM_TARGETS1
+
+/* KVM_ARM_SET_DEVICE_ADDR ioctl id encoding */
+#define KVM_ARM_DEVICE_TYPE_SHIFT  0
+#define KVM_ARM_DEVICE_TYPE_MASK   (0xffff << KVM_ARM_DEVICE_TYPE_SHIFT)
+#define KVM_ARM_DEVICE_ID_SHIFT16
+#define KVM_ARM_DEVICE_ID_MASK (0xffff << KVM_ARM_DEVICE_ID_SHIFT)
+
+/* Supported device IDs */
+#define KVM_ARM_DEVICE_VGIC_V2 0
+
+/* Supported VGIC address types  */
+#define KVM_VGIC_V2_ADDR_TYPE_DIST 0
+#define KVM_VGIC_V2_ADDR_TYPE_CPU  1
+
+#define KVM_VGIC_V2_DIST_SIZE  0x1000
+#define KVM_VGIC_V2_CPU_SIZE   0x2000
+
+struct kvm_vcpu_init {
+   __u32 target;
+   __u32 features[7];
+};
+
+struct kvm_sregs {
+};
+
+struct kvm_fpu {
+};
+
+struct kvm_guest_debug_arch {
+};
+
+struct kvm_debug_exit_arch {
+};
+
+struct kvm_sync_regs {
+};
+
+struct kvm_arch_memory_slot {
+};
+
+/* KVM_IRQ_LINE irq field index values */
+#define KVM_ARM_IRQ_TYPE_SHIFT 24
+#define KVM_ARM_IRQ_TYPE_MASK  0xff
+#define KVM_ARM_IRQ_VCPU_SHIFT 16
+#define KVM_ARM_IRQ_VCPU_MASK  0xff
+#define KVM_ARM_IRQ_NUM_SHIFT  0
+#define KVM_ARM_IRQ_NUM_MASK   0x
+
+/* irq_type field */
+#define KVM_ARM_IRQ_TYPE_CPU   0
+#define KVM_ARM_IRQ_TYPE_SPI   1
+#define KVM_ARM_IRQ_TYPE_PPI   2
+
+/* out-of-kernel GIC cpu interrupt injection irq_number field */
+#define KVM_ARM_IRQ_CPU_IRQ0
+#define KVM_ARM_IRQ_CPU_FIQ1
+
+/* Highest supported SPI, from VGIC_NR_IRQS */
+#define KVM_ARM_IRQ_GIC_MAX127
+
+#endif
+
+#endif /* __ARM_KVM_H__ */
-- 
1.7.12.4

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 22/29] arm64: KVM: define 32bit specific registers

2013-03-04 Thread Marc Zyngier
Define the 32bit specific registers (SPSRs, cp15...).

Most CPU registers are directly mapped to a 64bit register
(r0-x0...). Only the SPSRs have separate registers.

cp15 registers are also mapped into their 64bit counterpart in most
cases.

Signed-off-by: Marc Zyngier marc.zyng...@arm.com
---
 arch/arm64/include/asm/kvm_asm.h  | 38 +-
 arch/arm64/include/asm/kvm_host.h |  5 -
 arch/arm64/include/uapi/asm/kvm.h |  7 ++-
 3 files changed, 47 insertions(+), 3 deletions(-)

diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h
index 851fee5..3f4e6e1 100644
--- a/arch/arm64/include/asm/kvm_asm.h
+++ b/arch/arm64/include/asm/kvm_asm.h
@@ -42,7 +42,43 @@
 #defineTPIDR_EL1   18  /* Thread ID, Privileged */
 #defineAMAIR_EL1   19  /* Aux Memory Attribute Indirection 
Register */
 #defineCNTKCTL_EL1 20  /* Timer Control Register (EL1) */
-#defineNR_SYS_REGS 21
+/* 32bit specific registers. Keep them at the end of the range */
+#defineDACR32_EL2  21  /* Domain Access Control Register */
+#defineIFSR32_EL2  22  /* Instruction Fault Status Register */
+#defineFPEXC32_EL2 23  /* Floating-Point Exception Control 
Register */
+#defineDBGVCR32_EL224  /* Debug Vector Catch Register */
+#defineTEECR32_EL1 25  /* ThumbEE Configuration Register */
+#defineTEEHBR32_EL126  /* ThumbEE Handler Base Register */
+#defineNR_SYS_REGS 27
+
+/* 32bit mapping */
+#define c0_MPIDR   (MPIDR_EL1 * 2) /* MultiProcessor ID Register */
+#define c0_CSSELR  (CSSELR_EL1 * 2)/* Cache Size Selection Register */
+#define c1_SCTLR   (SCTLR_EL1 * 2) /* System Control Register */
+#define c1_ACTLR   (ACTLR_EL1 * 2) /* Auxilliary Control Register */
+#define c1_CPACR   (CPACR_EL1 * 2) /* Coprocessor Access Control */
+#define c2_TTBR0   (TTBR0_EL1 * 2) /* Translation Table Base Register 0 */
+#define c2_TTBR0_high  (c2_TTBR0 + 1)  /* TTBR0 top 32 bits */
+#define c2_TTBR1   (TTBR1_EL1 * 2) /* Translation Table Base Register 1 */
+#define c2_TTBR1_high  (c2_TTBR1 + 1)  /* TTBR1 top 32 bits */
+#define c2_TTBCR   (TCR_EL1 * 2)   /* Translation Table Base Control R. */
+#define c3_DACR(DACR32_EL2 * 2)/* Domain Access Control 
Register */
+#define c5_DFSR(ESR_EL1 * 2)   /* Data Fault Status Register */
+#define c5_IFSR(IFSR32_EL2 * 2)/* Instruction Fault Status 
Register */
+#define c5_ADFSR   (AFSR0_EL1 * 2) /* Auxilary Data Fault Status R */
+#define c5_AIFSR   (AFSR1_EL1 * 2) /* Auxilary Instr Fault Status R */
+#define c6_DFAR(FAR_EL1 * 2)   /* Data Fault Address Register 
*/
+#define c6_IFAR(c6_DFAR + 1)   /* Instruction Fault Address 
Register */
+#define c10_PRRR   (MAIR_EL1 * 2)  /* Primary Region Remap Register */
+#define c10_NMRR   (c10_PRRR + 1)  /* Normal Memory Remap Register */
+#define c12_VBAR   (VBAR_EL1 * 2)  /* Vector Base Address Register */
+#define c13_CID(CONTEXTIDR_EL1 * 2)/* Context ID Register 
*/
+#define c13_TID_URW(TPIDR_EL0 * 2) /* Thread ID, User R/W */
+#define c13_TID_URO(TPIDRRO_EL0 * 2)/* Thread ID, User R/O */
+#define c13_TID_PRIV   (TPIDR_EL1 * 2) /* Thread ID, Priveleged */
+#define c10_AMAIR  (AMAIR_EL1 * 2) /* Aux Memory Attr Indirection Reg */
+#define c14_CNTKCTL(CNTKCTL_EL1 * 2) /* Timer Control Register (PL1) */
+#define NR_CP15_REGS   (NR_SYS_REGS * 2)
 
 #define ARM_EXCEPTION_IRQ0
 #define ARM_EXCEPTION_TRAP   1
diff --git a/arch/arm64/include/asm/kvm_host.h 
b/arch/arm64/include/asm/kvm_host.h
index 68558ac..24dc8d7 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -85,7 +85,10 @@ typedef struct user_fpsimd_state kvm_kernel_vfp_t;
 
 struct kvm_vcpu_arch {
struct kvm_regs regs;
-   u64 sys_regs[NR_SYS_REGS];
+   union {
+   u64 sys_regs[NR_SYS_REGS];
+   u32 cp15[NR_CP15_REGS];
+   };
 
/* HYP configuration */
u64 hcr_el2;
diff --git a/arch/arm64/include/uapi/asm/kvm.h 
b/arch/arm64/include/uapi/asm/kvm.h
index 24c8318..f9c269e 100644
--- a/arch/arm64/include/uapi/asm/kvm.h
+++ b/arch/arm64/include/uapi/asm/kvm.h
@@ -23,7 +23,12 @@
 #define __ARM_KVM_H__
 
 #define KVM_SPSR_EL1   0
-#define KVM_NR_SPSR1
+#define KVM_SPSR_SVC   KVM_SPSR_EL1
+#define KVM_SPSR_ABT   1
+#define KVM_SPSR_UND   2
+#define KVM_SPSR_IRQ   3
+#define KVM_SPSR_FIQ   4
+#define KVM_NR_SPSR5
 
 #ifndef __ASSEMBLY__
 #include asm/types.h
-- 
1.7.12.4

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 21/29] arm64: KVM: Build system integration

2013-03-04 Thread Marc Zyngier
Signed-off-by: Marc Zyngier marc.zyng...@arm.com
---
 arch/arm64/Kconfig  |  2 ++
 arch/arm64/Makefile |  2 +-
 arch/arm64/kvm/Kconfig  | 59 +
 arch/arm64/kvm/Makefile | 18 +++
 4 files changed, 80 insertions(+), 1 deletion(-)
 create mode 100644 arch/arm64/kvm/Kconfig
 create mode 100644 arch/arm64/kvm/Makefile

diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 536611a..cff688b 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -241,6 +241,8 @@ source drivers/Kconfig
 
 source fs/Kconfig
 
+source arch/arm64/kvm/Kconfig
+
 source arch/arm64/Kconfig.debug
 
 source security/Kconfig
diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile
index c95c5cb..ae89e63 100644
--- a/arch/arm64/Makefile
+++ b/arch/arm64/Makefile
@@ -36,7 +36,7 @@ TEXT_OFFSET := 0x0008
 
 export TEXT_OFFSET GZFLAGS
 
-core-y += arch/arm64/kernel/ arch/arm64/mm/
+core-y += arch/arm64/kernel/ arch/arm64/mm/ arch/arm64/kvm/
 libs-y := arch/arm64/lib/ $(libs-y)
 libs-y += $(LIBGCC)
 
diff --git a/arch/arm64/kvm/Kconfig b/arch/arm64/kvm/Kconfig
new file mode 100644
index 000..a76be8b
--- /dev/null
+++ b/arch/arm64/kvm/Kconfig
@@ -0,0 +1,59 @@
+#
+# KVM configuration
+#
+
+source virt/kvm/Kconfig
+
+menuconfig VIRTUALIZATION
+   bool Virtualization
+   ---help---
+ Say Y here to get to see options for using your Linux host to run
+ other operating systems inside virtual machines (guests).
+ This option alone does not add any kernel code.
+
+ If you say N, all options in this submenu will be skipped and
+ disabled.
+
+if VIRTUALIZATION
+
+config KVM
+   bool Kernel-based Virtual Machine (KVM) support
+   select PREEMPT_NOTIFIERS
+   select ANON_INODES
+   select KVM_MMIO
+   select KVM_ARM_HOST
+   select KVM_ARM_VGIC
+   select KVM_ARM_TIMER
+   ---help---
+ Support hosting virtualized guest machines.
+
+ This module provides access to the hardware capabilities through
+ a character device node named /dev/kvm.
+
+ If unsure, say N.
+
+config KVM_ARM_HOST
+   bool
+   depends on KVM
+   depends on MMU
+   select  MMU_NOTIFIER
+   ---help---
+ Provides host support for ARM processors.
+
+config KVM_ARM_VGIC
+bool
+   depends on KVM_ARM_HOST && OF
+   select HAVE_KVM_IRQCHIP
+   ---help---
+ Adds support for a hardware assisted, in-kernel GIC emulation.
+
+config KVM_ARM_TIMER
+bool
+   depends on KVM_ARM_VGIC
+   select HAVE_KVM_IRQCHIP
+   ---help---
+ Adds support for the Architected Timers in virtual machines
+
+source drivers/virtio/Kconfig
+
+endif # VIRTUALIZATION
diff --git a/arch/arm64/kvm/Makefile b/arch/arm64/kvm/Makefile
new file mode 100644
index 000..14ba38d
--- /dev/null
+++ b/arch/arm64/kvm/Makefile
@@ -0,0 +1,18 @@
+#
+# Makefile for Kernel-based Virtual Machine module
+#
+
+ccflags-y += -Ivirt/kvm -Iarch/arm64/kvm
+CFLAGS_arm.o := -I.
+CFLAGS_mmu.o := -I.
+
+obj-$(CONFIG_KVM_ARM_HOST) += $(addprefix ../../../virt/kvm/, kvm_main.o 
coalesced_mmio.o)
+
+obj-$(CONFIG_KVM_ARM_HOST) += $(addprefix ../../../arch/arm/kvm/, arm.o mmu.o 
mmio.o psci.o perf.o)
+
+obj-$(CONFIG_KVM_ARM_HOST) += inject_fault.o
+obj-$(CONFIG_KVM_ARM_HOST) += hyp.o hyp-init.o handle_exit.o idmap.o
+obj-$(CONFIG_KVM_ARM_HOST) += guest.o reset.o sys_regs.o sys_regs_a57.o
+
+obj-$(CONFIG_KVM_ARM_VGIC) += $(addprefix ../../../arch/arm/kvm/, vgic.o)
+obj-$(CONFIG_KVM_ARM_TIMER) += $(addprefix ../../../arch/arm/kvm/, 
arch_timer.o)
-- 
1.7.12.4

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 20/29] arm64: KVM: PSCI implementation

2013-03-04 Thread Marc Zyngier
Wire the PSCI backend into the exit handling code.

Signed-off-by: Marc Zyngier marc.zyng...@arm.com
---
 arch/arm64/include/asm/kvm_host.h |  2 +-
 arch/arm64/include/asm/kvm_psci.h | 23 +++
 arch/arm64/include/uapi/asm/kvm.h | 16 
 arch/arm64/kvm/handle_exit.c  | 16 +++-
 4 files changed, 47 insertions(+), 10 deletions(-)
 create mode 100644 arch/arm64/include/asm/kvm_psci.h

diff --git a/arch/arm64/include/asm/kvm_host.h 
b/arch/arm64/include/asm/kvm_host.h
index 85e706b..68558ac 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -34,7 +34,7 @@
 #include asm/kvm_vgic.h
 #include asm/kvm_arch_timer.h
 
-#define KVM_VCPU_MAX_FEATURES 0
+#define KVM_VCPU_MAX_FEATURES 1
 
 /* We don't currently support large pages. */
 #define KVM_HPAGE_GFN_SHIFT(x) 0
diff --git a/arch/arm64/include/asm/kvm_psci.h 
b/arch/arm64/include/asm/kvm_psci.h
new file mode 100644
index 000..d96f054
--- /dev/null
+++ b/arch/arm64/include/asm/kvm_psci.h
@@ -0,0 +1,23 @@
+/*
+ * Copyright (C) 2012 - ARM Ltd
+ * Author: Marc Zyngier marc.zyng...@arm.com
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see http://www.gnu.org/licenses/.
+ */
+
+#ifndef __ARM64_KVM_PSCI_H__
+#define __ARM64_KVM_PSCI_H__
+
+bool kvm_psci_call(struct kvm_vcpu *vcpu);
+
+#endif /* __ARM64_KVM_PSCI_H__ */
diff --git a/arch/arm64/include/uapi/asm/kvm.h 
b/arch/arm64/include/uapi/asm/kvm.h
index fffeb11..24c8318 100644
--- a/arch/arm64/include/uapi/asm/kvm.h
+++ b/arch/arm64/include/uapi/asm/kvm.h
@@ -64,6 +64,8 @@ struct kvm_regs {
 #define KVM_VGIC_V2_DIST_SIZE  0x1000
 #define KVM_VGIC_V2_CPU_SIZE   0x2000
 
+#define KVM_ARM_VCPU_POWER_OFF 0 /* CPU is started in OFF state */
+
 struct kvm_vcpu_init {
__u32 target;
__u32 features[7];
@@ -163,6 +165,20 @@ struct kvm_arch_memory_slot {
 /* Highest supported SPI, from VGIC_NR_IRQS */
 #define KVM_ARM_IRQ_GIC_MAX127
 
+/* PSCI interface */
+#define KVM_PSCI_FN_BASE   0x95c1ba5e
+#define KVM_PSCI_FN(n) (KVM_PSCI_FN_BASE + (n))
+
+#define KVM_PSCI_FN_CPU_SUSPENDKVM_PSCI_FN(0)
+#define KVM_PSCI_FN_CPU_OFFKVM_PSCI_FN(1)
+#define KVM_PSCI_FN_CPU_ON KVM_PSCI_FN(2)
+#define KVM_PSCI_FN_MIGRATEKVM_PSCI_FN(3)
+
+#define KVM_PSCI_RET_SUCCESS   0
+#define KVM_PSCI_RET_NI((unsigned long)-1)
+#define KVM_PSCI_RET_INVAL ((unsigned long)-2)
+#define KVM_PSCI_RET_DENIED((unsigned long)-3)
+
 #endif
 
 #endif /* __ARM_KVM_H__ */
diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c
index 0e1fa4e..fa38230 100644
--- a/arch/arm64/kvm/handle_exit.c
+++ b/arch/arm64/kvm/handle_exit.c
@@ -24,26 +24,24 @@
 #include asm/kvm_emulate.h
 #include asm/kvm_coproc.h
 #include asm/kvm_mmu.h
+#include asm/kvm_psci.h
 
 typedef int (*exit_handle_fn)(struct kvm_vcpu *, struct kvm_run *);
 
 static int handle_hvc(struct kvm_vcpu *vcpu, struct kvm_run *run)
 {
-   /*
-* Guest called HVC instruction:
-* Let it know we don't want that by injecting an undefined exception.
-*/
-   kvm_debug(hvc: %x (at %08lx), kvm_vcpu_get_hsr(vcpu)  ((1  16) - 
1),
- *vcpu_pc(vcpu));
-   kvm_debug( HSR: %8x, kvm_vcpu_get_hsr(vcpu));
+   if (kvm_psci_call(vcpu))
+   return 1;
+
kvm_inject_undefined(vcpu);
return 1;
 }
 
 static int handle_smc(struct kvm_vcpu *vcpu, struct kvm_run *run)
 {
-   /* We don't support SMC; don't do that. */
-   kvm_debug(smc: at %08lx, *vcpu_pc(vcpu));
+   if (kvm_psci_call(vcpu))
+   return 1;
+
kvm_inject_undefined(vcpu);
return 1;
 }
-- 
1.7.12.4

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 18/29] arm64: KVM: Plug the VGIC

2013-03-04 Thread Marc Zyngier
Shouldn't be needed - a complete duplicate from arch/arm.

Signed-off-by: Marc Zyngier marc.zyng...@arm.com
---
 arch/arm64/include/asm/kvm_vgic.h | 156 ++
 1 file changed, 156 insertions(+)
 create mode 100644 arch/arm64/include/asm/kvm_vgic.h

diff --git a/arch/arm64/include/asm/kvm_vgic.h 
b/arch/arm64/include/asm/kvm_vgic.h
new file mode 100644
index 000..f353f22
--- /dev/null
+++ b/arch/arm64/include/asm/kvm_vgic.h
@@ -0,0 +1,156 @@
+/*
+ * Copyright (C) 2012 ARM Ltd.
+ * Author: Marc Zyngier marc.zyng...@arm.com
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see http://www.gnu.org/licenses/.
+ */
+
+#ifndef __ARM64_KVM_VGIC_H
+#define __ARM64_KVM_VGIC_H
+
+#include <linux/kernel.h>
+#include <linux/kvm.h>
+#include <linux/irqreturn.h>
+#include <linux/spinlock.h>
+#include <linux/types.h>
+#include <linux/irqchip/arm-gic.h>
+
+#define VGIC_NR_IRQS   128
+#define VGIC_NR_SGIS   16
+#define VGIC_NR_PPIS   16
+#define VGIC_NR_PRIVATE_IRQS   (VGIC_NR_SGIS + VGIC_NR_PPIS)
+#define VGIC_NR_SHARED_IRQS(VGIC_NR_IRQS - VGIC_NR_PRIVATE_IRQS)
+#define VGIC_MAX_CPUS  KVM_MAX_VCPUS
+
+/* Sanity checks... */
+#if (VGIC_MAX_CPUS > 8)
+#error Invalid number of CPU interfaces
+#endif
+
+#if (VGIC_NR_IRQS & 31)
+#error VGIC_NR_IRQS must be a multiple of 32
+#endif
+
+#if (VGIC_NR_IRQS > 1024)
+#error VGIC_NR_IRQS must be <= 1024
+#endif
+
+/*
+ * The GIC distributor registers describing interrupts have two parts:
+ * - 32 per-CPU interrupts (SGI + PPI)
+ * - a bunch of shared interrupts (SPI)
+ */
+struct vgic_bitmap {
+   union {
+   u32 reg[VGIC_NR_PRIVATE_IRQS / 32];
+   DECLARE_BITMAP(reg_ul, VGIC_NR_PRIVATE_IRQS);
+   } percpu[VGIC_MAX_CPUS];
+   union {
+   u32 reg[VGIC_NR_SHARED_IRQS / 32];
+   DECLARE_BITMAP(reg_ul, VGIC_NR_SHARED_IRQS);
+   } shared;
+};
+
+struct vgic_bytemap {
+   u32 percpu[VGIC_MAX_CPUS][VGIC_NR_PRIVATE_IRQS / 4];
+   u32 shared[VGIC_NR_SHARED_IRQS  / 4];
+};
+
+struct vgic_dist {
+   spinlock_t  lock;
+   boolready;
+
+   /* Virtual control interface mapping */
+   void __iomem*vctrl_base;
+
+   /* Distributor and vcpu interface mapping in the guest */
+   phys_addr_t vgic_dist_base;
+   phys_addr_t vgic_cpu_base;
+
+   /* Distributor enabled */
+   u32 enabled;
+
+   /* Interrupt enabled (one bit per IRQ) */
+   struct vgic_bitmap  irq_enabled;
+
+   /* Interrupt 'pin' level */
+   struct vgic_bitmap  irq_state;
+
+   /* Level-triggered interrupt in progress */
+   struct vgic_bitmap  irq_active;
+
+   /* Interrupt priority. Not used yet. */
+   struct vgic_bytemap irq_priority;
+
+   /* Level/edge triggered */
+   struct vgic_bitmap  irq_cfg;
+
+   /* Source CPU per SGI and target CPU */
+   u8  irq_sgi_sources[VGIC_MAX_CPUS][16];
+
+   /* Target CPU for each IRQ */
+   u8  irq_spi_cpu[VGIC_NR_SHARED_IRQS];
+   struct vgic_bitmap  irq_spi_target[VGIC_MAX_CPUS];
+
+   /* Bitmap indicating which CPU has something pending */
+   unsigned long   irq_pending_on_cpu;
+};
+
+struct vgic_cpu {
+   /* per IRQ to LR mapping */
+   u8  vgic_irq_lr_map[VGIC_NR_IRQS];
+
+   /* Pending interrupts on this VCPU */
+   DECLARE_BITMAP( pending_percpu, VGIC_NR_PRIVATE_IRQS);
+   DECLARE_BITMAP( pending_shared, VGIC_NR_SHARED_IRQS);
+
+   /* Bitmap of used/free list registers */
+   DECLARE_BITMAP( lr_used, 64);
+
+   /* Number of list registers on this CPU */
+   int nr_lr;
+
+   /* CPU vif control registers for world switch */
+   u32 vgic_hcr;
+   u32 vgic_vmcr;
+   u32 vgic_misr;  /* Saved only */
+   u32 vgic_eisr[2];   /* Saved only */
+   u32 vgic_elrsr[2];  /* Saved only */
+   u32 vgic_apr;
+   u32 vgic_lr[64];/* A15 has only 4... */
+};
+
+#define LR_EMPTY   0xff
+
+struct kvm;
+struct kvm_vcpu;
+struct kvm_run;
+struct kvm_exit_mmio;
+
+int kvm_vgic_set_addr(struct kvm *kvm, unsigned long type, u64 addr);
+int kvm_vgic_hyp_init(void);
+int kvm_vgic_init(struct kvm *kvm);
+int kvm_vgic_create(struct kvm *kvm);
+int 

[PATCH 13/29] arm64: KVM: MMIO access backend

2013-03-04 Thread Marc Zyngier
Define the necessary structures to perform an MMIO access.

Signed-off-by: Marc Zyngier marc.zyng...@arm.com
---
 arch/arm64/include/asm/kvm_mmio.h | 59 +++
 1 file changed, 59 insertions(+)
 create mode 100644 arch/arm64/include/asm/kvm_mmio.h

diff --git a/arch/arm64/include/asm/kvm_mmio.h 
b/arch/arm64/include/asm/kvm_mmio.h
new file mode 100644
index 000..fc2f689
--- /dev/null
+++ b/arch/arm64/include/asm/kvm_mmio.h
@@ -0,0 +1,59 @@
+/*
+ * Copyright (C) 2012 - Virtual Open Systems and Columbia University
+ * Author: Christoffer Dall c.d...@virtualopensystems.com
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see http://www.gnu.org/licenses/.
+ */
+
+#ifndef __ARM64_KVM_MMIO_H__
+#define __ARM64_KVM_MMIO_H__
+
+#include <linux/kvm_host.h>
+#include <asm/kvm_asm.h>
+#include <asm/kvm_arm.h>
+
+/*
+ * This is annoying. The mmio code requires this, even if we don't
+ * need any decoding. To be fixed.
+ */
+struct kvm_decode {
+   unsigned long rt;
+   bool sign_extend;
+};
+
+/*
+ * The in-kernel MMIO emulation code wants to use a copy of run-mmio,
+ * which is an anonymous type. Use our own type instead.
+ */
+struct kvm_exit_mmio {
+   phys_addr_t phys_addr;
+   u8  data[8];
+   u32 len;
+   boolis_write;
+};
+
+static inline void kvm_prepare_mmio(struct kvm_run *run,
+   struct kvm_exit_mmio *mmio)
+{
+   run->mmio.phys_addr = mmio->phys_addr;
+   run->mmio.len   = mmio->len;
+   run->mmio.is_write  = mmio->is_write;
+   memcpy(run->mmio.data, mmio->data, mmio->len);
+   run->exit_reason= KVM_EXIT_MMIO;
+}
+
+int kvm_handle_mmio_return(struct kvm_vcpu *vcpu, struct kvm_run *run);
+int io_mem_abort(struct kvm_vcpu *vcpu, struct kvm_run *run,
+phys_addr_t fault_ipa);
+
+#endif /* __ARM64_KVM_MMIO_H__ */
-- 
1.7.12.4

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 16/29] arm64: KVM: HYP mode world switch implementation

2013-03-04 Thread Marc Zyngier
The HYP mode world switch in all its glory.

Implements save/restore of host/guest registers, EL2 trapping,
IPA resolution, and additional services (tlb invalidation).

Signed-off-by: Marc Zyngier marc.zyng...@arm.com
---
 arch/arm64/kernel/asm-offsets.c |  33 ++
 arch/arm64/kvm/hyp.S| 756 
 2 files changed, 789 insertions(+)
 create mode 100644 arch/arm64/kvm/hyp.S

diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c
index a2a4d81..a7f706a 100644
--- a/arch/arm64/kernel/asm-offsets.c
+++ b/arch/arm64/kernel/asm-offsets.c
@@ -21,6 +21,7 @@
 #include linux/sched.h
 #include linux/mm.h
 #include linux/dma-mapping.h
+#include linux/kvm_host.h
 #include asm/thread_info.h
 #include asm/memory.h
 #include asm/cputable.h
@@ -104,5 +105,37 @@ int main(void)
   BLANK();
   DEFINE(TZ_MINWEST,   offsetof(struct timezone, tz_minuteswest));
   DEFINE(TZ_DSTTIME,   offsetof(struct timezone, tz_dsttime));
+  BLANK();
+#ifdef CONFIG_KVM_ARM_HOST
+  DEFINE(VCPU_REGS,offsetof(struct kvm_vcpu, arch.regs));
+  DEFINE(VCPU_USER_PT_REGS,offsetof(struct kvm_regs, regs));
+  DEFINE(VCPU_VFP_GUEST,   offsetof(struct kvm_vcpu, arch.vfp_guest));
+  DEFINE(VCPU_VFP_HOST,offsetof(struct kvm_vcpu, 
arch.vfp_host));
+  DEFINE(VCPU_HCR_EL2, offsetof(struct kvm_vcpu, arch.hcr_el2));
+  DEFINE(VCPU_IRQ_LINES,   offsetof(struct kvm_vcpu, arch.irq_lines));
+  DEFINE(VCPU_SP_EL1,  offsetof(struct kvm_vcpu, arch.regs.sp_el1));
+  DEFINE(VCPU_ELR_EL1, offsetof(struct kvm_vcpu, arch.regs.elr_el1));
+  DEFINE(VCPU_SPSR,offsetof(struct kvm_vcpu, arch.regs.spsr));
+  DEFINE(VCPU_SYSREGS, offsetof(struct kvm_vcpu, arch.sys_regs));
+  DEFINE(VCPU_ESR_EL2, offsetof(struct kvm_vcpu, arch.fault.esr_el2));
+  DEFINE(VCPU_FAR_EL2, offsetof(struct kvm_vcpu, arch.fault.far_el2));
+  DEFINE(VCPU_HPFAR_EL2,   offsetof(struct kvm_vcpu, 
arch.fault.hpfar_el2));
+  DEFINE(VCPU_TIMER_CNTV_CTL,  offsetof(struct kvm_vcpu, 
arch.timer_cpu.cntv_ctl));
+  DEFINE(VCPU_TIMER_CNTV_CVAL, offsetof(struct kvm_vcpu, 
arch.timer_cpu.cntv_cval));
+  DEFINE(KVM_TIMER_CNTVOFF,offsetof(struct kvm, arch.timer.cntvoff));
+  DEFINE(KVM_TIMER_ENABLED,offsetof(struct kvm, arch.timer.enabled));
+  DEFINE(VCPU_KVM, offsetof(struct kvm_vcpu, kvm));
+  DEFINE(VCPU_VGIC_CPU,offsetof(struct kvm_vcpu, 
arch.vgic_cpu));
+  DEFINE(VGIC_CPU_HCR, offsetof(struct vgic_cpu, vgic_hcr));
+  DEFINE(VGIC_CPU_VMCR,offsetof(struct vgic_cpu, vgic_vmcr));
+  DEFINE(VGIC_CPU_MISR,offsetof(struct vgic_cpu, vgic_misr));
+  DEFINE(VGIC_CPU_EISR,offsetof(struct vgic_cpu, vgic_eisr));
+  DEFINE(VGIC_CPU_ELRSR,   offsetof(struct vgic_cpu, vgic_elrsr));
+  DEFINE(VGIC_CPU_APR, offsetof(struct vgic_cpu, vgic_apr));
+  DEFINE(VGIC_CPU_LR,  offsetof(struct vgic_cpu, vgic_lr));
+  DEFINE(VGIC_CPU_NR_LR,   offsetof(struct vgic_cpu, nr_lr));
+  DEFINE(KVM_VTTBR,offsetof(struct kvm, arch.vttbr));
+  DEFINE(KVM_VGIC_VCTRL,   offsetof(struct kvm, arch.vgic.vctrl_base));
+#endif
   return 0;
 }
diff --git a/arch/arm64/kvm/hyp.S b/arch/arm64/kvm/hyp.S
new file mode 100644
index 000..cd7506d
--- /dev/null
+++ b/arch/arm64/kvm/hyp.S
@@ -0,0 +1,756 @@
+/*
+ * Copyright (C) 2012 - ARM Ltd
+ * Author: Marc Zyngier marc.zyng...@arm.com
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see http://www.gnu.org/licenses/.
+ */
+
+#include linux/linkage.h
+#include linux/irqchip/arm-gic.h
+
+#include asm/assembler.h
+#include asm/memory.h
+#include asm/asm-offsets.h
+#include asm/fpsimdmacros.h
+#include asm/kvm.h
+#include asm/kvm_asm.h
+#include asm/kvm_arm.h
+#include asm/kvm_mmu.h
+
+#define REG_OFFSET(x)  (VCPU_REGS + VCPU_USER_PT_REGS + 8*x)
+#define SPSR_OFFSET(x) (VCPU_SPSR + 8*x)
+#define SYSREG_OFFSET(x)   (VCPU_SYSREGS + 8*x)
+
+   .text
+   .pushsection.hyp.text, ax
+   .align  PAGE_SHIFT
+
+__kvm_hyp_code_start:
+   .globl __kvm_hyp_code_start
+
+.macro save_host_regs
+   pushx19, x20
+   pushx21, x22
+   pushx23, x24
+   pushx25, x26
+   pushx27, x28
+   pushx29, lr
+
+   mrs x19, sp_el0
+   mrs x20, sp_el1
+   mrs x21, elr_el1
+   mrs x22, spsr_el1
+   mrs x23, 

[PATCH 10/29] arm64: KVM: Cortex-A57 specific system registers handling

2013-03-04 Thread Marc Zyngier
Add the support code for Cortex-A57 specific system registers.

Signed-off-by: Marc Zyngier marc.zyng...@arm.com
---
 arch/arm64/kvm/sys_regs_a57.c | 96 +++
 1 file changed, 96 insertions(+)
 create mode 100644 arch/arm64/kvm/sys_regs_a57.c

diff --git a/arch/arm64/kvm/sys_regs_a57.c b/arch/arm64/kvm/sys_regs_a57.c
new file mode 100644
index 000..dcc88fe
--- /dev/null
+++ b/arch/arm64/kvm/sys_regs_a57.c
@@ -0,0 +1,96 @@
+/*
+ * Copyright (C) 2012 - ARM Ltd
+ * Author: Marc Zyngier marc.zyng...@arm.com
+ *
+ * Based on arch/arm/kvm/coproc_a15.c:
+ * Copyright (C) 2012 - Virtual Open Systems and Columbia University
+ * Authors: Rusty Russell ru...@rustcorp.au
+ *  Christoffer Dall c.d...@virtualopensystems.com
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see http://www.gnu.org/licenses/.
+ */
+#include linux/kvm_host.h
+#include asm/cputype.h
+#include asm/kvm_arm.h
+#include asm/kvm_asm.h
+#include asm/kvm_host.h
+#include asm/kvm_emulate.h
+#include asm/kvm_coproc.h
+#include linux/init.h
+
+#include sys_regs.h
+
+#define MPIDR_EL1_AFF0_MASK0xff
+
+static void reset_mpidr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r)
+{
+   /*
+* Simply map the vcpu_id into the Aff0 field of the MPIDR.
+*/
+   vcpu-arch.sys_regs[MPIDR_EL1] = (1  31) | (vcpu-vcpu_id  
MPIDR_EL1_AFF0_MASK);
+}
+
+static bool access_actlr(struct kvm_vcpu *vcpu,
+const struct sys_reg_params *p,
+const struct sys_reg_desc *r)
+{
+   if (p-is_write)
+   return ignore_write(vcpu, p);
+
+   *vcpu_reg(vcpu, p-Rt) = vcpu-arch.sys_regs[ACTLR_EL1];
+   return true;
+}
+
+static void reset_actlr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r)
+{
+   u64 actlr;
+
+   asm volatile(mrs %0, actlr_el1\n : =r (actlr));
+   vcpu-arch.sys_regs[ACTLR_EL1] = actlr;
+}
+
+/*
+ * A57-specific sys-reg registers.
+ * Important: Must be sorted ascending by Op0, Op1, CRn, CRm, Op2
+ */
+static const struct sys_reg_desc a57_sys_regs[] = {
+   { Op0(0b11), Op1(0b000), CRn(0b), CRm(0b), Op2(0b101), /* 
MPIDR_EL1 */
+ NULL, reset_mpidr, MPIDR_EL1 },
+   { Op0(0b11), Op1(0b000), CRn(0b0001), CRm(0b), Op2(0b000), /* 
SCTLR_EL1 */
+ NULL, reset_val, SCTLR_EL1, 0x00C50078 },
+   { Op0(0b11), Op1(0b000), CRn(0b0001), CRm(0b), Op2(0b001), /* 
ACTLR_EL1 */
+ access_actlr, reset_actlr, ACTLR_EL1 },
+   { Op0(0b11), Op1(0b000), CRn(0b0001), CRm(0b), Op2(0b010), /* 
CPACR_EL1 */
+ NULL, reset_val, CPACR_EL1, 0 },
+};
+
+static struct kvm_sys_reg_target_table a57_target_table = {
+   .target = KVM_ARM_TARGET_CORTEX_A57,
+   .table64 = {
+   .table = a57_sys_regs,
+   .num = ARRAY_SIZE(a57_sys_regs),
+   },
+};
+
+static int __init sys_reg_a57_init(void)
+{
+   unsigned int i;
+
+   for (i = 1; i  ARRAY_SIZE(a57_sys_regs); i++)
+   BUG_ON(cmp_sys_reg(a57_sys_regs[i-1],
+  a57_sys_regs[i]) = 0);
+
+   kvm_register_target_sys_reg_table(a57_target_table);
+   return 0;
+}
+late_initcall(sys_reg_a57_init);
-- 
1.7.12.4

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 12/29] arm64: KVM: kvm_arch and kvm_vcpu_arch definitions

2013-03-04 Thread Marc Zyngier
Provide the architecture dependent structures for VM and
vcpu abstractions.

Signed-off-by: Marc Zyngier marc.zyng...@arm.com
---
 arch/arm64/include/asm/kvm_host.h | 178 ++
 1 file changed, 178 insertions(+)
 create mode 100644 arch/arm64/include/asm/kvm_host.h

diff --git a/arch/arm64/include/asm/kvm_host.h 
b/arch/arm64/include/asm/kvm_host.h
new file mode 100644
index 000..d1095d1
--- /dev/null
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -0,0 +1,178 @@
+/*
+ * Copyright (C) 2012 - ARM Ltd
+ * Author: Marc Zyngier marc.zyng...@arm.com
+ *
+ * Derived from arch/arm/include/asm/kvm_host.h:
+ * Copyright (C) 2012 - Virtual Open Systems and Columbia University
+ * Author: Christoffer Dall c.d...@virtualopensystems.com
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see http://www.gnu.org/licenses/.
+ */
+
+#ifndef __ARM64_KVM_HOST_H__
+#define __ARM64_KVM_HOST_H__
+
+#include asm/kvm.h
+#include asm/kvm_asm.h
+#include asm/kvm_mmio.h
+
+#define KVM_MAX_VCPUS 4
+#define KVM_USER_MEM_SLOTS 32
+#define KVM_PRIVATE_MEM_SLOTS 4
+#define KVM_COALESCED_MMIO_PAGE_OFFSET 1
+
+#include asm/kvm_vgic.h
+#include asm/kvm_arch_timer.h
+
+#define KVM_VCPU_MAX_FEATURES 0
+
+/* We don't currently support large pages. */
+#define KVM_HPAGE_GFN_SHIFT(x) 0
+#define KVM_NR_PAGE_SIZES  1
+#define KVM_PAGES_PER_HPAGE(x) (1UL31)
+
+struct kvm_vcpu;
+int kvm_target_cpu(void);
+int kvm_reset_vcpu(struct kvm_vcpu *vcpu);
+
+struct kvm_arch {
+   /* The VMID generation used for the virt. memory system */
+   u64vmid_gen;
+   u32vmid;
+
+   /* 1-level 2nd stage table and lock */
+   spinlock_t pgd_lock;
+   pgd_t *pgd;
+
+   /* VTTBR value associated with above pgd and vmid */
+   u64vttbr;
+
+   /* Interrupt controller */
+   struct vgic_distvgic;
+
+   /* Timer */
+   struct arch_timer_kvm   timer;
+};
+
+#define KVM_NR_MEM_OBJS 40
+
+/*
+ * We don't want allocation failures within the mmu code, so we preallocate
+ * enough memory for a single page fault in a cache.
+ */
+struct kvm_mmu_memory_cache {
+   int nobjs;
+   void *objects[KVM_NR_MEM_OBJS];
+};
+
+struct kvm_vcpu_fault_info {
+   u32 esr_el2;/* Hyp Syndrom Register */
+   u64 far_el2;/* Hyp Fault Address Register */
+   u64 hpfar_el2;  /* Hyp IPA Fault Address Register */
+};
+
+typedef struct user_fpsimd_state kvm_kernel_vfp_t;
+
+struct kvm_vcpu_arch {
+   struct kvm_regs regs;
+   u64 sys_regs[NR_SYS_REGS];
+
+   /* HYP configuration */
+   u64 hcr_el2;
+
+   /* Exception Information */
+   struct kvm_vcpu_fault_info fault;
+
+   /* Floating point registers (VFP and Advanced SIMD/NEON) */
+   kvm_kernel_vfp_t vfp_guest;
+   kvm_kernel_vfp_t *vfp_host;
+
+   /* VGIC state */
+   struct vgic_cpu vgic_cpu;
+   struct arch_timer_cpu timer_cpu;
+
+   /*
+* Anything that is not used directly from assembly code goes
+* here.
+*/
+   /* dcache set/way operation pending */
+   int last_pcpu;
+   cpumask_t require_dcache_flush;
+
+   /* Don't run the guest */
+   bool pause;
+
+   /* IO related fields */
+   struct kvm_decode mmio_decode;
+
+   /* Interrupt related fields */
+   u64 irq_lines;  /* IRQ and FIQ levels */
+
+   /* Cache some mmu pages needed inside spinlock regions */
+   struct kvm_mmu_memory_cache mmu_page_cache;
+
+   /* Target CPU and feature flags */
+   u32 target;
+   DECLARE_BITMAP(features, KVM_VCPU_MAX_FEATURES);
+
+   /* Detect first run of a vcpu */
+   bool has_run_once;
+};
+
+struct kvm_vm_stat {
+   u32 remote_tlb_flush;
+};
+
+struct kvm_vcpu_stat {
+   u32 halt_wakeup;
+};
+
+struct kvm_vcpu_init;
+int kvm_vcpu_set_target(struct kvm_vcpu *vcpu,
+   const struct kvm_vcpu_init *init);
+unsigned long kvm_arm_num_regs(struct kvm_vcpu *vcpu);
+int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *indices);
+struct kvm_one_reg;
+int kvm_arm_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg);
+int kvm_arm_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg);
+
+#define KVM_ARCH_WANT_MMU_NOTIFIER
+struct kvm;
+int kvm_unmap_hva(struct kvm *kvm, unsigned long hva);
+int kvm_unmap_hva_range(struct kvm *kvm,
+   unsigned long start, unsigned long end);
+void 

[PATCH 14/29] arm64: KVM: guest one-reg interface

2013-03-04 Thread Marc Zyngier
Let userspace play with the guest registers.

Signed-off-by: Marc Zyngier marc.zyng...@arm.com
---
 arch/arm64/kvm/guest.c | 240 +
 1 file changed, 240 insertions(+)
 create mode 100644 arch/arm64/kvm/guest.c

diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c
new file mode 100644
index 000..2a8aaf8
--- /dev/null
+++ b/arch/arm64/kvm/guest.c
@@ -0,0 +1,240 @@
+/*
+ * Copyright (C) 2012 - ARM Ltd
+ * Author: Marc Zyngier marc.zyng...@arm.com
+ *
+ * Derived from arch/arm/kvm/guest.c:
+ * Copyright (C) 2012 - Virtual Open Systems and Columbia University
+ * Author: Christoffer Dall c.d...@virtualopensystems.com
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see http://www.gnu.org/licenses/.
+ */
+
+#include linux/errno.h
+#include linux/err.h
+#include linux/kvm_host.h
+#include linux/module.h
+#include linux/vmalloc.h
+#include linux/fs.h
+#include asm/cputype.h
+#include asm/uaccess.h
+#include asm/kvm.h
+#include asm/kvm_asm.h
+#include asm/kvm_emulate.h
+#include asm/kvm_coproc.h
+
+struct kvm_stats_debugfs_item debugfs_entries[] = {
+   { NULL }
+};
+
+int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
+{
+   vcpu-arch.hcr_el2 = HCR_GUEST_FLAGS;
+   return 0;
+}
+
+static u64 core_reg_offset_from_id(u64 id)
+{
+   return id  ~(KVM_REG_ARCH_MASK | KVM_REG_SIZE_MASK | KVM_REG_ARM_CORE);
+}
+
+static int get_core_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
+{
+   unsigned long __user *uaddr = (unsigned long __user *)(unsigned 
long)reg-addr;
+   struct kvm_regs *regs = vcpu-arch.regs;
+   u64 off;
+
+   if (KVM_REG_SIZE(reg-id) != sizeof(unsigned long))
+   return -ENOENT;
+
+   /* Our ID is an index into the kvm_regs struct. */
+   off = core_reg_offset_from_id(reg-id);
+   if (off = sizeof(*regs) / KVM_REG_SIZE(reg-id))
+   return -ENOENT;
+
+   return put_user(((unsigned long *)regs)[off], uaddr);
+}
+
+static int set_core_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
+{
+   unsigned long __user *uaddr = (unsigned long __user *)(unsigned 
long)reg-addr;
+   struct kvm_regs *regs = vcpu-arch.regs;
+   u64 off, val;
+
+   if (KVM_REG_SIZE(reg-id) != sizeof(unsigned long))
+   return -ENOENT;
+
+   /* Our ID is an index into the kvm_regs struct. */
+   off = core_reg_offset_from_id(reg-id);
+   if (off = sizeof(*regs) / KVM_REG_SIZE(reg-id))
+   return -ENOENT;
+
+   if (get_user(val, uaddr) != 0)
+   return -EFAULT;
+
+   if (off == KVM_REG_ARM_CORE_REG(regs.pstate)) {
+   unsigned long mode = val  COMPAT_PSR_MODE_MASK;
+   switch (mode) {
+   case PSR_MODE_EL0t:
+   case PSR_MODE_EL1t:
+   case PSR_MODE_EL1h:
+   break;
+   default:
+   return -EINVAL;
+   }
+   }
+
+   ((unsigned long *)regs)[off] = val;
+   return 0;
+}
+
+int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
+{
+   return -EINVAL;
+}
+
+int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
+{
+   return -EINVAL;
+}
+
+static unsigned long num_core_regs(void)
+{
+   return sizeof(struct kvm_regs) / sizeof(unsigned long);
+}
+
+/**
+ * kvm_arm_num_regs - how many registers do we present via KVM_GET_ONE_REG
+ *
+ * This is for all registers.
+ */
+unsigned long kvm_arm_num_regs(struct kvm_vcpu *vcpu)
+{
+   return num_core_regs() + kvm_arm_num_sys_reg_descs(vcpu);
+}
+
+/**
+ * kvm_arm_copy_reg_indices - get indices of all registers.
+ *
+ * We do core registers right here, then we apppend system regs.
+ */
+int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *uindices)
+{
+   unsigned int i;
+   const u64 core_reg = KVM_REG_ARM64 | KVM_REG_SIZE_U64 | 
KVM_REG_ARM_CORE;
+
+   for (i = 0; i  sizeof(struct kvm_regs)/sizeof(unsigned long); i++) {
+   if (put_user(core_reg | i, uindices))
+   return -EFAULT;
+   uindices++;
+   }
+
+   return kvm_arm_copy_sys_reg_indices(vcpu, uindices);
+}
+
+int kvm_arm_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
+{
+   /* We currently use nothing arch-specific in upper 32 bits */
+   if ((reg-id  ~KVM_REG_SIZE_MASK)  32 != KVM_REG_ARM64  32)
+   return -EINVAL;
+
+ 

[PATCH 19/29] arm64: KVM: Plug the arch timer

2013-03-04 Thread Marc Zyngier
Shouldn't be needed - a complete duplicate from arch/arm.

Signed-off-by: Marc Zyngier marc.zyng...@arm.com
---
 arch/arm/kvm/arch_timer.c   |  1 +
 arch/arm64/include/asm/kvm_arch_timer.h | 58 +
 2 files changed, 59 insertions(+)
 create mode 100644 arch/arm64/include/asm/kvm_arch_timer.h

diff --git a/arch/arm/kvm/arch_timer.c b/arch/arm/kvm/arch_timer.c
index 6ac938d..ca04a99 100644
--- a/arch/arm/kvm/arch_timer.c
+++ b/arch/arm/kvm/arch_timer.c
@@ -194,6 +194,7 @@ static struct notifier_block kvm_timer_cpu_nb = {
 
 static const struct of_device_id arch_timer_of_match[] = {
{ .compatible   = arm,armv7-timer,},
+   { .compatible   = arm,armv8-timer,},
{},
 };
 
diff --git a/arch/arm64/include/asm/kvm_arch_timer.h 
b/arch/arm64/include/asm/kvm_arch_timer.h
new file mode 100644
index 000..eb02273
--- /dev/null
+++ b/arch/arm64/include/asm/kvm_arch_timer.h
@@ -0,0 +1,58 @@
+/*
+ * Copyright (C) 2012 ARM Ltd.
+ * Author: Marc Zyngier marc.zyng...@arm.com
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see http://www.gnu.org/licenses/.
+ */
+
+#ifndef __ARM64_KVM_ARCH_TIMER_H
+#define __ARM64_KVM_ARCH_TIMER_H
+
+#include linux/clocksource.h
+#include linux/hrtimer.h
+#include linux/workqueue.h
+
+struct arch_timer_kvm {
+   /* Is the timer enabled */
+   boolenabled;
+
+   /* Virtual offset, restored only */
+   cycle_t cntvoff;
+};
+
+struct arch_timer_cpu {
+   /* Background timer used when the guest is not running */
+   struct hrtimer  timer;
+
+   /* Work queued with the above timer expires */
+   struct work_struct  expired;
+
+   /* Background timer active */
+   boolarmed;
+
+   /* Timer IRQ */
+   const struct kvm_irq_level  *irq;
+
+   /* Registers: control register, timer value */
+   u32 cntv_ctl;   /* Saved/restored */
+   cycle_t cntv_cval;  /* Saved/restored */
+};
+
+int kvm_timer_hyp_init(void);
+int kvm_timer_init(struct kvm *kvm);
+void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu);
+void kvm_timer_flush_hwstate(struct kvm_vcpu *vcpu);
+void kvm_timer_sync_hwstate(struct kvm_vcpu *vcpu);
+void kvm_timer_vcpu_terminate(struct kvm_vcpu *vcpu);
+
+#endif
-- 
1.7.12.4

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 24/29] arm64: KVM: 32bit conditional execution emulation

2013-03-04 Thread Marc Zyngier
As conditional instructions can trap on AArch32, add the thinnest
possible emulation layer to keep 32bit guests happy.

Signed-off-by: Marc Zyngier marc.zyng...@arm.com
---
 arch/arm64/include/asm/kvm_emulate.h |  13 ++-
 arch/arm64/kvm/Makefile  |   2 +-
 arch/arm64/kvm/emulate.c | 154 +++
 3 files changed, 166 insertions(+), 3 deletions(-)
 create mode 100644 arch/arm64/kvm/emulate.c

diff --git a/arch/arm64/include/asm/kvm_emulate.h 
b/arch/arm64/include/asm/kvm_emulate.h
index 2e72a4f..4d5e0ee 100644
--- a/arch/arm64/include/asm/kvm_emulate.h
+++ b/arch/arm64/include/asm/kvm_emulate.h
@@ -31,6 +31,9 @@
 unsigned long *vcpu_reg32(struct kvm_vcpu *vcpu, u8 reg_num);
 unsigned long *vcpu_spsr32(struct kvm_vcpu *vcpu);
 
+bool kvm_condition_valid32(struct kvm_vcpu *vcpu);
+void kvm_skip_instr32(struct kvm_vcpu *vcpu, bool is_wide_instr);
+
 void kvm_inject_undefined(struct kvm_vcpu *vcpu);
 void kvm_inject_dabt(struct kvm_vcpu *vcpu, unsigned long addr);
 void kvm_inject_pabt(struct kvm_vcpu *vcpu, unsigned long addr);
@@ -52,12 +55,18 @@ static inline bool vcpu_mode_is_32bit(struct kvm_vcpu *vcpu)
 
 static inline bool kvm_condition_valid(struct kvm_vcpu *vcpu)
 {
-   return true;/* No conditionals on arm64 */
+   if (vcpu_mode_is_32bit(vcpu))
+   return kvm_condition_valid32(vcpu);
+
+   return true;
 }
 
 static inline void kvm_skip_instr(struct kvm_vcpu *vcpu, bool is_wide_instr)
 {
-   *vcpu_pc(vcpu) += 4;
+   if (vcpu_mode_is_32bit(vcpu))
+   kvm_skip_instr32(vcpu, is_wide_instr);
+   else
+   *vcpu_pc(vcpu) += 4;
 }
 
 static inline void vcpu_set_thumb(struct kvm_vcpu *vcpu)
diff --git a/arch/arm64/kvm/Makefile b/arch/arm64/kvm/Makefile
index 50f9da0..a6ba0d8 100644
--- a/arch/arm64/kvm/Makefile
+++ b/arch/arm64/kvm/Makefile
@@ -10,7 +10,7 @@ obj-$(CONFIG_KVM_ARM_HOST) += $(addprefix ../../../virt/kvm/, 
kvm_main.o coalesc
 
 obj-$(CONFIG_KVM_ARM_HOST) += $(addprefix ../../../arch/arm/kvm/, arm.o mmu.o 
mmio.o psci.o perf.o)
 
-obj-$(CONFIG_KVM_ARM_HOST) += inject_fault.o regmap.o
+obj-$(CONFIG_KVM_ARM_HOST) += emulate.o inject_fault.o regmap.o
 obj-$(CONFIG_KVM_ARM_HOST) += hyp.o hyp-init.o handle_exit.o idmap.o
 obj-$(CONFIG_KVM_ARM_HOST) += guest.o reset.o sys_regs.o sys_regs_a57.o
 
diff --git a/arch/arm64/kvm/emulate.c b/arch/arm64/kvm/emulate.c
new file mode 100644
index 000..6b3dbc3
--- /dev/null
+++ b/arch/arm64/kvm/emulate.c
@@ -0,0 +1,154 @@
+/*
+ * (not much of an) Emulation layer for 32bit guests.
+ *
+ * Copyright (C) 2012 - Virtual Open Systems and Columbia University
+ * Author: Christoffer Dall c.d...@virtualopensystems.com
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see http://www.gnu.org/licenses/.
+ */
+
+#include linux/kvm_host.h
+#include asm/kvm_emulate.h
+
+/*
+ * stolen from arch/arm/kernel/opcodes.c
+ *
+ * condition code lookup table
+ * index into the table is test code: EQ, NE, ... LT, GT, AL, NV
+ *
+ * bit position in short is condition code: NZCV
+ */
+static const unsigned short cc_map[16] = {
+   0xF0F0, /* EQ == Z set*/
+   0x0F0F, /* NE */
+   0x, /* CS == C set*/
+   0x, /* CC */
+   0xFF00, /* MI == N set*/
+   0x00FF, /* PL */
+   0x, /* VS == V set*/
+   0x, /* VC */
+   0x0C0C, /* HI == C set  Z clear */
+   0xF3F3, /* LS == C clear || Z set */
+   0xAA55, /* GE == (N==V)   */
+   0x55AA, /* LT == (N!=V)   */
+   0x0A05, /* GT == (!Z  (N==V))   */
+   0xF5FA, /* LE == (Z || (N!=V))*/
+   0x, /* AL always  */
+   0   /* NV */
+};
+
+static int kvm_vcpu_get_condition(struct kvm_vcpu *vcpu)
+{
+   u32 esr = kvm_vcpu_get_hsr(vcpu);
+
+   if (esr  ESR_EL2_CV)
+   return (esr  ESR_EL2_COND)  ESR_EL2_COND_SHIFT;
+
+   return -1;
+}
+
+/*
+ * Check if a trapped instruction should have been executed or not.
+ */
+bool kvm_condition_valid32(struct kvm_vcpu *vcpu)
+{
+   unsigned long cpsr;
+   

[PATCH 25/29] arm64: KVM: 32bit handling of coprocessor traps

2013-03-04 Thread Marc Zyngier
Provide the necessary infrastructure to trap coprocessor accesses that
occur when running 32bit guests.

Also wire SMC and HVC trapping in 32bit mode while we're at it.

Signed-off-by: Marc Zyngier marc.zyng...@arm.com
---
 arch/arm64/include/asm/kvm_coproc.h |   5 ++
 arch/arm64/kvm/handle_exit.c|   7 ++
 arch/arm64/kvm/sys_regs.c   | 165 ++--
 3 files changed, 170 insertions(+), 7 deletions(-)

diff --git a/arch/arm64/include/asm/kvm_coproc.h 
b/arch/arm64/include/asm/kvm_coproc.h
index e791894..0378be9 100644
--- a/arch/arm64/include/asm/kvm_coproc.h
+++ b/arch/arm64/include/asm/kvm_coproc.h
@@ -33,10 +33,15 @@ struct kvm_sys_reg_table {
 struct kvm_sys_reg_target_table {
unsigned target;
struct kvm_sys_reg_table table64;
+   struct kvm_sys_reg_table table32;
 };
 
 void kvm_register_target_sys_reg_table(struct kvm_sys_reg_target_table *table);
 
+int kvm_handle_cp14_load_store(struct kvm_vcpu *vcpu, struct kvm_run *run);
+int kvm_handle_cp14_access(struct kvm_vcpu *vcpu, struct kvm_run *run);
+int kvm_handle_cp15_32(struct kvm_vcpu *vcpu, struct kvm_run *run);
+int kvm_handle_cp15_64(struct kvm_vcpu *vcpu, struct kvm_run *run);
 int kvm_handle_sys_reg(struct kvm_vcpu *vcpu, struct kvm_run *run);
 
 #define kvm_coproc_table_init kvm_sys_reg_table_init
diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c
index fa38230..3e61dcb 100644
--- a/arch/arm64/kvm/handle_exit.c
+++ b/arch/arm64/kvm/handle_exit.c
@@ -62,6 +62,13 @@ static int kvm_handle_wfi(struct kvm_vcpu *vcpu, struct 
kvm_run *run)
 
 static exit_handle_fn arm_exit_handlers[] = {
[ESR_EL2_EC_WFI]= kvm_handle_wfi,
+   [ESR_EL2_EC_CP15_32]= kvm_handle_cp15_32,
+   [ESR_EL2_EC_CP15_64]= kvm_handle_cp15_64,
+   [ESR_EL2_EC_CP14_MR]= kvm_handle_cp14_access,
+   [ESR_EL2_EC_CP14_LS]= kvm_handle_cp14_load_store,
+   [ESR_EL2_EC_CP14_64]= kvm_handle_cp14_access,
+   [ESR_EL2_EC_HVC32]  = handle_hvc,
+   [ESR_EL2_EC_SMC32]  = handle_smc,
[ESR_EL2_EC_HVC64]  = handle_hvc,
[ESR_EL2_EC_SMC64]  = handle_smc,
[ESR_EL2_EC_SYS64]  = kvm_handle_sys_reg,
diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index 9fc8c17..1b1cb21 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -38,6 +38,10 @@
  * types are different. My gut feeling is that it should be pretty
  * easy to merge, but that would be an ABI breakage -- again. VFP
  * would also need to be abstracted.
+ *
+ * For AArch32, we only take care of what is being trapped. Anything
+ * that has to do with init and userspace access has to go via the
+ * 64bit interface.
  */
 
 /* 3 bits per cache level, as per CLIDR, but non-existent caches always 0 */
@@ -256,6 +260,36 @@ static const struct sys_reg_desc sys_reg_descs[] = {
/* TPIDRRO_EL0 */
{ Op0(0b11), Op1(0b011), CRn(0b1101), CRm(0b), Op2(0b011),
  NULL, reset_unknown, TPIDRRO_EL0 },
+
+   /* DACR32_EL2 */
+   { Op0(0b11), Op1(0b100), CRn(0b0011), CRm(0b), Op2(0b000),
+ NULL, reset_unknown, DACR32_EL2 },
+   /* IFSR32_EL2 */
+   { Op0(0b11), Op1(0b100), CRn(0b0101), CRm(0b), Op2(0b001),
+ NULL, reset_unknown, IFSR32_EL2 },
+};
+
+/* Trapped cp15 registers */
+static const struct sys_reg_desc cp15_regs[] = {
+   /*
+* DC{C,I,CI}SW operations:
+*/
+   { Op1( 0), CRn( 7), CRm( 6), Op2( 2), access_dcsw },
+   { Op1( 0), CRn( 7), CRm(10), Op2( 2), access_dcsw },
+   { Op1( 0), CRn( 7), CRm(14), Op2( 2), access_dcsw },
+   { Op1( 0), CRn( 9), CRm(12), Op2( 0), pm_fake },
+   { Op1( 0), CRn( 9), CRm(12), Op2( 1), pm_fake },
+   { Op1( 0), CRn( 9), CRm(12), Op2( 2), pm_fake },
+   { Op1( 0), CRn( 9), CRm(12), Op2( 3), pm_fake },
+   { Op1( 0), CRn( 9), CRm(12), Op2( 5), pm_fake },
+   { Op1( 0), CRn( 9), CRm(12), Op2( 6), pm_fake },
+   { Op1( 0), CRn( 9), CRm(12), Op2( 7), pm_fake },
+   { Op1( 0), CRn( 9), CRm(13), Op2( 0), pm_fake },
+   { Op1( 0), CRn( 9), CRm(13), Op2( 1), pm_fake },
+   { Op1( 0), CRn( 9), CRm(13), Op2( 2), pm_fake },
+   { Op1( 0), CRn( 9), CRm(14), Op2( 0), pm_fake },
+   { Op1( 0), CRn( 9), CRm(14), Op2( 1), pm_fake },
+   { Op1( 0), CRn( 9), CRm(14), Op2( 2), pm_fake },
 };
 
 /* Target specific emulation tables */
@@ -267,13 +301,20 @@ void kvm_register_target_sys_reg_table(struct 
kvm_sys_reg_target_table *table)
 }
 
 /* Get specific register table for this target. */
-static const struct sys_reg_desc *get_target_table(unsigned target, size_t 
*num)
+static const struct sys_reg_desc *get_target_table(unsigned target,
+  bool mode_is_64,
+  size_t *num)
 {
struct kvm_sys_reg_target_table *table;
 
table = target_tables[target];
-   *num = 

[PATCH 26/29] arm64: KVM: 32bit coprocessor access for Cortex-A57

2013-03-04 Thread Marc Zyngier
Enable handling of 32bit coprocessor traps for Cortex-A57.

Signed-off-by: Marc Zyngier marc.zyng...@arm.com
---
 arch/arm64/kvm/sys_regs_a57.c | 22 ++
 1 file changed, 22 insertions(+)

diff --git a/arch/arm64/kvm/sys_regs_a57.c b/arch/arm64/kvm/sys_regs_a57.c
index dcc88fe..56c0641 100644
--- a/arch/arm64/kvm/sys_regs_a57.c
+++ b/arch/arm64/kvm/sys_regs_a57.c
@@ -59,6 +59,17 @@ static void reset_actlr(struct kvm_vcpu *vcpu, const struct 
sys_reg_desc *r)
vcpu-arch.sys_regs[ACTLR_EL1] = actlr;
 }
 
+static bool access_ectlr(struct kvm_vcpu *vcpu,
+const struct sys_reg_params *p,
+const struct sys_reg_desc *r)
+{
+   if (p-is_write)
+   return ignore_write(vcpu, p);
+
+   *vcpu_reg(vcpu, p-Rt) = 0;
+   return true;
+}
+
 /*
  * A57-specific sys-reg registers.
  * Important: Must be sorted ascending by Op0, Op1, CRn, CRm, Op2
@@ -74,12 +85,23 @@ static const struct sys_reg_desc a57_sys_regs[] = {
  NULL, reset_val, CPACR_EL1, 0 },
 };
 
+static const struct sys_reg_desc a57_cp15_regs[] = {
+   { Op1(0b000), CRn(0b0001), CRm(0b), Op2(0b001), /* ACTLR */
+ access_actlr },
+   { Op1(0b001), CRn(0b), CRm(0b), Op2(0b000), /* ECTLR */
+ access_ectlr },
+};
+
 static struct kvm_sys_reg_target_table a57_target_table = {
.target = KVM_ARM_TARGET_CORTEX_A57,
.table64 = {
.table = a57_sys_regs,
.num = ARRAY_SIZE(a57_sys_regs),
},
+   .table32 = {
+   .table = a57_cp15_regs,
+   .num = ARRAY_SIZE(a57_cp15_regs),
+   },
 };
 
 static int __init sys_reg_a57_init(void)
-- 
1.7.12.4

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 27/29] arm64: KVM: 32bit specific register world switch

2013-03-04 Thread Marc Zyngier
Allow registers specific to 32bit guests to be saved/restored
during the world switch.

Signed-off-by: Marc Zyngier marc.zyng...@arm.com
---
 arch/arm64/kvm/hyp.S | 70 
 1 file changed, 70 insertions(+)

diff --git a/arch/arm64/kvm/hyp.S b/arch/arm64/kvm/hyp.S
index cd7506d..1d4b0a7 100644
--- a/arch/arm64/kvm/hyp.S
+++ b/arch/arm64/kvm/hyp.S
@@ -312,6 +312,74 @@ __kvm_hyp_code_start:
load_sysregs
 .endm
 
+.macro skip_32bit_state tmp, target
+   // Skip 32bit state if not needed
+   mrs \tmp, hcr_el2
+   tbnz\tmp, #HCR_RW_SHIFT, \target
+.endm
+
+.macro skip_tee_state tmp, target
+   // Skip ThumbEE state if not needed
+   mrs \tmp, id_pfr0_el1
+   tbz \tmp, #12, \target
+.endm
+
+.macro save_guest_32bit_state
+   skip_32bit_state x2, 1f
+
+   add x2, x0, #SPSR_OFFSET(KVM_SPSR_ABT)
+   mrs x4, spsr_abt
+   mrs x5, spsr_und
+   mrs x6, spsr_irq
+   mrs x7, spsr_fiq
+   stp x4, x5, [x2], #16
+   stp x6, x7, [x2]
+
+   add x2, x0, #SYSREG_OFFSET(DACR32_EL2)
+   mrs x4, dacr32_el2
+   mrs x5, ifsr32_el2
+   mrs x6, fpexc32_el2
+   mrs x7, dbgvcr32_el2
+   stp x4, x5, [x2], #16
+   stp x6, x7, [x2]
+
+   skip_tee_state x8, 1f
+
+   add x2, x0, #SYSREG_OFFSET(TEECR32_EL1)
+   mrs x4, teecr32_el1
+   mrs x5, teehbr32_el1
+   stp x4, x5, [x2]
+1:
+.endm
+
+.macro restore_guest_32bit_state
+   skip_32bit_state x2, 1f
+
+   add x2, x0, #SPSR_OFFSET(KVM_SPSR_ABT)
+   ldp x4, x5, [x2], #16
+   ldp x6, x7, [x2]
+   msr spsr_abt, x4
+   msr spsr_und, x5
+   msr spsr_irq, x6
+   msr spsr_fiq, x7
+
+   add x2, x0, #SYSREG_OFFSET(DACR32_EL2)
+   ldp x4, x5, [x2], #16
+   ldp x6, x7, [x2]
+   msr dacr32_el2, x4
+   msr ifsr32_el2, x5
+   msr fpexc32_el2, x6
+   msr dbgvcr32_el2, x7
+
+   skip_tee_state x8, 1f
+
+   add x2, x0, #SYSREG_OFFSET(TEECR32_EL1)
+   ldp x4, x5, [x2]
+   msr teecr32_el1, x4
+   msr teehbr32_el1, x5
+1:
+.endm
+
 .macro activate_traps
ldr x2, [x0, #VCPU_IRQ_LINES]
ldr x1, [x0, #VCPU_HCR_EL2]
@@ -513,6 +581,7 @@ ENTRY(__kvm_vcpu_run)
restore_timer_state
restore_guest_sysregs
restore_guest_fpsimd
+   restore_guest_32bit_state
restore_guest_regs
 
// That's it, no more messing around.
@@ -523,6 +592,7 @@ __kvm_vcpu_return:
// Assume x0 is the vcpu pointer, x1 the return code
// Guest's x0-x3 are on the stack
save_guest_regs
+   save_guest_32bit_state
save_guest_fpsimd
save_guest_sysregs
save_timer_state
-- 
1.7.12.4

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 28/29] arm64: KVM: 32bit guest fault injection

2013-03-04 Thread Marc Zyngier
Add fault injection capability for 32bit guests.

Signed-off-by: Marc Zyngier marc.zyng...@arm.com
---
 arch/arm64/kvm/inject_fault.c | 79 ++-
 1 file changed, 78 insertions(+), 1 deletion(-)

diff --git a/arch/arm64/kvm/inject_fault.c b/arch/arm64/kvm/inject_fault.c
index 80b245f..85a4548 100644
--- a/arch/arm64/kvm/inject_fault.c
+++ b/arch/arm64/kvm/inject_fault.c
@@ -1,5 +1,5 @@
 /*
- * Fault injection for 64bit guests.
+ * Fault injection for both 32 and 64bit guests.
  *
  * Copyright (C) 2012 - ARM Ltd
  * Author: Marc Zyngier marc.zyng...@arm.com
@@ -24,6 +24,74 @@
 #include linux/kvm_host.h
 #include asm/kvm_emulate.h
 
+static void prepare_fault32(struct kvm_vcpu *vcpu, u32 mode, u32 vect_offset)
+{
+   unsigned long cpsr;
+   unsigned long new_spsr_value = *vcpu_cpsr(vcpu);
+   bool is_thumb = (new_spsr_value  COMPAT_PSR_T_BIT);
+   u32 return_offset = (is_thumb) ? 4 : 0;
+   u32 sctlr = vcpu-arch.cp15[c1_SCTLR];
+
+   cpsr = mode | COMPAT_PSR_I_BIT;
+
+   if (sctlr  (1  30))
+   cpsr |= COMPAT_PSR_T_BIT;
+   if (sctlr  (1  25))
+   cpsr |= COMPAT_PSR_E_BIT;
+
+   *vcpu_cpsr(vcpu) = cpsr;
+
+   /* Note: These now point to the banked copies */
+   *vcpu_spsr(vcpu) = new_spsr_value;
+   *vcpu_reg(vcpu, 14) = *vcpu_pc(vcpu) + return_offset;
+
+   /* Branch to exception vector */
+   if (sctlr  (1  13))
+   vect_offset += 0x;
+   else /* always have security exceptions */
+   vect_offset += vcpu-arch.cp15[c12_VBAR];
+
+   *vcpu_pc(vcpu) = vect_offset;
+}
+
+static void inject_undef32(struct kvm_vcpu *vcpu)
+{
+   prepare_fault32(vcpu, COMPAT_PSR_MODE_UND, 4);
+}
+
+/*
+ * Modelled after TakeDataAbortException() and TakePrefetchAbortException
+ * pseudocode.
+ */
+static void inject_abt32(struct kvm_vcpu *vcpu, bool is_pabt,
+unsigned long addr)
+{
+   u32 vect_offset;
+   u32 *far, *fsr;
+   bool is_lpae;
+
+   if (is_pabt) {
+   vect_offset = 12;
+   far = vcpu-arch.cp15[c6_IFAR];
+   fsr = vcpu-arch.cp15[c5_IFSR];
+   } else { /* !iabt */
+   vect_offset = 16;
+   far = vcpu-arch.cp15[c6_DFAR];
+   fsr = vcpu-arch.cp15[c5_DFSR];
+   }
+
+   prepare_fault32(vcpu, COMPAT_PSR_MODE_ABT | COMPAT_PSR_A_BIT, 
vect_offset);
+
+   *far = addr;
+
+   /* Always give debug fault for now - should give guest a clue */
+   is_lpae = (vcpu-arch.cp15[c2_TTBCR]  31);
+   if (is_lpae)
+   *fsr = 1  9 | 0x22;
+   else
+   *fsr = 2;
+}
+
 static void inject_abt64(struct kvm_vcpu *vcpu, bool is_iabt, unsigned long 
addr)
 {
unsigned long cpsr = *vcpu_cpsr(vcpu);
@@ -89,6 +157,9 @@ static void inject_undef64(struct kvm_vcpu *vcpu)
  */
 void kvm_inject_dabt(struct kvm_vcpu *vcpu, unsigned long addr)
 {
+   if (!(vcpu-arch.hcr_el2  HCR_RW))
+   inject_abt32(vcpu, false, addr);
+
inject_abt64(vcpu, false, addr);
 }
 
@@ -102,6 +173,9 @@ void kvm_inject_dabt(struct kvm_vcpu *vcpu, unsigned long 
addr)
  */
 void kvm_inject_pabt(struct kvm_vcpu *vcpu, unsigned long addr)
 {
+   if (!(vcpu-arch.hcr_el2  HCR_RW))
+   inject_abt32(vcpu, true, addr);
+
inject_abt64(vcpu, true, addr);
 }
 
@@ -113,5 +187,8 @@ void kvm_inject_pabt(struct kvm_vcpu *vcpu, unsigned long 
addr)
  */
 void kvm_inject_undefined(struct kvm_vcpu *vcpu)
 {
+   if (!(vcpu->arch.hcr_el2 & HCR_RW))
+   inject_undef32(vcpu);
+
inject_undef64(vcpu);
 }
-- 
1.7.12.4

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 29/29] arm64: KVM: enable initialization of a 32bit vcpu

2013-03-04 Thread Marc Zyngier
Wire the init of a 32bit vcpu by allowing 32bit modes in pstate,
and providing sensible defaults out of reset state.

This feature is of course conditionned by the presence of 32bit
capability on the physical CPU.

Signed-off-by: Marc Zyngier marc.zyng...@arm.com
---
 arch/arm64/include/asm/kvm_host.h |  2 +-
 arch/arm64/include/uapi/asm/kvm.h |  1 +
 arch/arm64/kvm/guest.c|  6 ++
 arch/arm64/kvm/reset.c| 22 +-
 4 files changed, 29 insertions(+), 2 deletions(-)

diff --git a/arch/arm64/include/asm/kvm_host.h 
b/arch/arm64/include/asm/kvm_host.h
index 24dc8d7..0f17cfe 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -34,7 +34,7 @@
 #include <asm/kvm_vgic.h>
 #include <asm/kvm_arch_timer.h>
 
-#define KVM_VCPU_MAX_FEATURES 1
+#define KVM_VCPU_MAX_FEATURES 2
 
 /* We don't currently support large pages. */
 #define KVM_HPAGE_GFN_SHIFT(x) 0
diff --git a/arch/arm64/include/uapi/asm/kvm.h 
b/arch/arm64/include/uapi/asm/kvm.h
index f9c269e..813427f 100644
--- a/arch/arm64/include/uapi/asm/kvm.h
+++ b/arch/arm64/include/uapi/asm/kvm.h
@@ -70,6 +70,7 @@ struct kvm_regs {
 #define KVM_VGIC_V2_CPU_SIZE   0x2000
 
 #define KVM_ARM_VCPU_POWER_OFF 0 /* CPU is started in OFF state */
+#define KVM_ARM_VCPU_EL1_32BIT 1 /* CPU running a 32bit VM */
 
 struct kvm_vcpu_init {
__u32 target;
diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c
index 2a8aaf8..123703d 100644
--- a/arch/arm64/kvm/guest.c
+++ b/arch/arm64/kvm/guest.c
@@ -84,6 +84,12 @@ static int set_core_reg(struct kvm_vcpu *vcpu, const struct 
kvm_one_reg *reg)
if (off == KVM_REG_ARM_CORE_REG(regs.pstate)) {
 		unsigned long mode = val & COMPAT_PSR_MODE_MASK;
switch (mode) {
+   case COMPAT_PSR_MODE_USR:
+   case COMPAT_PSR_MODE_FIQ:
+   case COMPAT_PSR_MODE_IRQ:
+   case COMPAT_PSR_MODE_SVC:
+   case COMPAT_PSR_MODE_ABT:
+   case COMPAT_PSR_MODE_UND:
case PSR_MODE_EL0t:
case PSR_MODE_EL1t:
case PSR_MODE_EL1h:
diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c
index 3ac2f20..411659e 100644
--- a/arch/arm64/kvm/reset.c
+++ b/arch/arm64/kvm/reset.c
@@ -35,6 +35,19 @@ static struct kvm_regs default_regs_reset = {
.regs.pstate = PSR_MODE_EL1h | PSR_A_BIT | PSR_I_BIT | PSR_F_BIT,
 };
 
+static struct kvm_regs default_regs_reset32 = {
+   .regs.pstate = (COMPAT_PSR_MODE_SVC | COMPAT_PSR_A_BIT |
+   COMPAT_PSR_I_BIT | COMPAT_PSR_F_BIT),
+};
+
+static bool cpu_has_32bit_el1(void)
+{
+   u64 pfr0;
+
+   pfr0 = read_cpuid(ID_AA64PFR0_EL1);
+   return !!(pfr0 & 0x20);
+}
+
 /**
  * kvm_reset_vcpu - sets core registers and sys_regs to reset value
  * @vcpu: The VCPU pointer
@@ -49,7 +62,14 @@ int kvm_reset_vcpu(struct kvm_vcpu *vcpu)
 
 	switch (vcpu->arch.target) {
default:
-   cpu_reset = default_regs_reset;
+   if (test_bit(KVM_ARM_VCPU_EL1_32BIT, vcpu->arch.features)) {
+   if (!cpu_has_32bit_el1())
+   return -EINVAL;
+   cpu_reset = default_regs_reset32;
+   vcpu->arch.hcr_el2 &= ~HCR_RW;
+   } else {
+   cpu_reset = default_regs_reset;
+   }
break;
}
 
-- 
1.7.12.4

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


  1   2   >