[PATCH v2] KVM test: Enable timedrift for Linux guests

2010-03-24 Thread Jason Wang
We should also test timedrift for Linux guests especially for guest
with pvclock. So this patch enables timedrift for Linux guests.

Changes from v1:
- Correct the wrong name for guest load cleaning
- Use -no-kvm-pit-reinjection for linux guests and -rtc-td-hack for
windows guests.

Signed-off-by: Jason Wang jasow...@redhat.com
---
 client/tests/kvm/tests_base.cfg.sample |   14 --
 1 files changed, 12 insertions(+), 2 deletions(-)

diff --git a/client/tests/kvm/tests_base.cfg.sample 
b/client/tests/kvm/tests_base.cfg.sample
index 8cc83a9..29a2430 100644
--- a/client/tests/kvm/tests_base.cfg.sample
+++ b/client/tests/kvm/tests_base.cfg.sample
@@ -147,7 +147,6 @@ variants:
 type = linux_s3
 
 - timedrift:install setup unattended_install
-extra_params +=  -rtc-td-hack
 variants:
 - with_load:
 type = timedrift
@@ -330,7 +329,7 @@ variants:
 variants:
 # Linux section
 - @Linux:
-no timedrift autoit
+no autoit
 shutdown_command = shutdown -h now
 reboot_command = shutdown -r now
 status_test_command = echo $?
@@ -342,6 +341,16 @@ variants:
 file_transfer_port = 22
 mem_chk_cmd = dmidecode -t 17 | awk -F: '/Size/ {print $2}'
 cpu_chk_cmd = grep -c processor /proc/cpuinfo
+timedrift:
+extra_params +=  -no-kvm-pit-reinjection
+time_command = date +'TIME: %a %m/%d/%Y %H:%M:%S.%N'
+time_filter_re = (?:TIME: \w\w\w )(.{19})(?:\.\d\d)
+time_format = %m/%d/%Y %H:%M:%S
+guest_load_command = dd if=/dev/urandom of=/dev/null
+guest_load_instances = 2
+guest_load_stop_command = killall -9 dd
+host_load_command = bzip2 -c --best /dev/urandom  /dev/null
+host_load_instances = 8
 
 variants:
 - Fedora:
@@ -717,6 +726,7 @@ variants:
 stress_boot:
 alive_test_cmd = systeminfo
 timedrift:
+extra_params +=  -rtc-td-hack
 time_command = echo TIME: %date% %time%
 time_filter_re = (?=TIME: \w\w\w ).{19}(?=\.\d\d)
 time_format = %m/%d/%Y %H:%M:%S

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [RFC] Unify KVM kernel-space and user-space code into a single project

2010-03-24 Thread Avi Kivity

On 03/24/2010 07:09 AM, Andi Kleen wrote:

Joerg Roedelj...@8bytes.org  writes:
   

Sidenote: I really think we should come to a conclusion about the
   concept. KVM integration into perf is very useful feature to
  analyze virtualization workloads.
 

Agreed. I especially would like to see instruction/branch tracing
working this way.  This would give a lot of the benefits of a simulator on
a real CPU.
   


If you're profiling a single guest it makes more sense to do this from 
inside the guest - you can profile userspace as well as the kernel.


--
Do not meddle in the internals of kernels, for they are subtle and quick to 
panic.

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [RFC] Unify KVM kernel-space and user-space code into a single project

2010-03-24 Thread Andi Kleen
 If you're profiling a single guest it makes more sense to do this from 
 inside the guest - you can profile userspace as well as the kernel.

I'm interested in debugging the guest without guest cooperation.

In many cases qemu's new gdb stub works for that, but in some cases
I would prefer instruction/branch traces over standard gdb style
debugging.

I used to use that very successfully with simulators in the past
for some hard bugs.

-Andi
-- 
a...@linux.intel.com -- Speaking for myself only.
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[KVM-AUTOTEST PATCH] KVM test: timedrift: open load sessions before taking initial time

2010-03-24 Thread Michael Goldish
Signed-off-by: Michael Goldish mgold...@redhat.com
---
 client/tests/kvm/tests/timedrift.py |   18 ++
 1 files changed, 10 insertions(+), 8 deletions(-)

diff --git a/client/tests/kvm/tests/timedrift.py 
b/client/tests/kvm/tests/timedrift.py
index 194f09c..9cb7489 100644
--- a/client/tests/kvm/tests/timedrift.py
+++ b/client/tests/kvm/tests/timedrift.py
@@ -82,15 +82,8 @@ def run_timedrift(test, params, env):
 # Set the VM's CPU affinity
 prev_affinity = set_cpu_affinity(vm.get_pid(), cpu_mask)
 
-# Get time before load
-# (ht stands for host time, gt stands for guest time)
-(ht0, gt0) = kvm_test_utils.get_time(session,
- time_command,
- time_filter_re,
- time_format)
-
 try:
-# Run some load on the guest
+# Open shell sessions with the guest
 logging.info(Starting load on guest...)
 for i in range(guest_load_instances):
 load_session = vm.remote_login()
@@ -99,6 +92,15 @@ def run_timedrift(test, params, env):
 load_session.set_output_prefix((guest load %d)  % i)
 load_session.set_output_func(logging.debug)
 guest_load_sessions.append(load_session)
+
+# Get time before load
+# (ht stands for host time, gt stands for guest time)
+(ht0, gt0) = kvm_test_utils.get_time(session,
+ time_command,
+ time_filter_re,
+ time_format)
+
+# Run some load on the guest
 for load_session in guest_load_sessions:
 load_session.sendline(guest_load_command)
 
-- 
1.5.4.1

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [RFC] Unify KVM kernel-space and user-space code into a single project

2010-03-24 Thread Avi Kivity

On 03/24/2010 09:38 AM, Andi Kleen wrote:

If you're profiling a single guest it makes more sense to do this from
inside the guest - you can profile userspace as well as the kernel.
 

I'm interested in debugging the guest without guest cooperation.

In many cases qemu's new gdb stub works for that, but in some cases
I would prefer instruction/branch traces over standard gdb style
debugging.
   


Isn't gdb supposed to be able to use branch traces?  It makes sense to 
expose them via the gdb stub then.  Not to say an external tool doesn't 
make sense.



--
error compiling committee.c: too many arguments to function

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: KVM: x86: document KVM_REQ_PENDING_TIMER usage

2010-03-24 Thread Avi Kivity

On 03/23/2010 07:15 PM, Marcelo Tosatti wrote:

Document that KVM_REQ_PENDING_TIMER is implicitly used during guest
entry.
   


Applied, thanks.

--
error compiling committee.c: too many arguments to function

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v3] KVM MMU: check reserved bits only if CR4.PSE=1 or CR4.PAE=1

2010-03-24 Thread Avi Kivity

On 03/19/2010 11:58 AM, Xiao Guangrong wrote:

- Check reserved bits only if CR4.PAE=1 or CR4.PSE=1 when guest #PF occurs
- Fix a typo in reset_rsvds_bits_mask()

   


Applied, thanks.

--
error compiling committee.c: too many arguments to function

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [RFC] Unify KVM kernel-space and user-space code into a single project

2010-03-24 Thread Andi Kleen
Avi Kivity a...@redhat.com writes:

 On 03/24/2010 09:38 AM, Andi Kleen wrote:
 If you're profiling a single guest it makes more sense to do this from
 inside the guest - you can profile userspace as well as the kernel.
  
 I'm interested in debugging the guest without guest cooperation.

 In many cases qemu's new gdb stub works for that, but in some cases
 I would prefer instruction/branch traces over standard gdb style
 debugging.


 Isn't gdb supposed to be able to use branch traces? 

AFAIK not. The ptrace interface is only used by idb I believe.
I might be wrong on that.

Not sure if there is even a remote protocol command for 
branch traces either.

There's a concept of tracepoints in the protocol, but it 
doesn't quite match at.

 It makes sense to
 expose them via the gdb stub then.  Not to say an external tool
 doesn't make sense.

Ok that would work for me too. As long as I can set start/stop
triggers and pipe the log somewhere it's fine for me.

-Andi

-- 
a...@linux.intel.com -- Speaking for myself only.
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] Bail out when VCPU_CREATE fails

2010-03-24 Thread Avi Kivity

On 03/22/2010 12:49 PM, Alexander Graf wrote:

When we fail to create a VCPU we have no way to tell our callers that something
failed. So the caller happily uses a completely broken state.

This code should become deprecated in the process of converting qemu-kvm to
qemu anyways, so let's not care about remodeling it but just bail out when
something breaks. Also give the user a hint on why the VCPU_CREATE might have
failed.

This fixes a segmentation fault with -smp > VCPU_MAX in the host kernel.

   


Applied, thanks.  Note, kvm reports the number of supported vcpus using 
KVM_CAP_NR_VCPUS, so we can fail in vl.c.


--
error compiling committee.c: too many arguments to function

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] Bail out when VCPU_CREATE fails

2010-03-24 Thread Alexander Graf

On 24.03.2010, at 10:32, Avi Kivity wrote:

 On 03/22/2010 12:49 PM, Alexander Graf wrote:
 When we fail to create a VCPU we have no way to tell our callers that 
 something
 failed. So the caller happily uses a completely broken state.
 
 This code should become deprecated in the process of converting qemu-kvm to
 qemu anyways, so let's not care about remodeling it but just bail out when
 something breaks. Also give the user a hint on why the VCPU_CREATE might have
 failed.
 
 This fixes a segmentation fault with -smp > VCPU_MAX in the host kernel.
 
   
 
 Applied, thanks.  Note, kvm reports the number of supported vcpus using 
 KVM_CAP_NR_VCPUS, so we can fail in vl.c.

Sounds like the right fix to do for qemu.git. As soon as SMP support is in 
there.


Alex--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [patch 0/6] misc qemu-kvm cleanups

2010-03-24 Thread Avi Kivity

On 03/23/2010 06:53 PM, Marcelo Tosatti wrote:

See individual patches for details.


   


Applied, thanks.

--
error compiling committee.c: too many arguments to function

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [patch 0/6] misc uq/master updates (v2)

2010-03-24 Thread Avi Kivity

On 03/23/2010 06:37 PM, Marcelo Tosatti wrote:

See individual patches for details.


   


Applied, thanks.

--
error compiling committee.c: too many arguments to function

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH] qemu-kvm: json-parser: Output the content of invalid keyword

2010-03-24 Thread Amos Kong
When some invalid words are input on the QMP port, qemu outputs this error message:
parse error: invalid keyword `%s'
This patch makes qemu output the content.

Signed-off-by: Amos Kong ak...@redhat.com
---
 json-parser.c |7 ++-
 1 files changed, 6 insertions(+), 1 deletions(-)

diff --git a/json-parser.c b/json-parser.c
index 579928f..98a82af 100644
--- a/json-parser.c
+++ b/json-parser.c
@@ -12,6 +12,7 @@
  */
 
 #include stdbool.h
+#include stdarg.h
 
 #include qemu-common.h
 #include qstring.h
@@ -93,7 +94,11 @@ static int token_is_escape(QObject *obj, const char *value)
  */
 static void parse_error(JSONParserContext *ctxt, QObject *token, const char 
*msg, ...)
 {
-fprintf(stderr, parse error: %s\n, msg);
+va_list ap;
+va_start(ap, msg);
+fprintf(stderr, parse error:);
+vfprintf(stderr, msg, ap);
+fprintf(stderr, \n);
 }
 
 /**
-- 
1.6.3.3

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: Completing big real mode emulation

2010-03-24 Thread Sheng Yang
On Saturday 20 March 2010 23:00:49 Alexander Graf wrote:
 Am 20.03.2010 um 15:02 schrieb Mohammed Gamal m.gamal...@gmail.com:
  On Sat, Mar 20, 2010 at 3:18 PM, Avi Kivity a...@redhat.com wrote:
  On 03/20/2010 10:55 AM, Alexander Graf wrote:
  I'd say that a GSoC project would rather focus on making a guest
  OS work
  than working on generic big real mode. Having Windows 98 support
  is way more
  visible to the users. And hopefully more fun to implement too,
  as it's a
  visible goal :-).
 
  Big real mode allows you to boot various OSes, such as that old
  Ubuntu/SuSE boot loader which triggered the whole thing.
 
  I thought legacy Windows uses it too?
 
  IIRC even current Windows (last I checked was XP, but it's probably
  true for
  newer) invokes big real mode inadvertently.  All it takes is not to
  clear fs
  and gs while switching to real mode.  It works because the real
  mode code
  never uses gs and fs (i.e. while we are technically in big real
  mode, the
  guest never relies on this), and because there are enough hacks in
  vmx.c to
  make it work (restoring fs and gs after the switch back).  IIRC
  there are
  other cases of invalid guest state that we hack into place during
  mode
  switches.
 
  Either way - then we should make the goal of the project to
  support those
  old boot loaders. IMHO it should contain visibility. Doing
  theoretical stuff
  is just less fun for all parties. Or does that stuff work already?
 
  Mostly those old guests aged beyond usefulness.  They are still
  broken, but
  nobody installs new images.  Old images installed via workarounds
  work.
 
  Goals for this task could include:
 
   - get those older guests working
   - get emulate_invalid_guest_state=1 to work on all supported guests
   - switch to emulate_invalid_guest_state=1 as the default
   - drop the code supporting emulate_invalid_guest_state=0 eventually
 
  To this end I guess the next logical step is to compile a list of
  guests that are currently not working/work with hacks only, and get
  them working. Here are some suggestions:
  - MINIX 3.1.6 (developers have been recently filing bug reports
  because of boot failures)
  - Win XP with emulation enabled
  - FreeDOS with memory extenders
 
  Any other guests you'd like to see on this list?
 
 I remember old openSUSE iso bootloaders had issues. I think it was
 around 10.3, but might have been earlier.
 
At least 10u2 installer has trouble. I had spent some time on it, finally 
found it's due to ISOLINUX.

The basic issue is that it assumes the SS selector/base is unchanged when 
entering/exiting protected mode. At that time, I cooked up a hack workaround for it, 
but didn't think it was proper to upstream.

-- 
regards
Yang, Sheng
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] qemu-kvm: json-parser: Output the content of invalid keyword

2010-03-24 Thread Aurelien Jarno
Hi,

On Wed, Mar 24, 2010 at 06:00:53PM +0800, Amos Kong wrote:
 When some invalid words are input on the QMP port, qemu outputs this error message:
 parse error: invalid keyword `%s'
 This patch makes qemu output the content.

Is this patch for QEMU or KVM? If it is for QEMU, you should put the
QEMU mailing list in Cc:. If it is for KVM, I don't have commit access
there.

 Signed-off-by: Amos Kong ak...@redhat.com
 ---
  json-parser.c |7 ++-
  1 files changed, 6 insertions(+), 1 deletions(-)
 
 diff --git a/json-parser.c b/json-parser.c
 index 579928f..98a82af 100644
 --- a/json-parser.c
 +++ b/json-parser.c
 @@ -12,6 +12,7 @@
   */
  
  #include stdbool.h
 +#include stdarg.h
  
  #include qemu-common.h
  #include qstring.h
 @@ -93,7 +94,11 @@ static int token_is_escape(QObject *obj, const char *value)
   */
  static void parse_error(JSONParserContext *ctxt, QObject *token, const char 
 *msg, ...)
  {
 -fprintf(stderr, parse error: %s\n, msg);
 +va_list ap;
 +va_start(ap, msg);
 +fprintf(stderr, parse error:);
 +vfprintf(stderr, msg, ap);
 +fprintf(stderr, \n);
  }
  
  /**
 -- 
 1.6.3.3
 
 

-- 
Aurelien Jarno  GPG: 1024D/F1BCDB73
aurel...@aurel32.net http://www.aurel32.net
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [RFC] Unify KVM kernel-space and user-space code into a single project

2010-03-24 Thread Joerg Roedel
On Wed, Mar 24, 2010 at 06:57:47AM +0200, Avi Kivity wrote:
 On 03/23/2010 08:21 PM, Joerg Roedel wrote:
 This enumeration is a very small and non-intrusive feature. Making it
 aware of namespaces is easy too.


 It's easier (and safer and all the other boring bits) not to do it at  
 all in the kernel.

For the KVM stack it doesn't matter where it is implemented. It is as
easy in qemu or libvirt as in the kernel. I also don't see big risks. On
the perf side and for its users it is a lot easier to have this in the
kernel.
I for example always use plain qemu when running kvm guests and never
used libvirt. The only central entity I have here is the kvm kernel
modules. I don't want to start using it only to be able to use perf kvm.

 Who would be the consumer of such notifications? A 'perf kvm list' can
 live without I guess. If we need them later we can still add them.

 System-wide monitoring needs to work equally well for guests started  
 before or after the monitor.

Could be easily done using notifier chains already in the kernel.
Probably implemented with much less than 100 lines of additional code.

 Even disregarding that, if you introduce  an API, people will start
 using it and complaining if it's incomplete.

There is nothing wrong with that. We only need to define what this API
should be used for to prevent rank growth. It could be an
instrumentation-only API for example.

 My statement was not limited to enumeration, I should have been more
 clear about that. The guest filesystem access-channel is another
 affected part. The 'perf kvm top' command will access the guest
 filesystem regularly and going over qemu would be more overhead here.


 Why?  Also, the real cost would be accessing the filesystem, not copying  
 data over qemu.

When measuring cache-misses any additional (and in this case
unnecessary) copy-overhead results in less appropriate results.

 Providing this in the KVM module directly also has the benefit that it
 would work out-of-the-box with different userspaces too.  Or do we want
 to limit 'perf kvm' to the libvirt-qemu-kvm software stack?

 Other userspaces can also provide this functionality, like they have to  
 provide disk, network, and display emulation.  The kernel is not a huge  
 library.

This has nothing to do with a library. It is about entity and resource
management which is what os kernels are about. The virtual machine is
the entity (similar to a process) and we want to add additional access
channels and names to it.

Joerg

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [RFC] Unify KVM kernel-space and user-space code into a single project

2010-03-24 Thread Paolo Bonzini

On 03/22/2010 08:13 AM, Avi Kivity wrote:


(btw, why are you interested in desktop-on-desktop?  one use case is
developers, which don't really need fancy GUIs; a second is people who
test out distributions, but that doesn't seem to be a huge population;
and a third is people running Windows for some application that doesn't
run on Linux - hopefully a small category as well.


This third category is pretty well served by virt-manager.  It has its 
quirks and shortcomings, but at least it exists.


Paolo
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [RFC] Unify KVM kernel-space and user-space code into a single project

2010-03-24 Thread Avi Kivity

On 03/24/2010 01:59 PM, Joerg Roedel wrote:

On Wed, Mar 24, 2010 at 06:57:47AM +0200, Avi Kivity wrote:
   

On 03/23/2010 08:21 PM, Joerg Roedel wrote:
 

This enumeration is a very small and non-intrusive feature. Making it
aware of namespaces is easy too.

   

It's easier (and safer and all the other boring bits) not to do it at
all in the kernel.
 

For the KVM stack it doesn't matter where it is implemented. It is as
easy in qemu or libvirt as in the kernel. I also don't see big risks. On
the perf side and for its users it is a lot easier to have this in the
kernel.
I for example always use plain qemu when running kvm guests and never
used libvirt. The only central entity I have here is the kvm kernel
modules. I don't want to start using it only to be able to use perf kvm.
   


You can always provide the kernel and module paths as command line 
parameters.  It just won't be transparently usable, but if you're using 
qemu from the command line, presumably you can live with that.



Who would be the consumer of such notifications? A 'perf kvm list' can
live without I guess. If we need them later we can still add them.
   

System-wide monitoring needs to work equally well for guests started
before or after the monitor.
 

Could be easily done using notifier chains already in the kernel.
Probably implemented with much less than 100 lines of additional code.
   


And a userspace interface for that.


Even disregarding that, if you introduce  an API, people will start
using it and complaining if it's incomplete.
 

There is nothing wrong with that. We only need to define what this API
should be used for to prevent rank growth. It could be an
instrumentation-only API for example.
   


If we make an API, I'd like it to be generally useful.

It's a total headache.  For example, we'd need security module hooks to 
determine access permissions.  So far we managed to avoid that since kvm 
doesn't allow you to access any information beyond what you provided it 
directly.




My statement was not limited to enumeration, I should have been more
clear about that. The guest filesystem access-channel is another
affected part. The 'perf kvm top' command will access the guest
filesystem regularly and going over qemu would be more overhead here.

   

Why?  Also, the real cost would be accessing the filesystem, not copying
data over qemu.
 

When measuring cache-misses any additional (and in this case
unnecessary) copy-overhead results in less appropriate results.
   


Copying the objects is a one time cost.  If you run perf for more than a 
second or two, it would fetch and cache all of the data.  It's really 
the same problem with non-guest profiling, only magnified a bit.



Providing this in the KVM module directly also has the benefit that it
would work out-of-the-box with different userspaces too.  Or do we want
to limit 'perf kvm' to the libvirt-qemu-kvm software stack?
   

Other userspaces can also provide this functionality, like they have to
provide disk, network, and display emulation.  The kernel is not a huge
library.
 

This has nothing to do with a library. It is about entity and resource
management which is what os kernels are about. The virtual machine is
the entity (similar to a process) and we want to add additional access
channels and names to it.
   


kvm.ko has only a small subset of the information that is used to define 
a guest.


--
error compiling committee.c: too many arguments to function

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: SIGSEGV with -smp 17+, and error handling around...

2010-03-24 Thread Avi Kivity

On 03/17/2010 10:12 PM, Michael Tokarev wrote:

When run with -smp 17 or greater, kvm
fails like this:

$ kvm -smp 17
kvm_create_vcpu: Invalid argument
kvm_setup_mce FAILED: Invalid argument
KVM_SET_LAPIC failed
Segmentation fault
$ _

In qemu-kvm.c, the kvm_create_vcpu() routine
(which is used in a vcpu thread to set up
vcpu) is declared as void, i.e, no error
return.  And the code that calls it blindly
assumes that it will never fail...

But the first error message above is from kernel,
which - apparently - refuses to create 17th vCPU.
Hence we've a vcpu thread which is empty/dummy and
not even fully initialized... so it fails later
in the game.

This all looks quite... raw, not polished ;)

Can we somehow handle the (several possible) errors
in that (and other) places, and how we ever can act
on them?  Abort?  Warn the user and reduce the number
of vcpus accordingly (seems wrong, esp. if it were
some first vcpus or in the middle which failed)...
   


A patch from Alex fixing this was just committed.  I'll apply it to the 
stable branch as well.


--
error compiling committee.c: too many arguments to function

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: qemu-kvm crashes with Assertion ... failed.

2010-03-24 Thread Avi Kivity

On 03/17/2010 11:14 PM, André Weidemann wrote:

qemu-system-x86_64 -cpu core2duo -vga cirrus -boot order=ndc -vnc
192.168.3.42:2 -k de -smp 4,cores=4 -drive
file=/vmware/Windows7Test_600G.img,if=ide,index=0,cache=writeback -m
1024 -net nic,model=e1000,macaddr=DE:AD:BE:EF:12:3A -net
tap,script=/usr/local/bin/qemu-ifup  -monitor pty -name
Windows7test,process=Windows7test -drive
file=/dev/storage/Windows7test,if=ide,index=1,cache=none,aio=native


Andre,

Can you try qemu-kvm-0.12.3 ?



I did the following:
git clone git://git.kernel.org/pub/scm/virt/kvm/qemu-kvm.git 
qemu-kvm-2010-03-17

cd qemu-kvm-2010-03-17
git checkout -b test qemu-kvm-0.12.3
./configure
make -j6 && make install

I started the VM again exactly as I did the last time and it crashed 
again with the same error message.
qemu-system-x86_64: 
/usr/local/src/qemu-kvm-2010-03-17/hw/ide/internal.h:507: 
bmdma_active_if: Assertion `bmdma->unit != (uint8_t)-1' failed.




Does this happen with a guest installed on kvm, or just with the guest 
that (guessing from the name) was imported from vmware?


--
error compiling committee.c: too many arguments to function

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: PCI device passthrough / memory mapping issue

2010-03-24 Thread Avi Kivity

On 03/19/2010 12:46 AM, Fede wrote:

I'm currently working to enable vga passthrough in kvm.
   



assigned_dev_iomem_map: e_phys=1000 r_virt=0x7fa64af0e000 type=0
len=0200 region_num=3
kvm_register_phys_mem:580 memory: gpa: 1000, size: 200, uaddr:
7fa64af0e000, slot: 7,flags: 0
create_userspace_phys_mem: File exists
assigned_dev_iomem_map: Error: create new mapping failed


This worked a month ago. But after some git updates there's a problem.

When the real device regions are mapped from real virtual memory to
guest physical addresses in kvm, it overlaps region 3 with the guest
physical memory assigned to kernel space (0x1000 to 0x1020)

I've been trying to look for the answer to this question: Why is gpa
0x1000 chosen and not any other free memory space?
   


The BIOS generally assigns addresses, then the OS updates them if it 
wants to.  Is the crash before or after the OS has started loading?


If before, suggest you add some printfs to seabios to explain its decisions.


It seems that these addresses are being chosen here (for example, for region 3):
assigned_dev_pci_read_config: (4.0): address=001c val=0x len=4
assigned_dev_pci_write_config: (4.0): address=001c val=0x len=4
assigned_dev_pci_read_config: (4.0): address=001c val=0xfe00 len=4
   


The above sequence is how the bios determines the BAR size. (0x200 
in this case).



assigned_dev_pci_write_config: (4.0): address=001c val=0x len=4
   


Now it clears the mess from the previous step.


assigned_dev_pci_read_config: (4.0): address=001c val=0x len=4
assigned_dev_pci_write_config: (4.0): address=001c val=0x1000 len=4
   


Here it assigns a new address.  It's clearly wrong.  A log in seabios 
will explain this.



What does this mean? Why are PCI BARs being written and read?



Why do the
values that are written differ from the ones that are read afterwards?
   


BARs are not RAM cells.  The least significant address bits are 
hardwired to zero, that's how the BIOS detects the BAR size (and 
required alignment).



The last write is the gpa that is used.

Is this a bug? I can't find the source of these gpa addresses. I need
to change them.
   


Looks like a bug in seabios.

--
error compiling committee.c: too many arguments to function

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCHv6 0/4] qemu-kvm: vhost net port

2010-03-24 Thread Avi Kivity

On 03/17/2010 03:04 PM, Michael S. Tsirkin wrote:

This is a port of the vhost v6 patch set I posted previously to qemu-kvm, for
those that want to get good performance out of it :) This patchset needs
to be applied when qemu.git one gets merged, this includes irqchip
support.

   


Ping me when this happens please.

--
error compiling committee.c: too many arguments to function

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[KVM-AUTOTEST PATCH] KVM test: increase default timeout for autotest.sleeptest

2010-03-24 Thread Michael Goldish
Signed-off-by: Michael Goldish mgold...@redhat.com
---
 client/tests/kvm/tests_base.cfg.sample |2 +-
 1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/client/tests/kvm/tests_base.cfg.sample 
b/client/tests/kvm/tests_base.cfg.sample
index 249f1b4..b8288fc 100644
--- a/client/tests/kvm/tests_base.cfg.sample
+++ b/client/tests/kvm/tests_base.cfg.sample
@@ -111,7 +111,7 @@ variants:
 variants:
 - sleeptest:
 test_name = sleeptest
-test_timeout = 30
+test_timeout = 120
 test_control_file = sleeptest.control
 - dbench:
 test_name = dbench
-- 
1.5.4.1

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [RFC] Unify KVM kernel-space and user-space code into a single project

2010-03-24 Thread Avi Kivity

On 03/24/2010 02:50 PM, Joerg Roedel wrote:



You can always provide the kernel and module paths as command line
parameters.  It just won't be transparently usable, but if you're using
qemu from the command line, presumably you can live with that.
 

I don't want the tool for myself only. A typical perf user expects that
it works transparently.
   


A typical kvm user uses libvirt, so we can integrate it with that.


Could be easily done using notifier chains already in the kernel.
Probably implemented with much less than 100 lines of additional code.
   

And a userspace interface for that.
 

Not necessarily. The perf event is configured to measure systemwide kvm
by userspace. The kernel side of perf takes care that it stays
system-wide even with added vm instances. So in this case the consumer
for the notifier would be the perf kernel part. No userspace interface
required.
   


Someone needs to know about the new guest to fetch its symbols.  Or do 
you want that part in the kernel too?



If we make an API, I'd like it to be generally useful.
 

That's hard to do at this point since we don't know what people will use
it for. We should keep it simple in the beginning and add new features
as they are requested and make sense in this context.
   


IMO this use case is too rare to warrant its own API, especially as there 
are alternatives.



It's a total headache.  For example, we'd need security module hooks to
determine access permissions.  So far we managed to avoid that since kvm
doesn't allow you to access any information beyond what you provided it
directly.
 

Depends on how it is designed. A filesystem approach was already
mentioned. We could create /sys/kvm/ for example to expose information
about virtual machines to userspace. This would not require any new
security hooks.
   


Who would set the security context on those files?  Plus, we need cgroup 
support so you can't see one container's guests from an unrelated container.



Copying the objects is a one time cost.  If you run perf for more than a
second or two, it would fetch and cache all of the data.  It's really
the same problem with non-guest profiling, only magnified a bit.
 

I don't think we can cache filesystem data of a running guest on the
host. It is too hard to keep such a cache coherent.
   


I don't see any choice.  The guest can change its symbols at any time 
(say by kexec), without any notification.



Other userspaces can also provide this functionality, like they have to
provide disk, network, and display emulation.  The kernel is not a huge
library.
 

If two userspaces run in parallel what is the single instance where perf
can get a list of guests from?
   


I don't know.  Surely that's solvable though.


kvm.ko has only a small subset of the information that is used to define
a guest.
 

The subset is not small. It contains all guest vcpus, the complete
interrupt routing hardware emulation and even manages the guests'
memory.
   


It doesn't contain most of the mmio and pio address space.  Integration 
with qemu would allow perf to tell us that the guest is hitting the 
interrupt status register of a virtio-blk device in pci slot 5 (the 
information is already available through the kvm_mmio trace event, but 
only qemu can decode it).


--
error compiling committee.c: too many arguments to function

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [RFC] Unify KVM kernel-space and user-space code into a single project

2010-03-24 Thread Joerg Roedel
On Wed, Mar 24, 2010 at 03:05:02PM +0200, Avi Kivity wrote:
 On 03/24/2010 02:50 PM, Joerg Roedel wrote:

 I don't want the tool for myself only. A typical perf user expects that
 it works transparently.

 A typical kvm user uses libvirt, so we can integrate it with that.

Someone who uses libvirt and virt-manager by default is probably not
interested in this feature at the same level a kvm developer is. And
developers tend not to use libvirt for low-level kvm development.  A
number of developers have stated in this thread already that they would
appreciate a solution for guest enumeration that would not involve
libvirt.

 Someone needs to know about the new guest to fetch its symbols.  Or do  
 you want that part in the kernel too?

The samples will be tagged with the guest-name (and some additional
information perf needs). Perf userspace can access the symbols then
through /sys/kvm/guest0/fs/...

 Depends on how it is designed. A filesystem approach was already
 mentioned. We could create /sys/kvm/ for example to expose information
 about virtual machines to userspace. This would not require any new
 security hooks.

 Who would set the security context on those files?

An approach like: The files are owned and only readable by the same
user that started the vm. might be a good start. So a user can measure
its own guests and root can measure all of them.

 Plus, we need cgroup  support so you can't see one container's guests
 from an unrelated container.

cgroup support is an issue but we can solve that too. It's in general
still less complex than going through the whole libvirt-qemu-kvm stack.

 Integration with qemu would allow perf to tell us that the guest is
 hitting the interrupt status register of a virtio-blk device in pci
 slot 5 (the information is already available through the kvm_mmio
 trace event, but  only qemu can decode it).

Yeah that would be interesting information. But it is more related to
tracing than to pmu measurements.
The information which you mentioned above is probably better
captured by an extension of trace-events to userspace.

Joerg

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [RFC] Unify KVM kernel-space and user-space code into a single project

2010-03-24 Thread Alexander Graf
Avi Kivity wrote:
 On 03/24/2010 02:50 PM, Joerg Roedel wrote:

 You can always provide the kernel and module paths as command line
 parameters.  It just won't be transparently usable, but if you're using
 qemu from the command line, presumably you can live with that.
  
 I don't want the tool for myself only. A typical perf user expects that
 it works transparently.


 A typical kvm user uses libvirt, so we can integrate it with that.

 Could be easily done using notifier chains already in the kernel.
 Probably implemented with much less than 100 lines of additional code.

 And a userspace interface for that.
  
 Not necessarily. The perf event is configured to measure systemwide kvm
 by userspace. The kernel side of perf takes care that it stays
 system-wide even with added vm instances. So in this case the consumer
 for the notifier would be the perf kernel part. No userspace interface
 required.


 Someone needs to know about the new guest to fetch its symbols.  Or do
 you want that part in the kernel too?


How about we add a virtio guest file system access device? The guest
would then expose its own file system using that device.

On the host side this would simply be a -virtioguestfs
unix:/tmp/guest.fs and you'd get a unix socket that gives you full
access to the guest file system by using commands. I envision something
like:

SEND: GET /proc/version
RECV: Linux version 2.6.27.37-0.1-default (ge...@buildhost) (gcc version
4.3.2 [gcc-4_3-branch revision 141291] (SUSE Linux) ) #1 SMP 2009-10-15
14:56:58 +0200

Now all we need is integration in perf to enumerate virtual machines
based on libvirt. If you want to run qemu-kvm directly, just go with
--guestfs=/tmp/guest.fs and perf could fetch all required information
automatically.

This should solve all issues while staying 100% in user space, right?


Alex

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [RFC] Unify KVM kernel-space and user-space code into a single project

2010-03-24 Thread Avi Kivity

On 03/24/2010 03:46 PM, Joerg Roedel wrote:

On Wed, Mar 24, 2010 at 03:05:02PM +0200, Avi Kivity wrote:
   

On 03/24/2010 02:50 PM, Joerg Roedel wrote:
 
   

I don't want the tool for myself only. A typical perf user expects that
it works transparently.
   

A typical kvm user uses libvirt, so we can integrate it with that.
 

Someone who uses libvirt and virt-manager by default is probably not
interested in this feature at the same level a kvm developer is. And
developers tend not to use libvirt for low-level kvm development.  A
number of developers have stated in this thread already that they would
appreciate a solution for guest enumeration that would not involve
libvirt.
   


So would I.  But when I weigh the benefit of truly transparent 
system-wide perf integration for users who don't use libvirt but do use 
perf, versus the cost of transforming kvm from a single-process API to a 
system-wide API with all the complications that I've listed, it comes 
out in favour of not adding the API.


Those few users can probably script something to cover their needs.


Someone needs to know about the new guest to fetch its symbols.  Or do
you want that part in the kernel too?
 

The samples will be tagged with the guest-name (and some additional
information perf needs). Perf userspace can access the symbols then
through /sys/kvm/guest0/fs/...
   


I take that as a yes?  So we need a virtio-serial client in the kernel 
(which might be exploitable by a malicious guest if buggy) and a 
fs-over-virtio-serial client in the kernel (also exploitable).



Depends on how it is designed. A filesystem approach was already
mentioned. We could create /sys/kvm/ for example to expose information
about virtual machines to userspace. This would not require any new
security hooks.
   

Who would set the security context on those files?
 

An approach like: The files are owned and only readable by the same
user that started the vm. might be a good start. So a user can measure
its own guests and root can measure all of them.
   


That's not how sVirt works.  sVirt isolates a user's VMs from each 
other, so if a guest breaks into qemu it can't break into other guests 
owned by the same user.


The users who need this API (!libvirt and perf) probably don't care 
about sVirt, but a new API must not break it.



Plus, we need cgroup  support so you can't see one container's guests
from an unrelated container.
 

cgroup support is an issue but we can solve that too. It's in general
still less complex than going through the whole libvirt-qemu-kvm stack.
   


It's a tradeoff.  IMO, going through qemu is the better way, and also 
provides more information.



Integration with qemu would allow perf to tell us that the guest is
hitting the interrupt status register of a virtio-blk device in pci
slot 5 (the information is already available through the kvm_mmio
trace event, but  only qemu can decode it).
 

Yeah that would be interesting information. But it is more related to
tracing than to pmu measurements.
The information which you mentioned above is probably better
captured by an extension of trace-events to userspace.
   


It's all related.  You start with perf, see a problem with mmio, call up 
a histogram of mmio or interrupts or whatever, then zoom in on the 
misbehaving device.


--
error compiling committee.c: too many arguments to function

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [RFC] Unify KVM kernel-space and user-space code into a single project

2010-03-24 Thread Avi Kivity

On 03/24/2010 03:53 PM, Alexander Graf wrote:



Someone needs to know about the new guest to fetch its symbols.  Or do
you want that part in the kernel too?
 


How about we add a virtio guest file system access device? The guest
would then expose its own file system using that device.

On the host side this would simply be a -virtioguestfs
unix:/tmp/guest.fs and you'd get a unix socket that gives you full
access to the guest file system by using commands. I envision something
like:
   


The idea is to use a dedicated channel over virtio-serial.  If the 
channel is present the file server can serve files over it.



SEND: GET /proc/version
RECV: Linux version 2.6.27.37-0.1-default (ge...@buildhost) (gcc version
4.3.2 [gcc-4_3-branch revision 141291] (SUSE Linux) ) #1 SMP 2009-10-15
14:56:58 +0200

Now all we need is integration in perf to enumerate virtual machines
based on libvirt. If you want to run qemu-kvm directly, just go with
--guestfs=/tmp/guest.fs and perf could fetch all required information
automatically.

This should solve all issues while staying 100% in user space, right?
   


Yeah, needs a fuse filesystem to populate the host namespace (kind of 
sshfs over virtio-serial).


--
error compiling committee.c: too many arguments to function

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] qemu-kvm: json-parser: Output the content of invalid keyword

2010-03-24 Thread Luiz Capitulino
On Wed, 24 Mar 2010 11:29:40 +0100
Aurelien Jarno aurel...@aurel32.net wrote:

 Hi,
 
 On Wed, Mar 24, 2010 at 06:00:53PM +0800, Amos Kong wrote:
  When some invalid words are input on the QMP port, qemu outputs this error message:
  parse error: invalid keyword `%s'
  This patch makes qemu output the content.
 
 Is this patch for QEMU or KVM? If it is for QEMU, you should put the
 QEMU mailing list in Cc:. If it is for KVM, I don't have commit access
 there.

 It's for QEMU and looks good to me.

 Amos, can you resend there please? Just to take the right route..

 
  Signed-off-by: Amos Kong ak...@redhat.com
  ---
   json-parser.c |7 ++-
   1 files changed, 6 insertions(+), 1 deletions(-)
  
  diff --git a/json-parser.c b/json-parser.c
  index 579928f..98a82af 100644
  --- a/json-parser.c
  +++ b/json-parser.c
  @@ -12,6 +12,7 @@
*/
   
   #include stdbool.h
  +#include stdarg.h
   
   #include qemu-common.h
   #include qstring.h
  @@ -93,7 +94,11 @@ static int token_is_escape(QObject *obj, const char 
  *value)
*/
   static void parse_error(JSONParserContext *ctxt, QObject *token, const 
  char *msg, ...)
   {
  -fprintf(stderr, parse error: %s\n, msg);
  +va_list ap;
  +va_start(ap, msg);
  +fprintf(stderr, parse error:);
  +vfprintf(stderr, msg, ap);
  +fprintf(stderr, \n);
   }
   
   /**
  -- 
  1.6.3.3
  
  
 

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [KVM PATCH] pci passthrough: zap option rom scanning.

2010-03-24 Thread Alexander Graf
Marcelo Tosatti wrote:
 On Wed, Jan 20, 2010 at 11:58:48AM +0100, Gerd Hoffmann wrote:
   
 Nowadays (qemu 0.12) seabios loads option roms from pci rom bars.  So
 there is no need any more to scan for option roms and have qemu load
 them.  Zap the code.

 Signed-off-by: Gerd Hoffmann kra...@redhat.com
 

 Applied, thanks.
   

Without this patch (0.12.3) I get the following error message when
trying to pass 2 functions of an ixgbe adapter to the guest:

falla:/abuild/agraf/qemu-kvm/:[0]# qemu-kvm -pcidevice host=07:00.0
-pcidevice host=07:00.1 -nographic -append console=ttyS0 -kernel
/boot/vmlinuz.x -initrd /boot/initrd.x
device: 07:00.0: driver=pci-assign host=07:00.0
device: 07:00.1: driver=pci-assign host=07:00.1
rom: requested regions overlap (rom 07:00.1. free=0xac00,
addr=0x)
rom loading failed


The same code works with qemu-kvm.git. Cherry picking this commit
(51c0dad5ce383be94ca7c46e491ada17cc9ec416) also makes it work in
0.12-stable.


Thus I'd be inclined to also take this patch into 0.12-stable.


Alex


--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [RFC] Unify KVM kernel-space and user-space code into a single project

2010-03-24 Thread Alexander Graf
Avi Kivity wrote:
 On 03/24/2010 03:53 PM, Alexander Graf wrote:

 Someone needs to know about the new guest to fetch its symbols.  Or do
 you want that part in the kernel too?
  

 How about we add a virtio guest file system access device? The guest
 would then expose its own file system using that device.

 On the host side this would simply be a -virtioguestfs
 unix:/tmp/guest.fs and you'd get a unix socket that gives you full
 access to the guest file system by using commands. I envision something
 like:


 The idea is to use a dedicated channel over virtio-serial.  If the
 channel is present the file server can serve files over it.

The file server being a kernel module inside the guest? We want to be
able to serve things as early and hassle free as possible, so in this
case I agree with Ingo that a kernel module is superior.


 SEND: GET /proc/version
 RECV: Linux version 2.6.27.37-0.1-default (ge...@buildhost) (gcc version
 4.3.2 [gcc-4_3-branch revision 141291] (SUSE Linux) ) #1 SMP 2009-10-15
 14:56:58 +0200

 Now all we need is integration in perf to enumerate virtual machines
 based on libvirt. If you want to run qemu-kvm directly, just go with
 --guestfs=/tmp/guest.fs and perf could fetch all required information
 automatically.

 This should solve all issues while staying 100% in user space, right?


 Yeah, needs a fuse filesystem to populate the host namespace (kind of
 sshfs over virtio-serial).

I don't see why we need a fuse filesystem. We can of course create one
later on. But for now all you need is a user connecting to that socket.


Alex


--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [Autotest] [PATCH 2/4] KVM test: Add TSC into guest test

2010-03-24 Thread Lucas Meneghel Rodrigues
On Wed, Mar 24, 2010 at 2:59 AM, Jason Wang jasow...@redhat.com wrote:
 Lucas Meneghel Rodrigues wrote:
 On Mon, Mar 22, 2010 at 4:45 AM, Jason Wang jasow...@redhat.com wrote:

 TSC is used to check whether the TSCs of the processors are
 synchronized, which is useful for testing virtual TSC.


 The only thing that needs to be corrected here is that this test needs
 -smp > 1 to work (actually, even numbers work better), so I'll make
 it not available with -smp 1. Thanks for the patch, I am going to put
 it upstream soon!


 I agree, and maybe we'd better also use smp > 1 in the test of
 monotonic_time ?

Monotonic time does not have this restriction, as far as I know... Unless I
am very mistaken

 Signed-off-by: Jason Wang jasow...@redhat.com
 ---
  client/tests/kvm/autotest_control/tsc.control |   13 +
  client/tests/kvm/tests_base.cfg.sample        |    3 +++
  2 files changed, 16 insertions(+), 0 deletions(-)
  create mode 100644 client/tests/kvm/autotest_control/tsc.control

 diff --git a/client/tests/kvm/autotest_control/tsc.control 
 b/client/tests/kvm/autotest_control/tsc.control
 new file mode 100644
 index 000..0c1c65a
 --- /dev/null
 +++ b/client/tests/kvm/autotest_control/tsc.control
 @@ -0,0 +1,13 @@
 +NAME = 'Check TSC'
 +AUTHOR = 'Michael Davidson m...@google.com'
 +TIME = 'MEDIUM'
 +TEST_CLASS = 'Kernel'
 +TEST_CATEGORY = 'Functional'
 +TEST_TYPE = 'client'
 +DOC = 
 +checktsc is a user space program that checks TSC synchronization
 +between pairs of CPUs on an SMP system using a technique borrowed
 +from the Linux 2.6.18 kernel.
 +
 +
 +job.run_test('tsc')
 diff --git a/client/tests/kvm/tests_base.cfg.sample 
 b/client/tests/kvm/tests_base.cfg.sample
 index 2af6a05..861759e 100644
 --- a/client/tests/kvm/tests_base.cfg.sample
 +++ b/client/tests/kvm/tests_base.cfg.sample
 @@ -136,6 +136,9 @@ variants:
             - monotonic_time:
                 test_name = monotonic_time
                 test_control_file = monotonic_time.control
 +            - tsc:
 +                test_name = tsc
 +                test_control_file = tsc.control

     - linux_s3:     install setup unattended_install
         type = linux_s3

 ___
 Autotest mailing list
 autot...@test.kernel.org
 http://test.kernel.org/cgi-bin/mailman/listinfo/autotest







 ___
 Autotest mailing list
 autot...@test.kernel.org
 http://test.kernel.org/cgi-bin/mailman/listinfo/autotest




-- 
Lucas
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v2] KVM test: Enable timedrift for Linux guests

2010-03-24 Thread Lucas Meneghel Rodrigues
On Wed, Mar 24, 2010 at 3:25 AM, Jason Wang jasow...@redhat.com wrote:
 We should also test timedrift for Linux guests especially for guest
 with pvclock. So this patch enables the timedrift for linux guests.

 Changes from v1:
 - Correct the wrong name for guest load cleaning
 - Use -no-kvm-pit-reinjection for linux guests and -rtc-td-hack for
 windows guests.

Here I have a little doubt if the test is useful only while running
under these command line options (since we indeed have timedrift
failures without them). Maybe it makes more sense to create variants
with these options, to ensure that this command line will also be
tested. Michael?

 Signed-off-by: Jason Wang jasow...@redhat.com
 ---
  client/tests/kvm/tests_base.cfg.sample |   14 --
  1 files changed, 12 insertions(+), 2 deletions(-)

 diff --git a/client/tests/kvm/tests_base.cfg.sample 
 b/client/tests/kvm/tests_base.cfg.sample
 index 8cc83a9..29a2430 100644
 --- a/client/tests/kvm/tests_base.cfg.sample
 +++ b/client/tests/kvm/tests_base.cfg.sample
 @@ -147,7 +147,6 @@ variants:
         type = linux_s3

     - timedrift:    install setup unattended_install
 -        extra_params +=  -rtc-td-hack
         variants:
             - with_load:
                 type = timedrift
 @@ -330,7 +329,7 @@ variants:
  variants:
     # Linux section
     - @Linux:
 -        no timedrift autoit
 +        no autoit
         shutdown_command = shutdown -h now
         reboot_command = shutdown -r now
         status_test_command = echo $?
 @@ -342,6 +341,16 @@ variants:
         file_transfer_port = 22
         mem_chk_cmd = dmidecode -t 17 | awk -F: '/Size/ {print $2}'
         cpu_chk_cmd = grep -c processor /proc/cpuinfo
 +        timedrift:
 +            extra_params +=  -no-kvm-pit-reinjection
 +            time_command = date +'TIME: %a %m/%d/%Y %H:%M:%S.%N'
 +            time_filter_re = (?:TIME: \w\w\w )(.{19})(?:\.\d\d)
 +            time_format = %m/%d/%Y %H:%M:%S
 +            guest_load_command = dd if=/dev/urandom of=/dev/null
 +            guest_load_instances = 2
 +            guest_load_stop_command = killall -9 dd
 +            host_load_command = bzip2 -c --best /dev/urandom  /dev/null
 +            host_load_instances = 8

         variants:
             - Fedora:
 @@ -717,6 +726,7 @@ variants:
         stress_boot:
             alive_test_cmd = systeminfo
         timedrift:
 +            extra_params +=  -rtc-td-hack
             time_command = echo TIME: %date% %time%
             time_filter_re = (?=TIME: \w\w\w ).{19}(?=\.\d\d)
             time_format = %m/%d/%Y %H:%M:%S

 --
 To unsubscribe from this list: send the line unsubscribe kvm in
 the body of a message to majord...@vger.kernel.org
 More majordomo info at  http://vger.kernel.org/majordomo-info.html




-- 
Lucas
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [RFC] Unify KVM kernel-space and user-space code into a single project

2010-03-24 Thread Joerg Roedel
On Wed, Mar 24, 2010 at 03:57:39PM +0200, Avi Kivity wrote:
 On 03/24/2010 03:46 PM, Joerg Roedel wrote:

 Someone who uses libvirt and virt-manager by default is probably not
 interested in this feature at the same level a kvm developer is. And
 developers tend not to use libvirt for low-level kvm development.  A
 number of developers have stated in this thread already that they would
 appreciate a solution for guest enumeration that would not involve
 libvirt.

 So would I.

Great.

 But when I weigh the benefit of truly transparent  system-wide perf
 integration for users who don't use libvirt but do use  perf, versus
 the cost of transforming kvm from a single-process API to a
 system-wide API with all the complications that I've listed, it comes
 out in favour of not adding the API.

It's not a transformation, it's an extension. The current per-process
/dev/kvm stays mostly untouched. It's all about having something like
this:

$ cd /sys/kvm/guest0
$ ls -l
-r 1 root root 0 2009-08-17 12:05 name
dr-x-- 1 root root 0 2009-08-17 12:05 fs
$ cat name
guest0
$ # ...

The fs/ directory is used as the mount point for the guest root fs.

 The samples will be tagged with the guest-name (and some additional
 information perf needs). Perf userspace can access the symbols then
 through /sys/kvm/guest0/fs/...

 I take that as a yes?  So we need a virtio-serial client in the kernel  
 (which might be exploitable by a malicious guest if buggy) and a  
 fs-over-virtio-serial client in the kernel (also exploitable).

What I meant was: perf-kernel puts the guest-name into every sample and
perf-userspace accesses /sys/kvm/guest_name/fs/ later to resolve the
symbols. I leave the question of how the guest-fs is exposed to the host
out of this discussion. We should discuss this separately.


 An approach like: The files are owned and only readable by the same
 user that started the vm. might be a good start. So a user can measure
 its own guests and root can measure all of them.

 That's not how sVirt works.  sVirt isolates a user's VMs from each  
 other, so if a guest breaks into qemu it can't break into other guests  
 owned by the same user.

If a vm breaks into qemu it can access the host file system which is the
bigger problem. In this case there is no isolation anymore. From that
context it can even kill other VMs of the same user independent of a
hypothetical /sys/kvm/.

 Yeah that would be interesting information. But it is more related to
 tracing than to pmu measurements.  The information which you
 mentioned above are probably better captured by an extension of
 trace-events to userspace.

 It's all related.  You start with perf, see a problem with mmio, call up  
 a histogram of mmio or interrupts or whatever, then zoom in on the  
 misbehaving device.

Yes, but it's different from the implementation point-of-view. For the
user it surely all plays together.

Joerg

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [RFC] Unify KVM kernel-space and user-space code into a single project

2010-03-24 Thread Avi Kivity

On 03/24/2010 04:24 PM, Alexander Graf wrote:

Avi Kivity wrote:
   

On 03/24/2010 03:53 PM, Alexander Graf wrote:
 
   

Someone needs to know about the new guest to fetch its symbols.  Or do
you want that part in the kernel too?

 

How about we add a virtio guest file system access device? The guest
would then expose its own file system using that device.

On the host side this would simply be a -virtioguestfs
unix:/tmp/guest.fs and you'd get a unix socket that gives you full
access to the guest file system by using commands. I envision something
like:

   

The idea is to use a dedicated channel over virtio-serial.  If the
channel is present the file server can serve files over it.
 

The file server being a kernel module inside the guest? We want to be
able to serve things as early and hassle free as possible, so in this
case I agree with Ingo that a kernel module is superior.
   


No, just a daemon.  If it's important enough we can get distributions to 
package it by default, and then it will be hassle free.  If early 
enough is also so important, we can get it to start up on initrd.  If 
it's really critical, we can patch grub to serve the files as well.



SEND: GET /proc/version
RECV: Linux version 2.6.27.37-0.1-default (ge...@buildhost) (gcc version
4.3.2 [gcc-4_3-branch revision 141291] (SUSE Linux) ) #1 SMP 2009-10-15
14:56:58 +0200

Now all we need is integration in perf to enumerate virtual machines
based on libvirt. If you want to run qemu-kvm directly, just go with
--guestfs=/tmp/guest.fs and perf could fetch all required information
automatically.

This should solve all issues while staying 100% in user space, right?

   

Yeah, needs a fuse filesystem to populate the host namespace (kind of
sshfs over virtio-serial).
 

I don't see why we need a fuse filesystem. We can of course create one
later on. But for now all you need is a user connecting to that socket.
   


If the perf app knows the protocol, no problem.  But leave perf with 
pure filesystem access and hide the details in fuse.


--
error compiling committee.c: too many arguments to function

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [RFC] Unify KVM kernel-space and user-space code into a single project

2010-03-24 Thread Avi Kivity

On 03/24/2010 05:01 PM, Joerg Roedel wrote:



But when I weigh the benefit of truly transparent  system-wide perf
integration for users who don't use libvirt but do use  perf, versus
the cost of transforming kvm from a single-process API to a
system-wide API with all the complications that I've listed, it comes
out in favour of not adding the API.
 

It's not a transformation, it's an extension. The current per-process
/dev/kvm stays mostly untouched. It's all about having something like
this:

$ cd /sys/kvm/guest0
$ ls -l
-r 1 root root 0 2009-08-17 12:05 name
dr-x-- 1 root root 0 2009-08-17 12:05 fs
$ cat name
guest0
$ # ...

The fs/ directory is used as the mount point for the guest root fs.
   


The problem is /sys/kvm, not /sys/kvm/fs.


The samples will be tagged with the guest-name (and some additional
information perf needs). Perf userspace can access the symbols then
through /sys/kvm/guest0/fs/...
   

I take that as a yes?  So we need a virtio-serial client in the kernel
(which might be exploitable by a malicious guest if buggy) and a
fs-over-virtio-serial client in the kernel (also exploitable).
 

What I meant was: perf-kernel puts the guest-name into every sample and
perf-userspace accesses /sys/kvm/guest_name/fs/ later to resolve the
symbols. I leave the question of how the guest-fs is exposed to the host
out of this discussion. We should discuss this separately.
   


How I see it: perf-kernel puts the guest pid into every sample, and 
perf-userspace uses that to resolve to a mountpoint served by fuse, or 
to a unix domain socket that serves the files.



An approach like: The files are owned and only readable by the same
user that started the vm. might be a good start. So a user can measure
its own guests and root can measure all of them.
   

That's not how sVirt works.  sVirt isolates a user's VMs from each
other, so if a guest breaks into qemu it can't break into other guests
owned by the same user.
 

If a vm breaks into qemu it can access the host file system which is the
bigger problem. In this case there is no isolation anymore. From that
context it can even kill other VMs of the same user independent of a
hypothetical /sys/kvm/.
   


It cannot.  sVirt labels the disk image and other files qemu needs with 
the appropriate label, and everything else is off limits.  Even if you 
run the guest as root, it won't have access to other files.



Yeah that would be interesting information. But it is more related to
tracing than to pmu measurements.  The information which you
mentioned above are probably better captured by an extension of
trace-events to userspace.
   

It's all related.  You start with perf, see a problem with mmio, call up
a histogram of mmio or interrupts or whatever, then zoom in on the
misbehaving device.
 

Yes, but its different from the implementation point-of-view. For the
user it surely all plays together.
   


We need qemu to cooperate for mmio tracing, and we can cooperate with 
qemu for symbol resolution.  If it prevents adding another kernel API, 
that's a win from my POV.


--
error compiling committee.c: too many arguments to function

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: KVM Test report, kernel 647e9e... qemu 7811d4...

2010-03-24 Thread Avi Kivity

On 03/08/2010 08:40 AM, Hao, Xudong wrote:

Hi, all,
This is KVM biweekly test result against kvm.git: 
647e9ec3b543ea04d49a7323dfe0070682ed8465 and qemu-kvm.git: 
7811d4e8ec057d25db68f900be1f09a142faca49.

In the last month, KVM testing was blocked by one qemu-img issue and two qemu 
build issues. Now the qemu build issue and qemu-img bug all get fixed.

2. ltp diotest running time is 2.54 times than before
https://sourceforge.net/tracker/?func=detail&aid=2723366&group_id=180599&atid=893831
   


Can you check the performance of this with cache=writeback?

The comment on the report referring to cache=writethrough is incorrect (I
think?)


--
error compiling committee.c: too many arguments to function

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [RFC] Unify KVM kernel-space and user-space code into a single project

2010-03-24 Thread Daniel P. Berrange
On Wed, Mar 24, 2010 at 04:01:37PM +0100, Joerg Roedel wrote:
  An approach like: The files are owned and only readable by the same
  user that started the vm. might be a good start. So a user can measure
  its own guests and root can measure all of them.
 
  That's not how sVirt works.  sVirt isolates a user's VMs from each  
  other, so if a guest breaks into qemu it can't break into other guests  
  owned by the same user.
 
 If a vm breaks into qemu it can access the host file system which is the
 bigger problem. In this case there is no isolation anymore. From that
 context it can even kill other VMs of the same user independent of a
 hypothetical /sys/kvm/.

No it can't. With sVirt every single VM has a custom security label and
the policy only allows it access to disks / files with a matching label,
and prevents it attacking any other VMs or processes on the host. This
confines the scope of any exploit in QEMU to those resources the admin
has explicitly assigned to the guest.

Regards,
Daniel
-- 
|: Red Hat, Engineering, London-o-   http://people.redhat.com/berrange/ :|
|: http://libvirt.org -o- http://virt-manager.org -o- http://deltacloud.org :|
|: http://autobuild.org-o- http://search.cpan.org/~danberr/ :|
|: GnuPG: 7D3B9505  -o-   F3C9 553F A1DA 4AC2 5648 23C1 B3DF F742 7D3B 9505 :|
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


OEM version of Windows in kvm (SLIC Co)

2010-03-24 Thread Michael Tokarev
After a series of tries, I finally made my OEM copy
of Windows 7 to work in KVM using the original
registration key.

In short, one needs the SLIC table from the BIOS on the
original hardware, -- it should be in the BIOS in
the virtual machine too.  And the second part is that
other tables in our virtual BIOS need to have the
same OEM identification as the SLIC table - namely
FACP (in case of kvm), and also XSDT and RSDT if
present.

The way to insert custom acpi table is using -acpitable
parameter.  But unfortunately kvm does not provide a way
to insert whole table this way, together with the header --
instead, it expects the header on the command line.  It
is possible to extract the header into printable form for
the command line, and cut it from the slic.bin, but I used
a different way: I modified hw/acpi.c to load the whole thing from
the given file.

After doing that, and giving -acpitable file=slic.bin (with
file= parameter being my quick-n-dirty addition) to kvm,
I was able to see the correct SLIC table in /sys/firmware/
acpi/tables/SLIC in linux.  But windows refused to activate.

So the next step was to modify seabios OEM string which it
placed to other tables.  For that, in src/acpi.c I just added

  memcpy(h->oem_id, "_ASUS_Notebook", 14);

to build_header() routine (yes it is a notebook from Asus with
licensed version of windows7 professional).

And after that step windows happily told me that I'm now using
genuine copy of it and the activation is completed.

As far as I can see I have right to run my licensed copy this
way, on the same notebook it was purchased with.

So.. the real question is: while this quick-n-dirty proof of
concept works, it's not the way to go.  What can be done to
simplify the whole thing and to do it the Right Way?

At least having a way to accept complete acpi table (with header
and checksum and everything) is - IMHO - a good thing.

But I'm not sure about the OEM ID strings in other tables in seabios, --
it is quite ugly, both in implementation (how to tell bios to change
its identity?) and in the whole fact of it, since we're lying to the
(virtual) machine.  But from another point of view, it should be a
good debugging tool, since some software behaves differently given
one or another strings in the BIOS...

Comments, anyone?

Thanks!

/mjt
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [RFC] Unify KVM kernel-space and user-space code into a single project

2010-03-24 Thread Joerg Roedel
On Wed, Mar 24, 2010 at 03:26:53PM +0000, Daniel P. Berrange wrote:
 On Wed, Mar 24, 2010 at 04:01:37PM +0100, Joerg Roedel wrote:
   An approach like: The files are owned and only readable by the same
   user that started the vm. might be a good start. So a user can measure
   its own guests and root can measure all of them.
  
   That's not how sVirt works.  sVirt isolates a user's VMs from each  
   other, so if a guest breaks into qemu it can't break into other guests  
   owned by the same user.
  
  If a vm breaks into qemu it can access the host file system which is the
  bigger problem. In this case there is no isolation anymore. From that
  context it can even kill other VMs of the same user independent of a
  hypothetical /sys/kvm/.
 
 No it can't. With sVirt every single VM has a custom security label and
 the policy only allows it access to disks / files with a matching label,
 and prevents it attacking any other VMs or processes on the host. This
 confines the scope of any exploit in QEMU to those resources the admin
 has explicitly assigned to the guest.

Even better. So a guest which breaks out can't even access its own
/sys/kvm/ directory. Perfect, it doesn't need that access anyway.

Joerg

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [RFC] Unify KVM kernel-space and user-space code into a single project

2010-03-24 Thread Avi Kivity

On 03/24/2010 05:37 PM, Joerg Roedel wrote:



No it can't. With sVirt every single VM has a custom security label and
the policy only allows it access to disks / files with a matching label,
and prevents it attacking any other VMs or processes on the host. This
confines the scope of any exploit in QEMU to those resources the admin
has explicitly assigned to the guest.
 

Even better. So a guest which breaks out can't even access its own
/sys/kvm/ directory. Perfect, it doesn't need that access anyway.

   


But what security label does that directory have?  How can we make sure 
that whoever needs access to those files, gets them?


Automatically created objects don't work well with that model.  They're 
simply missing information.


--
error compiling committee.c: too many arguments to function

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: KVM: x86: document KVM_REQ_PENDING_TIMER usage

2010-03-24 Thread Marcelo Tosatti
On Wed, Mar 24, 2010 at 09:10:54AM +0800, 王箫 wrote:
 Thanks for pointing that, but is it possible that explicitly check the
 pending timer with kvm_cpu_has_pending_timer() in vcpu_enter_guest()? There
 seems some function duplication between KVM_REQ_PENDING_TIMER and
 ktimer->pending.

Right. KVM_REQ_PENDING_TIMER is per vcpu, and it's one bit, while there
might be multiple ktimers per vcpu (it's a shortcut between hrtimers and
guest entry, bypassing irq injection).

Yes, there is some duplication.
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [RFC] Unify KVM kernel-space and user-space code into a single project

2010-03-24 Thread Joerg Roedel
On Wed, Mar 24, 2010 at 05:12:55PM +0200, Avi Kivity wrote:
 On 03/24/2010 05:01 PM, Joerg Roedel wrote:
 $ cd /sys/kvm/guest0
 $ ls -l
 -r 1 root root 0 2009-08-17 12:05 name
 dr-x-- 1 root root 0 2009-08-17 12:05 fs
 $ cat name
 guest0
 $ # ...

 The fs/ directory is used as the mount point for the guest root fs.

 The problem is /sys/kvm, not /sys/kvm/fs.

I am not tied to /sys/kvm. We could also use /proc/pid/kvm/ for
example. This would keep anything in the process space (except for the
global list of VMs which we should have anyway).

 What I meant was: perf-kernel puts the guest-name into every sample and
 perf-userspace accesses /sys/kvm/guest_name/fs/ later to resolve the
 symbols. I leave the question of how the guest-fs is exposed to the host
 out of this discussion. We should discuss this separately.

 How I see it: perf-kernel puts the guest pid into every sample, and  
 perf-userspace uses that to resolve to a mountpoint served by fuse, or  
 to a unix domain socket that serves the files.

We need a bit more information than just the qemu-pid, but yes, this
would also work out.

 If a vm breaks into qemu it can access the host file system which is the
 bigger problem. In this case there is no isolation anymore. From that
 context it can even kill other VMs of the same user independent of a
 hypothetical /sys/kvm/.

 It cannot.  sVirt labels the disk image and other files qemu needs with  
 the appropriate label, and everything else is off limits.  Even if you  
 run the guest as root, it won't have access to other files.

See my reply to Daniel's email.

 Yes, but its different from the implementation point-of-view. For the
 user it surely all plays together.

 We need qemu to cooperate for mmio tracing, and we can cooperate with  
 qemu for symbol resolution.  If it prevents adding another kernel API,  
 that's a win from my POV.

Thats true. Probably qemu can inject this information in the
kvm-trace-events stream.

Joerg

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [RFC] Unify KVM kernel-space and user-space code into a single project

2010-03-24 Thread Avi Kivity

On 03/24/2010 05:46 PM, Joerg Roedel wrote:

On Wed, Mar 24, 2010 at 05:12:55PM +0200, Avi Kivity wrote:
   

On 03/24/2010 05:01 PM, Joerg Roedel wrote:
 

$ cd /sys/kvm/guest0
$ ls -l
-r 1 root root 0 2009-08-17 12:05 name
dr-x-- 1 root root 0 2009-08-17 12:05 fs
$ cat name
guest0
$ # ...

The fs/ directory is used as the mount point for the guest root fs.
   

The problem is /sys/kvm, not /sys/kvm/fs.
 

I am not tied to /sys/kvm. We could also use /proc/pid/kvm/ for
example. This would keep anything in the process space (except for the
global list of VMs which we should have anyway).
   


How about ~/.qemu/guests/$pid?

--
error compiling committee.c: too many arguments to function

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [RFC] Unify KVM kernel-space and user-space code into a single project

2010-03-24 Thread Joerg Roedel
On Wed, Mar 24, 2010 at 05:43:31PM +0200, Avi Kivity wrote:
 On 03/24/2010 05:37 PM, Joerg Roedel wrote:
 Even better. So a guest which breaks out can't even access its own
 /sys/kvm/ directory. Perfect, it doesn't need that access anyway.

 But what security label does that directory have?  How can we make sure  
 that whoever needs access to those files, gets them?

 Automatically created objects don't work well with that model.  They're  
 simply missing information.

If we go the /proc/pid/kvm way then the directory should probably
inherit the label from /proc/pid/?
Same could be applied to /sys/kvm/guest/ if we decide for it. The VM is
still bound to a single process with a /proc/pid after all.

Joerg

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [RFC] Unify KVM kernel-space and user-space code into a single project

2010-03-24 Thread Avi Kivity

On 03/24/2010 05:50 PM, Joerg Roedel wrote:

On Wed, Mar 24, 2010 at 05:43:31PM +0200, Avi Kivity wrote:
   

On 03/24/2010 05:37 PM, Joerg Roedel wrote:
 

Even better. So a guest which breaks out can't even access its own
/sys/kvm/ directory. Perfect, it doesn't need that access anyway.
   

But what security label does that directory have?  How can we make sure
that whoever needs access to those files, gets them?

Automatically created objects don't work well with that model.  They're
simply missing information.
 

If we go the /proc/pid/kvm way then the directory should probably
inherit the label from /proc/pid/?
   


That's a security policy.  The security people like their policies 
outside the kernel.


For example, they may want a label that allows a trace context to read 
the data, and also qemu itself for introspection.



Same could be applied to /sys/kvm/guest/ if we decide for it. The VM is
still bound to a single process with a /proc/pid  after all.
   


Ditto.

--
error compiling committee.c: too many arguments to function

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


MSI-X not enabled for ixgbe device-passthrough

2010-03-24 Thread Hannes Reinecke
Hi all,

I'm trying to setup a system with device-passthrough for
an ixgbe NIC.
The device itself seems to work, but it isn't using MSI-X.
So some more advanced features like DCB offloading etc
won't work.

lspci output of the device:
07:00.0 Ethernet controller: Intel Corporation 82599EB 10-Gigabit Network 
Connection (rev 01)
Subsystem: Intel Corporation Ethernet Server Adapter X520-2
Flags: bus master, fast devsel, latency 0, IRQ 24
Memory at f5c8 (64-bit, prefetchable) [size=512K]
I/O ports at 5000 [size=32]
Memory at f5c7 (64-bit, prefetchable) [size=16K]
[virtual] Expansion ROM at e710 [disabled] [size=512K]
Capabilities: [40] Power Management version 3
Capabilities: [50] Message Signalled Interrupts: Mask+ 64bit+ Count=1/1 
Enable-
Capabilities: [70] MSI-X: Enable+ Mask- TabSize=64
Capabilities: [a0] Express Endpoint, MSI 00
Capabilities: [100] Advanced Error Reporting
UESta:  DLP- SDES- TLP- FCP- CmpltTO- CmpltAbrt- UnxCmplt- 
RxOF- MalfTLP- ECRC- UnsupReq+ ACSVoil-
UEMsk:  DLP- SDES- TLP- FCP- CmpltTO- CmpltAbrt- UnxCmplt- 
RxOF- MalfTLP- ECRC- UnsupReq+ ACSVoil-
UESvrt: DLP- SDES- TLP- FCP- CmpltTO- CmpltAbrt- UnxCmplt- 
RxOF- MalfTLP- ECRC- UnsupReq- ACSVoil-
CESta:  RxErr- BadTLP- BadDLLP- Rollover- Timeout- NonFatalErr+
CESta:  RxErr- BadTLP- BadDLLP- Rollover- Timeout- NonFatalErr-
AERCap: First Error Pointer: 00, GenCap+ CGenEn- ChkCap+ ChkEn-
Capabilities: [140] Device Serial Number 40-9e-3c-ff-ff-21-1b-00
Capabilities: [150] Alternative Routing-ID Interpretation (ARI)
ARICap: MFVC- ACS-, Next Function: 1
ARICtl: MFVC- ACS-, Function Group: 0
Capabilities: [160] Single Root I/O Virtualization (SR-IOV)
IOVCap: Migration-, Interrupt Message Number: 000
IOVCtl: Enable- Migration- Interrupt- MSE- ARIHierarchy+
IOVSta: Migration-
Initial VFs: 64, Total VFs: 64, Number of VFs: 64, Function 
Dependency Link: 00
VF offset: 128, stride: 2, Device ID: 10ed
Supported Page Size: 0553, System Page Size: 0001
VF Migration: offset: , BIR: 1
Kernel driver in use: ixgbe
Kernel modules: ixgbe

please let me know if you need more information.

Cheers,

Hannes
-- 
Dr. Hannes Reinecke   zSeries  Storage
h...@suse.de  +49 911 74053 688
SUSE LINUX Products GmbH, Maxfeldstr. 5, 90409 Nürnberg
GF: Markus Rex, HRB 16746 (AG Nürnberg)
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [Qemu-devel] [PATCH] qemu: jaso-parser: Output the content of invalid keyword

2010-03-24 Thread Markus Armbruster
Amos Kong ak...@redhat.com writes:

 When input some invalid word 'unknowcmd' through QMP port, qemu outputs this
 error message:
 parse error: invalid keyword `%s'
 This patch makes qemu output the content of invalid keyword, like:
 parse error: invalid keyword `unknowcmd'

 Signed-off-by: Amos Kong ak...@redhat.com

Looks good to me.

Hint: it's best to put a version in the subject when you respin, like
[PATCH v2] ...
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [RFC] Unify KVM kernel-space and user-space code into a single project

2010-03-24 Thread Peter Zijlstra
On Wed, 2010-03-24 at 16:01 +0100, Joerg Roedel wrote:

 What I meant was: perf-kernel puts the guest-name into every sample and
 perf-userspace accesses /sys/kvm/guest_name/fs/ later to resolve the
 symbols. I leave the question of how the guest-fs is exposed to the host
 out of this discussion. We should discuss this separately.

I'd much prefer a pid like suggested later, keeps the samples smaller.

But that said, we need guest kernel events like mmap and context
switches too, otherwise we simply can't make sense of guest userspace
addresses, we need to know the guest address space layout.

So aside from a filesystem content, we first need mmap and context
switch events to find the files we need to access.

And while I appreciate all the security talk, its basically pointless
anyway, the host can access it anyway, everybody agrees on that, but
still you're arguing the case..
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [RFC] Unify KVM kernel-space and user-space code into a single project

2010-03-24 Thread Avi Kivity

On 03/24/2010 05:59 PM, Joerg Roedel wrote:


   

I am not tied to /sys/kvm. We could also use /proc/pid/kvm/ for
example. This would keep anything in the process space (except for the
global list of VMs which we should have anyway).

   

How about ~/.qemu/guests/$pid?
 

That makes it hard for perf to find it and even harder to get a list of
all VMs.


Looks trivial to find a guest, less so with enumerating (still doable).


  With /proc/pid/kvm/guest we could symlink all guest
directories to /proc/kvm/ and perf reads the list from there. Also perf
can easily derive the directory for a guest from its pid.
Last but not least its kernel-created and thus independent from the
userspace part being used.
   


Doesn't perf already have a dependency on naming conventions for finding
debug information?


--
error compiling committee.c: too many arguments to function

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [RFC] Unify KVM kernel-space and user-space code into a single project

2010-03-24 Thread Avi Kivity

On 03/24/2010 06:03 PM, Peter Zijlstra wrote:

On Wed, 2010-03-24 at 16:01 +0100, Joerg Roedel wrote:

   

What I meant was: perf-kernel puts the guest-name into every sample and
perf-userspace accesses /sys/kvm/guest_name/fs/ later to resolve the
symbols. I leave the question of how the guest-fs is exposed to the host
out of this discussion. We should discuss this separately.
 

I'd much prefer a pid like suggested later, keeps the samples smaller.

But that said, we need guest kernel events like mmap and context
switches too, otherwise we simply can't make sense of guest userspace
addresses, we need to know the guest address space layout.
   


The kernel knows some of the address space layout, qemu knows all of it.


So aside from a filesystem content, we first need mmap and context
switch events to find the files we need to access.
   


This only works for the guest kernel, we don't know anything about guest 
processes [1].



And while I appreciate all the security talk, its basically pointless
anyway, the host can access it anyway, everybody agrees on that, but
still you're arguing the case..
   


root can access anything, but we're not talking about root.  The idea is 
to protect against a guest that has exploited its qemu and is now 
attacking the host and its fellow guests.   uid protection is no good 
since we want to isolate the guest from host processes belonging to the 
same uid and from other guests running under the same uid.


[1] We can find out guest pids if we teach the kernel what to
dereference, i.e. gs:offset1->offset2->offset3.  Of course this varies
from kernel to kernel, so we need some kind of bytecode that we can run
in perf nmi context.  Kind of what we need to run an unwinder for
-fomit-frame-pointer.


--
error compiling committee.c: too many arguments to function

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [RFC] Unify KVM kernel-space and user-space code into a single project

2010-03-24 Thread Joerg Roedel
On Wed, Mar 24, 2010 at 05:52:54PM +0200, Avi Kivity wrote:
 On 03/24/2010 05:50 PM, Joerg Roedel wrote:
 If we go the /proc/pid/kvm way then the directory should probably
 inherit the label from /proc/pid/?

 That's a security policy.  The security people like their policies  
 outside the kernel.

 For example, they may want a label that allows a trace context to read  
 the data, and also qemu itself for introspection.

Hm, I am not a security expert. But is this not only one entity more for
sVirt to handle? I would leave that decision to the sVirt developers.
Does attaching the same label as for the VM resources mean that root
could not access it anymore?

Joerg

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: qemu-kvm crashes with Assertion ... failed.

2010-03-24 Thread André Weidemann

Hi,
On 24.03.2010 13:17, Avi Kivity wrote:

On 03/17/2010 11:14 PM, André Weidemann wrote:

qemu-system-x86_64 -cpu core2duo -vga cirrus -boot order=ndc -vnc
192.168.3.42:2 -k de -smp 4,cores=4 -drive
file=/vmware/Windows7Test_600G.img,if=ide,index=0,cache=writeback -m
1024 -net nic,model=e1000,macaddr=DE:AD:BE:EF:12:3A -net
tap,script=/usr/local/bin/qemu-ifup -monitor pty -name
Windows7test,process=Windows7test -drive
file=/dev/storage/Windows7test,if=ide,index=1,cache=none,aio=native


Andre,

Can you try qemu-kvm-0.12.3 ?



I did the following:
git clone git://git.kernel.org/pub/scm/virt/kvm/qemu-kvm.git
qemu-kvm-2010-03-17
cd qemu-kvm-2010-03-17
git checkout -b test qemu-kvm-0.12.3
./configure
make -j6 && make install

I started the VM again exactly as I did the last time and it crashed
again with the same error message.
qemu-system-x86_64:
/usr/local/src/qemu-kvm-2010-03-17/hw/ide/internal.h:507:
bmdma_active_if: Assertion `bmdma->unit != (uint8_t)-1' failed.



Does this happen with a guest installed on kvm, or just with the guest
that (guessing from the name) was imported from vmware?


I booted the VM via PXE into an Ubuntu Live CD image. I only added the 
Windows disk image, so I could copy the resulting Excel file (from 
iozone) to this disk. The Windows 7 on this disk was installed under kvm 
0.12.3.


Regards
 André
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [RFC] Unify KVM kernel-space and user-space code into a single project

2010-03-24 Thread Avi Kivity

On 03/24/2010 06:17 PM, Joerg Roedel wrote:

On Wed, Mar 24, 2010 at 05:52:54PM +0200, Avi Kivity wrote:
   

On 03/24/2010 05:50 PM, Joerg Roedel wrote:
 

If we go the /proc/pid/kvm way then the directory should probably
inherit the label from /proc/pid/?
   

That's a security policy.  The security people like their policies
outside the kernel.

For example, they may want a label that allows a trace context to read
the data, and also qemu itself for introspection.
 

Hm, I am not a security expert.


I'm out of my depth here as well.


But is this not only one entity more for
sVirt to handle? I would leave that decision to the sVirt developers.
Does attaching the same label as for the VM resources mean that root
could not access it anymore?
   


IIUC processes run under a context, and there's a policy somewhere that 
tells you which context can access which label (and with what 
permissions).  There was a server on the Internet once that gave you 
root access and invited you to attack it.  No idea if anyone succeeded 
or not (I got bored after about a minute).


So it depends on the policy.  If you attach the same label, that means 
all files with the same label have the same access permissions.  I think.


--
error compiling committee.c: too many arguments to function

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [RFC] Unify KVM kernel-space and user-space code into a single project

2010-03-24 Thread Joerg Roedel
On Wed, Mar 24, 2010 at 05:03:42PM +0100, Peter Zijlstra wrote:
 On Wed, 2010-03-24 at 16:01 +0100, Joerg Roedel wrote:
 
  What I meant was: perf-kernel puts the guest-name into every sample and
  perf-userspace accesses /sys/kvm/guest_name/fs/ later to resolve the
  symbols. I leave the question of how the guest-fs is exposed to the host
  out of this discussion. We should discuss this separately.
 
 I'd much prefer a pid like suggested later, keeps the samples smaller.
 
 But that said, we need guest kernel events like mmap and context
 switches too, otherwise we simply can't make sense of guest userspace
 addresses, we need to know the guest address space layout.

With the filesystem approach all we need is the pid of the guest
process. Then we can access proc/pid/maps of the guest and read out the
address space layout, no?

Joerg

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: qemu-kvm crashes with Assertion ... failed.

2010-03-24 Thread Avi Kivity

On 03/24/2010 06:20 PM, André Weidemann wrote:

Does this happen with a guest installed on kvm, or just with the guest
that (guessing from the name) was imported from vmware?



I booted the VM via PXE into an Ubuntu Live CD image. I only added the 
Windows disk image, so I could copy the resulting Excel file (from 
iozone) to this disk. The Windows 7 on this disk was installed under 
kvm 0.12.3.




What version of Ubuntu?  Can you post a way to reproduce this reliably 
(how you created the disk etc.)


--
error compiling committee.c: too many arguments to function

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [RFC] Unify KVM kernel-space and user-space code into a single project

2010-03-24 Thread Joerg Roedel
On Wed, Mar 24, 2010 at 06:20:38PM +0200, Avi Kivity wrote:
 On 03/24/2010 06:17 PM, Joerg Roedel wrote:
 But is this not only one entity more for
 sVirt to handle? I would leave that decision to the sVirt developers.
 Does attaching the same label as for the VM resources mean that root
 could not access it anymore?


 IIUC processes run under a context, and there's a policy somewhere that  
 tells you which context can access which label (and with what  
 permissions).  There was a server on the Internet once that gave you  
 root access and invited you to attack it.  No idea if anyone succeeded  
 or not (I got bored after about a minute).

 So it depends on the policy.  If you attach the same label, that means  
 all files with the same label have the same access permissions.  I think.

So if this is true we can introduce a 'trace' label and add all contexts
that should be allowed to trace to it.
But we probably should leave the details to the security experts ;-)

Joerg

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [RFC] Unify KVM kernel-space and user-space code into a single project

2010-03-24 Thread Avi Kivity

On 03/24/2010 06:31 PM, Joerg Roedel wrote:

On Wed, Mar 24, 2010 at 06:20:38PM +0200, Avi Kivity wrote:
   

On 03/24/2010 06:17 PM, Joerg Roedel wrote:
 

But is this not only one entity more for
sVirt to handle? I would leave that decision to the sVirt developers.
Does attaching the same label as for the VM resources mean that root
could not access it anymore?

   

IIUC processes run under a context, and there's a policy somewhere that
tells you which context can access which label (and with what
permissions).  There was a server on the Internet once that gave you
root access and invited you to attack it.  No idea if anyone succeeded
or not (I got bored after about a minute).

So it depends on the policy.  If you attach the same label, that means
all files with the same label have the same access permissions.  I think.
 

So if this is true we can introduce a 'trace' label and add all contexts
that should be allowed to trace to it.
But we probably should leave the details to the security experts ;-)
   


That's just what I want to do.  Leave it in userspace and then they can 
deal with it without telling us about it.


--
error compiling committee.c: too many arguments to function

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [RFC] Unify KVM kernel-space and user-space code into a single project

2010-03-24 Thread Joerg Roedel
On Wed, Mar 24, 2010 at 06:09:30PM +0200, Avi Kivity wrote:
 On 03/24/2010 05:59 PM, Joerg Roedel wrote:


 I am not tied to /sys/kvm. We could also use /proc/pid/kvm/ for
 example. This would keep anything in the process space (except for the
 global list of VMs which we should have anyway).


 How about ~/.qemu/guests/$pid?
  
 That makes it hard for perf to find it and even harder to get a list of
 all VMs.

 Looks trivial to find a guest, less so with enumerating (still doable).

Not so trivial and even more likely to break. Even if perf has the pid of
the process and wants to find the directory it has to do:

1. Get the uid of the process
2. Find the username for the uid
3. Use the username to find the home-directory

Steps 2. and 3. need nsswitch and/or pam access to get this information
from whatever source the admin has configured. And depending on what the
source is it may be temporarily unavailable causing nasty timeouts. In
short, there are many weak parts in that chain making it more likely to
break.
A kernel-based approach with /proc/pid/kvm does not have those issues
(and to repeat myself, it is independent from the userspace being used).

Joerg

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [RFC] Unify KVM kernel-space and user-space code into a single project

2010-03-24 Thread Joerg Roedel
On Wed, Mar 24, 2010 at 06:32:51PM +0200, Avi Kivity wrote:
 On 03/24/2010 06:31 PM, Joerg Roedel wrote:

 That's just what I want to do.  Leave it in userspace and then they can  
 deal with it without telling us about it.

They can't do that with a directory in /proc?

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [RFC] Unify KVM kernel-space and user-space code into a single project

2010-03-24 Thread Peter Zijlstra
On Wed, 2010-03-24 at 17:23 +0100, Joerg Roedel wrote:
 On Wed, Mar 24, 2010 at 05:03:42PM +0100, Peter Zijlstra wrote:
  On Wed, 2010-03-24 at 16:01 +0100, Joerg Roedel wrote:
  
   What I meant was: perf-kernel puts the guest-name into every sample and
   perf-userspace accesses /sys/kvm/guest_name/fs/ later to resolve the
   symbols. I leave the question of how the guest-fs is exposed to the host
   out of this discussion. We should discuss this separately.
  
  I'd much prefer a pid like suggested later, keeps the samples smaller.
  
  But that said, we need guest kernel events like mmap and context
  switches too, otherwise we simply can't make sense of guest userspace
  addresses, we need to know the guest address space layout.
 
 With the filesystem approach all we need is the pid of the guest
 process. Then we can access proc/pid/maps of the guest and read out the
 address space layout, no?

No, what if it maps new things after you read it? But still getting the
pid of the guest process seems non trivial without guest kernel support.
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [RFC] Unify KVM kernel-space and user-space code into a single project

2010-03-24 Thread Avi Kivity

On 03/24/2010 06:40 PM, Joerg Roedel wrote:



Looks trivial to find a guest, less so with enumerating (still doable).
 

Not so trivial and even more likely to break. Even if perf has the pid of
the process and wants to find the directory it has to do:

1. Get the uid of the process
2. Find the username for the uid
3. Use the username to find the home-directory

Steps 2. and 3. need nsswitch and/or pam access to get this information
from whatever source the admin has configured. And depending on what the
source is it may be temporarily unavailable causing nasty timeouts. In
short, there are many weak parts in that chain making it more likely to
break.
   


It's true.  If the kernel provides something, there are fewer things 
that can break.  But if your system is so broken that you can't resolve 
uids, fix that before running perf.  Must we design perf for that case?


After all, 'ls -l' will break under the same circumstances.  It's hard 
to imagine doing useful work when that doesn't work.



A kernel-based approach with /proc/pid/kvm does not have those issues
(and to repeat myself, it is independent from the userspace being used).
   


It has other issues, which are IMO more problematic.

--
error compiling committee.c: too many arguments to function

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH] KVM: allow bit 10 to be cleared in MSR_IA32_MC4_CTL

2010-03-24 Thread Andre Przywara
There is a quirk for AMD K8 CPUs in many Linux kernels (see
arch/x86/kernel/cpu/mcheck/mce.c:__mcheck_cpu_apply_quirks()) that
clears bit 10 in that MCE related MSR. KVM can only cope with all
zeros or all ones, so it will inject a #GP into the guest, which
will make it panic.
So let's add a quirk to the quirk and ignore this single cleared bit.
This fixes -cpu kvm64 on all machines and -cpu host on K8 machines
with some guest Linux kernels.

Signed-off-by: Andre Przywara andre.przyw...@amd.com
---
 arch/x86/kvm/x86.c |8 ++--
 1 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 097ad3a..a58c634 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -910,9 +910,13 @@ static int set_msr_mce(struct kvm_vcpu *vcpu, u32 msr, u64 
data)
if (msr >= MSR_IA32_MC0_CTL &&
msr < MSR_IA32_MC0_CTL + 4 * bank_num) {
u32 offset = msr - MSR_IA32_MC0_CTL;
-   /* only 0 or all 1s can be written to IA32_MCi_CTL */
+   /* only 0 or all 1s can be written to IA32_MCi_CTL
+* some Linux kernels though clear bit 10 in bank 4 to
+* workaround a BIOS/GART TBL issue on AMD K8s, ignore
+* this to avoid an uncatched #GP in the guest
+*/
if ((offset & 0x3) == 0 &&
-   data != 0 && data != ~(u64)0)
+   data != 0 && (data | (1 << 10)) != ~(u64)0)
return -1;
vcpu->arch.mce_banks[offset] = data;
break;
-- 
1.6.4


--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [RFC] Unify KVM kernel-space and user-space code into a single project

2010-03-24 Thread Avi Kivity

On 03/24/2010 06:45 PM, Joerg Roedel wrote:



That's just what I want to do.  Leave it in userspace and then they can
deal with it without telling us about it.
 

They can't do that with a directory in /proc?

   


I don't know.

--
error compiling committee.c: too many arguments to function

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [RFC] Unify KVM kernel-space and user-space code into a single project

2010-03-24 Thread Avi Kivity

On 03/24/2010 06:47 PM, Avi Kivity wrote:


It's true.  If the kernel provides something, there are fewer things 
that can break.  But if your system is so broken that you can't 
resolve uids, fix that before running perf.  Must we design perf for 
that case?


After all, 'ls -l' will break under the same circumstances.  It's hard 
to imagine doing useful work when that doesn't work.



Also, perf itself will hang if it needs to access a file using autofs or 
nfs, and those are broken.


--
error compiling committee.c: too many arguments to function

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] qemu: jaso-parser: Output the content of invalid keyword

2010-03-24 Thread Richard Henderson
On 03/24/2010 08:12 AM, Amos Kong wrote:
 
 When an invalid word such as 'unknowcmd' is input through the QMP port, qemu
 outputs this error message:
 parse error: invalid keyword `%s'
 This patch makes qemu output the content of invalid keyword, like:
 parse error: invalid keyword `unknowcmd'
 
 Signed-off-by: Amos Kong ak...@redhat.com

Acked-by: Richard Henderson r...@redhat.com

 ---
  json-parser.c |8 +++-
  1 files changed, 7 insertions(+), 1 deletions(-)
 
 diff --git a/json-parser.c b/json-parser.c
 index 579928f..b55d763 100644
 --- a/json-parser.c
 +++ b/json-parser.c
 @@ -12,6 +12,7 @@
   */
  
  #include <stdbool.h>
 +#include <stdarg.h>
  
  #include "qemu-common.h"
  #include "qstring.h"
 @@ -93,7 +94,12 @@ static int token_is_escape(QObject *obj, const char *value)
   */
  static void parse_error(JSONParserContext *ctxt, QObject *token, const char 
 *msg, ...)
  {
 -fprintf(stderr, "parse error: %s\n", msg);
 +va_list ap;
 +va_start(ap, msg);
 +fprintf(stderr, "parse error: ");
 +vfprintf(stderr, msg, ap);
 +fprintf(stderr, "\n");
 +va_end(ap);
  }
  
  /**

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [RFC] Unify KVM kernel-space and user-space code into a single project

2010-03-24 Thread Arnaldo Carvalho de Melo
Em Wed, Mar 24, 2010 at 06:09:30PM +0200, Avi Kivity escreveu:
 Doesn't perf already has a dependency on naming conventions for finding  
 debug information?

It looks at several places, from most symbol rich (/usr/lib/debug/, aka
-debuginfo packages, where we have full symtabs) to poorest (the
packaged binary, where we may just have a .dynsym).

In an ideal world, it would just get the build-id (a SHA1 cookie that is
in an ELF section inserted in every binary (aka DSOs), kernel module,
kallsyms or vmlinux file) and use that to look first in a local cache
(implemented in perf for a long time already) or in some symbol server.

For instance, for a random perf.data file I collected here in my machine
I have:

[a...@doppio linux-2.6-tip]$ perf buildid-list | grep libpthread
5c68f7afeb33309c78037e374b0deee84dd441f6 /lib64/libpthread-2.10.2.so
[a...@doppio linux-2.6-tip]$

So I don't have to access /lib64/libpthread-2.10.2.so directly, nor some
convention to get a debuginfo in a local file like:

/usr/lib/debug/lib64/libpthread-2.10.2.so.debug

Instead the tools look at:

[a...@doppio linux-2.6-tip]$ l 
~/.debug/.build-id/5c/68f7afeb33309c78037e374b0deee84dd441f6
lrwxrwxrwx 1 acme acme 73 2010-01-06 18:53 
/home/acme/.debug/.build-id/5c/68f7afeb33309c78037e374b0deee84dd441f6 - 
../../lib64/libpthread-2.10.2.so/5c68f7afeb33309c78037e374b0deee84dd441f6*

To find the file for that specific build-id, not the one installed in my
machine (or on the different machine, of a different architecture) that
may be completely unrelated, a new one, or one for a different arch.

- Arnaldo
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [RFC] Unify KVM kernel-space and user-space code into a single project

2010-03-24 Thread Avi Kivity

On 03/24/2010 07:47 PM, Arnaldo Carvalho de Melo wrote:

Em Wed, Mar 24, 2010 at 06:09:30PM +0200, Avi Kivity escreveu:
   

Doesn't perf already has a dependency on naming conventions for finding
debug information?
 

It looks at several places, from most symbol rich (/usr/lib/debug/, aka
-debuginfo packages, where we have full symtabs) to poorest (the
packaged binary, where we may just have a .dynsym).

In an ideal world, it would just get the build-id (a SHA1 cookie that is
in an ELF section inserted in every binary (aka DSOs), kernel module,
kallsyms or vmlinux file) and use that to look first in a local cache
(implemented in perf for a long time already) or in some symbol server.

For instance, for a random perf.data file I collected here in my machine
I have:

[a...@doppio linux-2.6-tip]$ perf buildid-list | grep libpthread
5c68f7afeb33309c78037e374b0deee84dd441f6 /lib64/libpthread-2.10.2.so
[a...@doppio linux-2.6-tip]$

So I don't have to access /lib64/libpthread-2.10.2.so directly, nor some
convention to get a debuginfo in a local file like:

/usr/lib/debug/lib64/libpthread-2.10.2.so.debug

Instead the tools look at:

[a...@doppio linux-2.6-tip]$ l 
~/.debug/.build-id/5c/68f7afeb33309c78037e374b0deee84dd441f6
lrwxrwxrwx 1 acme acme 73 2010-01-06 18:53 
/home/acme/.debug/.build-id/5c/68f7afeb33309c78037e374b0deee84dd441f6 -  
../../lib64/libpthread-2.10.2.so/5c68f7afeb33309c78037e374b0deee84dd441f6*

To find the file for that specific build-id, not the one installed in my
machine (or on the different machine, of a different architecture) that
may be completely unrelated, a new one, or one for a different arch.
   


Thanks.  I believe qemu could easily act as a symbol server for this use 
case.


--
Do not meddle in the internals of kernels, for they are subtle and quick to 
panic.

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [RFC] Unify KVM kernel-space and user-space code into a single project

2010-03-24 Thread Arnaldo Carvalho de Melo
Em Wed, Mar 24, 2010 at 08:20:10PM +0200, Avi Kivity escreveu:
 On 03/24/2010 07:47 PM, Arnaldo Carvalho de Melo wrote:
 Em Wed, Mar 24, 2010 at 06:09:30PM +0200, Avi Kivity escreveu:

 Doesn't perf already has a dependency on naming conventions for finding
 debug information?
  
 It looks at several places, from most symbol rich (/usr/lib/debug/, aka
 -debuginfo packages, where we have full symtabs) to poorest (the
 packaged binary, where we may just have a .dynsym).

 In an ideal world, it would just get the build-id (a SHA1 cookie that is
 in an ELF section inserted in every binary (aka DSOs), kernel module,
 kallsyms or vmlinux file) and use that to look first in a local cache
 (implemented in perf for a long time already) or in some symbol server.

 For instance, for a random perf.data file I collected here in my machine
 I have:

 [a...@doppio linux-2.6-tip]$ perf buildid-list | grep libpthread
 5c68f7afeb33309c78037e374b0deee84dd441f6 /lib64/libpthread-2.10.2.so
 [a...@doppio linux-2.6-tip]$

 So I don't have to access /lib64/libpthread-2.10.2.so directly, nor some
 convention to get a debuginfo in a local file like:

 /usr/lib/debug/lib64/libpthread-2.10.2.so.debug

 Instead the tools look at:

 [a...@doppio linux-2.6-tip]$ l 
 ~/.debug/.build-id/5c/68f7afeb33309c78037e374b0deee84dd441f6
 lrwxrwxrwx 1 acme acme 73 2010-01-06 18:53 
 /home/acme/.debug/.build-id/5c/68f7afeb33309c78037e374b0deee84dd441f6 -  
 ../../lib64/libpthread-2.10.2.so/5c68f7afeb33309c78037e374b0deee84dd441f6*

 To find the file for that specific build-id, not the one installed in my
 machine (or on the different machine, of a different architecture) that
 may be completely unrelated, a new one, or one for a different arch.

 Thanks.  I believe qemu could easily act as a symbol server for this use  
 case.

Agreed, but it doesn't even have to :-)

We just need to get the build-id in the PERF_RECORD_MMAP event somehow
and then get this symbol from elsewhere, say the same DVD/RHN
channel/Debian Repository/embedded developer toolkit image not
stripped/whatever.

Or it may already be in the local cache from last week's perf report
session :-)

- Arnaldo
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [RFC] vhost-blk implementation

2010-03-24 Thread Christoph Hellwig
 Inspired by vhost-net implementation, I did initial prototype 
 of vhost-blk to see if it provides any benefits over QEMU virtio-blk.
 I haven't handled all the error cases, fixed naming conventions etc.,
 but the implementation is stable to play with. I tried not to deviate
 from vhost-net implementation where possible.

Can you also send the qemu side of it?

 with vhost-blk:
 
 
 # time dd if=/dev/vda of=/dev/null bs=128k iflag=direct
 640000+0 records in
 640000+0 records out
 83886080000 bytes (84 GB) copied, 126.135 seconds, 665 MB/s
 
 real    2m6.137s
 user    0m0.281s
 sys     0m14.725s
 
 without vhost-blk: (virtio)
 ---
 
 # time dd if=/dev/vda of=/dev/null bs=128k iflag=direct
 640000+0 records in
 640000+0 records out
 83886080000 bytes (84 GB) copied, 275.466 seconds, 305 MB/s
 
 real    4m35.468s
 user    0m0.373s
 sys     0m48.074s

Which caching mode is this?  I assume data=writeback, because otherwise
you'd be doing synchronous I/O directly from the handler.

 +static int do_handle_io(struct file *file, uint32_t type, uint64_t sector,
 + struct iovec *iov, int in)
 +{
 + loff_t pos = sector << 8;
 + int ret = 0;
 +
 + if (type & VIRTIO_BLK_T_FLUSH)  {
 + ret = vfs_fsync(file, file->f_path.dentry, 1);
 + } else if (type & VIRTIO_BLK_T_OUT) {
 + ret = vfs_writev(file, iov, in, &pos);
 + } else {
 + ret = vfs_readv(file, iov, in, &pos);
 + }
 + return ret;

I have to admit I don't understand the vhost architecture at all, but
where do the actual data pointers used by the iovecs reside?
vfs_readv/writev expect both the iovec itself and the buffers
pointed to by it to reside in userspace, so just using kernel buffers
here will break badly on architectures with different user/kernel
mappings.  A lot of this is fixable using simple set_fs & co tricks,
but for direct I/O which uses get_user_pages even that will fail badly.

Also it seems like you're doing all the I/O synchronous here?  For
data=writeback operations that could explain the read speedup
as you're avoiding context switches, but for actual write I/O
which has to get data to disk (either directly from vfs_writev or
later through vfs_fsync) this seems like a really bad idea stealing
a lot of guest time that should happen in the background.


Other than that the code seems quite nice and simple, but one huge
problem is that it'll only support raw images, and thus misses out
on all the nice image formats used in qemu deployments, especially
qcow2.  It's also missing the ioctl magic we're having in various
places, both for controlling host devices like cdroms and SG
passthrough.
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [RFC] vhost-blk implementation

2010-03-24 Thread Christoph Hellwig
On Tue, Mar 23, 2010 at 12:03:14PM +0200, Avi Kivity wrote:
 I also think it should be done at the bio layer.  File I/O is going to  
 be slower, if we do vhost-blk we should concentrate on maximum  
 performance.  The block layer also exposes more functionality we can use  
 (asynchronous barriers for example).

The block layer is more flexible, but that limits you to only stack
directly on top of a block device, which is extremely inflexible.

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [RFC] vhost-blk implementation

2010-03-24 Thread Badari Pulavarty

Christoph Hellwig wrote:
Inspired by vhost-net implementation, I did initial prototype 
of vhost-blk to see if it provides any benefits over QEMU virtio-blk.

I haven't handled all the error cases, fixed naming conventions etc.,
but the implementation is stable to play with. I tried not to deviate
from vhost-net implementation where possible.



Can you also send the qemu side of it?

  

with vhost-blk:


# time dd if=/dev/vda of=/dev/null bs=128k iflag=direct
640000+0 records in
640000+0 records out
83886080000 bytes (84 GB) copied, 126.135 seconds, 665 MB/s

real    2m6.137s
user    0m0.281s
sys     0m14.725s

without vhost-blk: (virtio)
---

# time dd if=/dev/vda of=/dev/null bs=128k iflag=direct
640000+0 records in
640000+0 records out
83886080000 bytes (84 GB) copied, 275.466 seconds, 305 MB/s

real    4m35.468s
user    0m0.373s
sys     0m48.074s



Which caching mode is this?  I assume data=writeback, because otherwise
you'd be doing synchronous I/O directly from the handler.
  


Yes. This is with the default (writeback) cache model. As mentioned earlier,
readahead is helping here, and in most cases the data would already be in
the pagecache.
  

+static int do_handle_io(struct file *file, uint32_t type, uint64_t sector,
+   struct iovec *iov, int in)
+{
+   loff_t pos = sector << 8;
+   int ret = 0;
+
+   if (type & VIRTIO_BLK_T_FLUSH)  {
+   ret = vfs_fsync(file, file->f_path.dentry, 1);
+   } else if (type & VIRTIO_BLK_T_OUT) {
+   ret = vfs_writev(file, iov, in, &pos);
+   } else {
+   ret = vfs_readv(file, iov, in, &pos);
+   }
+   return ret;



I have to admit I don't understand the vhost architecture at all, but
where do the actual data pointers used by the iovecs reside?
vfs_readv/writev expect both the iovec itself and the buffers
pointed to by it to reside in userspace, so just using kernel buffers
here will break badly on architectures with different user/kernel
mappings.  A lot of this is fixable using simple set_fs & co tricks,
but for direct I/O which uses get_user_pages even that will fail badly.
  
iovecs and buffers are user-space pointers (from the host kernel's point
of view). They are guest addresses, so I don't need to do any set_fs tricks.

Also it seems like you're doing all the I/O synchronous here?  For
data=writeback operations that could explain the read speedup
as you're avoiding context switches, but for actual write I/O
which has to get data to disk (either directly from vfs_writev or
later through vfs_fsync) this seems like a really bad idea stealing
a lot of guest time that should happen in the background.
  
Yes. QEMU virtio-blk batches up all the writes and hands off the work to
another thread. When the writes are complete, it sends a status completion.
Since I am doing everything synchronously (even though it's a write to the
pagecache), one request at a time, that explains the slowdown. We need to
find a way to

1) batch IO writes together
2) hand off to another thread to do the IO, so that vhost-thread can handle
next set of requests
3) update the status on the completion

What should I do here? I can create a bunch of kernel threads to do
the IO for me, or somehow fit and reuse the AIO io_submit() mechanism.
What's the best way here?

I hate to duplicate all the code the VFS is doing.


Other than that the code seems quite nice and simple, but one huge
problem is that it'll only support raw images, and thus misses out
on all the nice image formats used in qemu deployments, especially
qcow2.  It's also missing the ioctl magic we're having in various
places, both for controlling host devices like cdroms and SG
passthrough.
  
True... unfortunately, I don't understand all of those (qcow2) details 
yet !! I need to read up on those,

to even make a comment :(

Thanks,
Badari


--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [RFC] vhost-blk implementation

2010-03-24 Thread Badari Pulavarty

Christoph Hellwig wrote:
Inspired by vhost-net implementation, I did initial prototype 
of vhost-blk to see if it provides any benefits over QEMU virtio-blk.

I haven't handled all the error cases, fixed naming conventions etc.,
but the implementation is stable to play with. I tried not to deviate
from vhost-net implementation where possible.



Can you also send the qemu side of it?
  
It's pretty hacky, and I based it on an old patch (vhost-net) from MST for
simplicity. I haven't focused on cleaning it up, and I will re-base it on
MST's latest code once it gets into QEMU.

Thanks,
Badari

---
hw/virtio-blk.c |  199 
1 file changed, 199 insertions(+)

Index: vhost/hw/virtio-blk.c
===
--- vhost.orig/hw/virtio-blk.c  2010-02-25 16:47:04.0 -0500
+++ vhost/hw/virtio-blk.c   2010-03-17 14:07:26.477430740 -0400
@@ -18,6 +18,7 @@
#ifdef __linux__
# include scsi/sg.h
#endif
+#include kvm.h

typedef struct VirtIOBlock
{
@@ -28,8 +29,13 @@
char serial_str[BLOCK_SERIAL_STRLEN + 1];
QEMUBH *bh;
size_t config_size;
+uint8_t vhost_started;
} VirtIOBlock;

+typedef struct BDRVRawState {
+int fd;
+} BDRVRawState;
+
static VirtIOBlock *to_virtio_blk(VirtIODevice *vdev)
{
return (VirtIOBlock *)vdev;
@@ -501,6 +507,198 @@
return 0;
}

+#if 1
+#include linux/vhost.h
+#include sys/ioctl.h
+#include sys/eventfd.h
+#include vhost.h
+
+int vhost_blk_fd;
+
+struct slot_info {
+unsigned long phys_addr;
+unsigned long len;
+unsigned long userspace_addr;
+unsigned flags;
+int logging_count;
+};
+
+extern struct slot_info slots[KVM_MAX_NUM_MEM_REGIONS];
+
+static int vhost_blk_start(struct VirtIODevice *vdev)
+{
+   target_phys_addr_t s, l, a;
+   int r, num, idx = 0;
+   struct vhost_vring_state state;
+   struct vhost_vring_file file;
+   struct vhost_vring_addr addr;
+   unsigned long long used_phys;
+   void *desc, *avail, *used;
+   int i, n =0;
+   struct VirtQueue *q = virtio_queue(vdev, idx);
+   VirtIOBlock *vb = to_virtio_blk(vdev);
+   struct vhost_memory *mem;
+   BDRVRawState *st = vb-bs-opaque;
+
+   vhost_blk_fd = open(/dev/vhost-blk, O_RDWR);
+   if (vhost_blk_fd  0) {
+   fprintf(stderr, unable to open vhost-blk\n);
+   return -errno;
+   }
+
+   r = ioctl(vhost_blk_fd, VHOST_SET_OWNER, NULL);
+if (r  0) {
+   fprintf(stderr, ioctl VHOST_SET_OWNER failed\n);
+return -errno;
+   }
+
+for (i = 0; i  KVM_MAX_NUM_MEM_REGIONS; ++i) {
+if (!slots[i].len ||
+   (slots[i].flags  KVM_MEM_LOG_DIRTY_PAGES)) {
+  continue;
+}
+++n;
+}
+
+mem = qemu_mallocz(offsetof(struct vhost_memory, regions) +
+   n * sizeof(struct vhost_memory_region));
+if (!mem)
+return -ENOMEM;
+
+mem-nregions = n;
+n = 0;
+for (i = 0; i  KVM_MAX_NUM_MEM_REGIONS; ++i) {
+if (!slots[i].len || (slots[i].flags 
+   KVM_MEM_LOG_DIRTY_PAGES)) {
+continue;
+}
+mem-regions[n].guest_phys_addr = slots[i].phys_addr;
+mem-regions[n].memory_size = slots[i].len;
+mem-regions[n].userspace_addr = slots[i].userspace_addr;
+++n;
+}
+
+r = ioctl(vhost_blk_fd, VHOST_SET_MEM_TABLE, mem);
+if (r  0)
+return -errno;
+
+   state.index = idx;
+   num = state.num = virtio_queue_get_num(vdev, idx);
+   r = ioctl(vhost_blk_fd, VHOST_SET_VRING_NUM, state);
+if (r) {
+   fprintf(stderr, ioctl VHOST_SET_VRING_NUM failed\n);
+return -errno;
+}
+
+   state.num = virtio_queue_last_avail_idx(vdev, idx);
+   r = ioctl(vhost_blk_fd, VHOST_SET_VRING_BASE, state);
+   if (r) {
+   fprintf(stderr, ioctl VHOST_SET_VRING_BASE failed\n);
+return -errno;
+   }
+
+   s = l = sizeof(struct vring_desc) * num;
+   a = virtio_queue_get_desc(vdev, idx);
+   desc = cpu_physical_memory_map(a, l, 0);
+   if (!desc || l != s) {
+r = -ENOMEM;
+goto fail_alloc;
+   }
+   s = l = offsetof(struct vring_avail, ring) +
+sizeof(u_int64_t) * num;
+a = virtio_queue_get_avail(vdev, idx);
+avail = cpu_physical_memory_map(a, l, 0);
+if (!avail || l != s) {
+r = -ENOMEM;
+goto fail_alloc;
+}
+s = l = offsetof(struct vring_used, ring) +
+sizeof(struct vring_used_elem) * num;
+used_phys = a = virtio_queue_get_used(vdev, idx);
+used = 

Re: [Qemu-devel] [PATCH] qemu: jaso-parser: Output the content of invalid keyword

2010-03-24 Thread Luiz Capitulino
On Wed, 24 Mar 2010 17:00:14 +0100
Markus Armbruster arm...@redhat.com wrote:

 Amos Kong ak...@redhat.com writes:
 
  When an invalid word such as 'unknowcmd' is input through the QMP port, qemu
  outputs this
  error message:
  parse error: invalid keyword `%s'
  This patch makes qemu output the content of invalid keyword, like:
  parse error: invalid keyword `unknowcmd'
 
  Signed-off-by: Amos Kong ak...@redhat.com
 
 Looks good to me.
 
 Hint: it's best to put a version in the subject when you respin, like
 [PATCH v2] ...

 Yes, and maintainers may miss a patch down a thread (and it's a good
opportunity to fix the subject).
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 03/21] KVM: PPC: Make DSISR 32 bits wide

2010-03-24 Thread Alexander Graf
DSISR is only defined as 32 bits wide. So let's reflect that in the
structs too.

Signed-off-by: Alexander Graf ag...@suse.de
---
 arch/powerpc/include/asm/kvm_book3s.h   |2 +-
 arch/powerpc/include/asm/kvm_host.h |2 +-
 arch/powerpc/kvm/book3s_64_interrupts.S |2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/arch/powerpc/include/asm/kvm_book3s.h 
b/arch/powerpc/include/asm/kvm_book3s.h
index 14d0262..9f5a992 100644
--- a/arch/powerpc/include/asm/kvm_book3s.h
+++ b/arch/powerpc/include/asm/kvm_book3s.h
@@ -84,8 +84,8 @@ struct kvmppc_vcpu_book3s {
u64 hid[6];
u64 gqr[8];
int slb_nr;
+   u32 dsisr;
u64 sdr1;
-   u64 dsisr;
u64 hior;
u64 msr_mask;
u64 vsid_first;
diff --git a/arch/powerpc/include/asm/kvm_host.h 
b/arch/powerpc/include/asm/kvm_host.h
index 119deb4..0ebda67 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -260,7 +260,7 @@ struct kvm_vcpu_arch {
 
u32 last_inst;
 #ifdef CONFIG_PPC64
-   ulong fault_dsisr;
+   u32 fault_dsisr;
 #endif
ulong fault_dear;
ulong fault_esr;
diff --git a/arch/powerpc/kvm/book3s_64_interrupts.S 
b/arch/powerpc/kvm/book3s_64_interrupts.S
index c1584d0..faca876 100644
--- a/arch/powerpc/kvm/book3s_64_interrupts.S
+++ b/arch/powerpc/kvm/book3s_64_interrupts.S
@@ -171,7 +171,7 @@ kvmppc_handler_highmem:
std r3, VCPU_PC(r7)
std r4, VCPU_SHADOW_SRR1(r7)
std r5, VCPU_FAULT_DEAR(r7)
-   std r6, VCPU_FAULT_DSISR(r7)
+   stw r6, VCPU_FAULT_DSISR(r7)
 
ld  r5, VCPU_HFLAGS(r7)
rldicl. r5, r5, 0, 63   /* CR = ((r5  1) == 0) */
-- 
1.6.0.2

--
To unsubscribe from this list: send the line unsubscribe kvm-ppc in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 05/21] KVM: PPC: Split instruction reading out

2010-03-24 Thread Alexander Graf
The current check_ext function reads the instruction and then does
the checking. Let's split the reading out so we can reuse it for
different functions.

Signed-off-by: Alexander Graf ag...@suse.de
---
 arch/powerpc/kvm/book3s.c |   24 
 1 files changed, 16 insertions(+), 8 deletions(-)

diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c
index 9e0bc47..400ae0a 100644
--- a/arch/powerpc/kvm/book3s.c
+++ b/arch/powerpc/kvm/book3s.c
@@ -650,26 +650,34 @@ void kvmppc_giveup_ext(struct kvm_vcpu *vcpu, ulong msr)
kvmppc_recalc_shadow_msr(vcpu);
 }
 
-static int kvmppc_check_ext(struct kvm_vcpu *vcpu, unsigned int exit_nr)
+static int kvmppc_read_inst(struct kvm_vcpu *vcpu)
 {
ulong srr0 = vcpu-arch.pc;
int ret;
 
-   /* Need to do paired single emulation? */
-   if (!(vcpu-arch.hflags  BOOK3S_HFLAG_PAIRED_SINGLE))
-   return EMULATE_DONE;
-
-   /* Read out the instruction */
ret = kvmppc_ld(vcpu, srr0, sizeof(u32), vcpu-arch.last_inst, false);
if (ret == -ENOENT) {
vcpu-arch.msr = kvmppc_set_field(vcpu-arch.msr, 33, 33, 1);
vcpu-arch.msr = kvmppc_set_field(vcpu-arch.msr, 34, 36, 0);
vcpu-arch.msr = kvmppc_set_field(vcpu-arch.msr, 42, 47, 0);
kvmppc_book3s_queue_irqprio(vcpu, 
BOOK3S_INTERRUPT_INST_STORAGE);
-   } else if(ret == EMULATE_DONE) {
+   return EMULATE_AGAIN;
+   }
+
+   return EMULATE_DONE;
+}
+
+static int kvmppc_check_ext(struct kvm_vcpu *vcpu, unsigned int exit_nr)
+{
+
+   /* Need to do paired single emulation? */
+   if (!(vcpu-arch.hflags  BOOK3S_HFLAG_PAIRED_SINGLE))
+   return EMULATE_DONE;
+
+   /* Read out the instruction */
+   if (kvmppc_read_inst(vcpu) == EMULATE_DONE)
/* Need to emulate */
return EMULATE_FAIL;
-   }
 
return EMULATE_AGAIN;
 }
-- 
1.6.0.2

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 15/21] KVM: PPC: Make build work without CONFIG_VSX/ALTIVEC

2010-03-24 Thread Alexander Graf
The FPU/Altivec/VSX enablement also brought access to some structure
elements that are only defined when the respective config options
are enabled.

Unfortunately, I forgot to check for the config options in some places,
so let's do that now.

Unbreaks the build when CONFIG_VSX is not set.

Signed-off-by: Alexander Graf ag...@suse.de
---
 arch/powerpc/kvm/book3s.c |8 
 1 files changed, 8 insertions(+), 0 deletions(-)

diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c
index d6105d9..7912d72 100644
--- a/arch/powerpc/kvm/book3s.c
+++ b/arch/powerpc/kvm/book3s.c
@@ -608,7 +608,9 @@ void kvmppc_giveup_ext(struct kvm_vcpu *vcpu, ulong msr)
 {
struct thread_struct *t = current-thread;
u64 *vcpu_fpr = vcpu-arch.fpr;
+#ifdef CONFIG_VSX
u64 *vcpu_vsx = vcpu-arch.vsr;
+#endif
u64 *thread_fpr = (u64*)t-fpr;
int i;
 
@@ -688,7 +690,9 @@ static int kvmppc_handle_ext(struct kvm_vcpu *vcpu, 
unsigned int exit_nr,
 {
struct thread_struct *t = current-thread;
u64 *vcpu_fpr = vcpu-arch.fpr;
+#ifdef CONFIG_VSX
u64 *vcpu_vsx = vcpu-arch.vsr;
+#endif
u64 *thread_fpr = (u64*)t-fpr;
int i;
 
@@ -1219,8 +1223,12 @@ int __kvmppc_vcpu_run(struct kvm_run *kvm_run, struct 
kvm_vcpu *vcpu)
 {
int ret;
struct thread_struct ext_bkp;
+#ifdef CONFIG_ALTIVEC
bool save_vec = current-thread.used_vr;
+#endif
+#ifdef CONFIG_VSX
bool save_vsx = current-thread.used_vsr;
+#endif
ulong ext_msr;
 
/* No need to go into the guest when all we do is going out */
-- 
1.6.0.2

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 16/21] KVM: PPC: Fix dcbz emulation

2010-03-24 Thread Alexander Graf
On most systems we need to emulate dcbz when running 32 bit guests. So
far we've been rather slack, not giving correct DSISR values to the guest.

This patch makes the emulation more accurate, introducing a difference
between page not mapped and write protection fault. While at it, it
also speeds up dcbz emulation by an order of magnitude by using kmap.

Signed-off-by: Alexander Graf ag...@suse.de
---
 arch/powerpc/kvm/book3s.c|   56 +
 arch/powerpc/kvm/book3s_64_emulate.c |   19 +--
 2 files changed, 37 insertions(+), 38 deletions(-)

diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c
index 7912d72..1a12ef2 100644
--- a/arch/powerpc/kvm/book3s.c
+++ b/arch/powerpc/kvm/book3s.c
@@ -28,6 +28,7 @@
 #include asm/mmu_context.h
 #include linux/sched.h
 #include linux/vmalloc.h
+#include linux/highmem.h
 
 #define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
 
@@ -368,34 +369,29 @@ void kvmppc_set_pvr(struct kvm_vcpu *vcpu, u32 pvr)
  */
 static void kvmppc_patch_dcbz(struct kvm_vcpu *vcpu, struct kvmppc_pte *pte)
 {
-   bool touched = false;
-   hva_t hpage;
+   struct page *hpage;
+   u64 hpage_offset;
u32 *page;
int i;
 
-   hpage = gfn_to_hva(vcpu-kvm, pte-raddr  PAGE_SHIFT);
-   if (kvm_is_error_hva(hpage))
+   hpage = gfn_to_page(vcpu-kvm, pte-raddr  PAGE_SHIFT);
+   if (is_error_page(hpage))
return;
 
-   hpage |= pte-raddr  ~PAGE_MASK;
-   hpage = ~0xFFFULL;
-
-   page = vmalloc(HW_PAGE_SIZE);
-
-   if (copy_from_user(page, (void __user *)hpage, HW_PAGE_SIZE))
-   goto out;
+   hpage_offset = pte-raddr  ~PAGE_MASK;
+   hpage_offset = ~0xFFFULL;
+   hpage_offset /= 4;
 
-   for (i=0; i  HW_PAGE_SIZE / 4; i++)
-   if ((page[i]  0xff0007ff) == INS_DCBZ) {
-   page[i] = 0xfff7; // reserved instruction, so we 
trap
-   touched = true;
-   }
+   get_page(hpage);
+   page = kmap_atomic(hpage, KM_USER0);
 
-   if (touched)
-   copy_to_user((void __user *)hpage, page, HW_PAGE_SIZE);
+   /* patch dcbz into reserved instruction, so we trap */
+   for (i=hpage_offset; i  hpage_offset + (HW_PAGE_SIZE / 4); i++)
+   if ((page[i]  0xff0007ff) == INS_DCBZ)
+   page[i] = 0xfff7;
 
-out:
-   vfree(page);
+   kunmap_atomic(page, KM_USER0);
+   put_page(hpage);
 }
 
 static int kvmppc_xlate(struct kvm_vcpu *vcpu, ulong eaddr, bool data,
@@ -448,30 +444,21 @@ int kvmppc_st(struct kvm_vcpu *vcpu, ulong *eaddr, int 
size, void *ptr,
  bool data)
 {
struct kvmppc_pte pte;
-   hva_t hva = *eaddr;
 
vcpu-stat.st++;
 
if (kvmppc_xlate(vcpu, *eaddr, data, pte))
-   goto nopte;
+   return -ENOENT;
 
*eaddr = pte.raddr;
 
-   hva = kvmppc_pte_to_hva(vcpu, pte, false);
-   if (kvm_is_error_hva(hva))
-   goto mmio;
+   if (!pte.may_write)
+   return -EPERM;
 
-   if (copy_to_user((void __user *)hva, ptr, size)) {
-   printk(KERN_INFO kvmppc_st at 0x%lx failed\n, hva);
-   goto mmio;
-   }
+   if (kvm_write_guest(vcpu-kvm, pte.raddr, ptr, size))
+   return EMULATE_DO_MMIO;
 
return EMULATE_DONE;
-
-nopte:
-   return -ENOENT;
-mmio:
-   return EMULATE_DO_MMIO;
 }
 
 int kvmppc_ld(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr,
@@ -786,6 +773,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu 
*vcpu,
 * that no guest that needs the dcbz hack does NX.
 */
kvmppc_mmu_pte_flush(vcpu, vcpu-arch.pc, ~0xFFFULL);
+   r = RESUME_GUEST;
} else {
vcpu-arch.msr |= vcpu-arch.shadow_srr1  0x5800;
kvmppc_book3s_queue_irqprio(vcpu, exit_nr);
diff --git a/arch/powerpc/kvm/book3s_64_emulate.c 
b/arch/powerpc/kvm/book3s_64_emulate.c
index 1e5cf8d..bbd1590 100644
--- a/arch/powerpc/kvm/book3s_64_emulate.c
+++ b/arch/powerpc/kvm/book3s_64_emulate.c
@@ -189,6 +189,8 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct 
kvm_vcpu *vcpu,
ulong ra = 0;
ulong addr, vaddr;
u32 zeros[8] = { 0, 0, 0, 0, 0, 0, 0, 0 };
+   u32 dsisr;
+   int r;
 
if (get_ra(inst))
ra = kvmppc_get_gpr(vcpu, get_ra(inst));
@@ -198,14 +200,23 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct 
kvm_vcpu *vcpu,
addr = 0x;
vaddr = addr;
 
-   if (kvmppc_st(vcpu, addr, 32, zeros, true)) {
+   r = kvmppc_st(vcpu, addr, 32, 

[PATCH 20/21] KVM: PPC: Make bools bitfields

2010-03-24 Thread Alexander Graf
Bool defaults to at least byte width. We usually only want to waste a single
bit on this. So let's move all the bool values to bitfields, potentially
saving memory.

Signed-off-by: Alexander Graf ag...@suse.de
---
 arch/powerpc/include/asm/kvm_book3s.h |   28 ++--
 arch/powerpc/include/asm/kvm_host.h   |6 +++---
 2 files changed, 17 insertions(+), 17 deletions(-)

diff --git a/arch/powerpc/include/asm/kvm_book3s.h 
b/arch/powerpc/include/asm/kvm_book3s.h
index 8a6b4c5..ee79921 100644
--- a/arch/powerpc/include/asm/kvm_book3s.h
+++ b/arch/powerpc/include/asm/kvm_book3s.h
@@ -29,40 +29,40 @@ struct kvmppc_slb {
u64 vsid;
u64 orige;
u64 origv;
-   bool valid;
-   bool Ks;
-   bool Kp;
-   bool nx;
-   bool large; /* PTEs are 16MB */
-   bool tb;/* 1TB segment */
-   bool class;
+   bool valid  : 1;
+   bool Ks : 1;
+   bool Kp : 1;
+   bool nx : 1;
+   bool large  : 1;/* PTEs are 16MB */
+   bool tb : 1;/* 1TB segment */
+   bool class  : 1;
 };
 
 struct kvmppc_sr {
u32 raw;
u32 vsid;
-   bool Ks;
-   bool Kp;
-   bool nx;
-   bool valid;
+   bool Ks : 1;
+   bool Kp : 1;
+   bool nx : 1;
+   bool valid  : 1;
 };
 
 struct kvmppc_bat {
u64 raw;
u32 bepi;
u32 bepi_mask;
-   bool vs;
-   bool vp;
u32 brpn;
u8 wimg;
u8 pp;
+   bool vs : 1;
+   bool vp : 1;
 };
 
 struct kvmppc_sid_map {
u64 guest_vsid;
u64 guest_esid;
u64 host_vsid;
-   bool valid;
+   bool valid  : 1;
 };
 
 #define SID_MAP_BITS9
diff --git a/arch/powerpc/include/asm/kvm_host.h 
b/arch/powerpc/include/asm/kvm_host.h
index 486f1ca..5869a48 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -127,9 +127,9 @@ struct kvmppc_pte {
u64 eaddr;
u64 vpage;
u64 raddr;
-   bool may_read;
-   bool may_write;
-   bool may_execute;
+   bool may_read   : 1;
+   bool may_write  : 1;
+   bool may_execute: 1;
 };
 
 struct kvmppc_mmu {
-- 
1.6.0.2

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 18/21] KVM: PPC: Add check if pte was mapped secondary

2010-03-24 Thread Alexander Graf
Some HTAB providers (namely the PS3) ignore the SECONDARY flag. They
just put an entry in the htab as secondary when they see fit.

So we need to check the return value of htab_insert to remember the
correct slot id so we can actually invalidate the entry again.

Fixes KVM on the PS3.

Signed-off-by: Alexander Graf ag...@suse.de
---
 arch/powerpc/kvm/book3s_64_mmu_host.c |7 +++
 1 files changed, 7 insertions(+), 0 deletions(-)

diff --git a/arch/powerpc/kvm/book3s_64_mmu_host.c 
b/arch/powerpc/kvm/book3s_64_mmu_host.c
index 25bd4ed..a01e9c5 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_host.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_host.c
@@ -270,6 +270,13 @@ map_again:
(rflags  HPTE_R_N) ? '-' : 'x',
orig_pte-eaddr, hpteg, va, orig_pte-vpage, 
hpaddr);
 
+   /* The ppc_md code may give us a secondary entry even though we
+  asked for a primary. Fix up. */
+   if ((ret  _PTEIDX_SECONDARY)  !(vflags  HPTE_V_SECONDARY)) {
+   hash = ~hash;
+   hpteg = ((hash  htab_hash_mask) * HPTES_PER_GROUP);
+   }
+
pte-slot = hpteg + (ret  7);
pte-host_va = va;
pte-pte = *orig_pte;
-- 
1.6.0.2

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 21/21] KVM: PPC: Disable MSR_FEx for Cell hosts

2010-03-24 Thread Alexander Graf
Cell can't handle MSR_FE0 and MSR_FE1 too well. It gets dog slow.
So let's just override the guest whenever we see one of the two and mask them
out. See commit ddf5f75a16b3e7460ffee881795aa168dffcd0cf for reference.

Signed-off-by: Alexander Graf ag...@suse.de
---
 arch/powerpc/kvm/book3s.c |4 
 1 files changed, 4 insertions(+), 0 deletions(-)

diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c
index 1a12ef2..a7ab2ea 100644
--- a/arch/powerpc/kvm/book3s.c
+++ b/arch/powerpc/kvm/book3s.c
@@ -356,6 +356,10 @@ void kvmppc_set_pvr(struct kvm_vcpu *vcpu, u32 pvr)
!strcmp(cur_cpu_spec-platform, ppc970))
vcpu-arch.hflags |= BOOK3S_HFLAG_DCBZ32;
 
+   /* Cell performs badly if MSR_FEx are set. So let's hope nobody
+  really needs them in a VM on Cell and force disable them. */
+   if (!strcmp(cur_cpu_spec-platform, ppc-cell-be))
+   to_book3s(vcpu)-msr_mask = ~(MSR_FE0 | MSR_FE1);
 }
 
 /* Book3s_32 CPUs always have 32 bytes cache line size, which Linux assumes. To
-- 
1.6.0.2

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 17/21] KVM: PPC: Add emulation for dcba

2010-03-24 Thread Alexander Graf
Mac OS X uses the dcba instruction. According to the specification it doesn't
guarantee any functionality, so let's just emulate it as nop.

Signed-off-by: Alexander Graf ag...@suse.de
---
 arch/powerpc/kvm/book3s_64_emulate.c |4 
 1 files changed, 4 insertions(+), 0 deletions(-)

diff --git a/arch/powerpc/kvm/book3s_64_emulate.c 
b/arch/powerpc/kvm/book3s_64_emulate.c
index bbd1590..8f50776 100644
--- a/arch/powerpc/kvm/book3s_64_emulate.c
+++ b/arch/powerpc/kvm/book3s_64_emulate.c
@@ -37,6 +37,7 @@
 #define OP_31_XOP_SLBIA498
 #define OP_31_XOP_MFSR 595
 #define OP_31_XOP_MFSRIN   659
+#define OP_31_XOP_DCBA 758
 #define OP_31_XOP_SLBMFEV  851
 #define OP_31_XOP_EIOIO854
 #define OP_31_XOP_SLBMFEE  915
@@ -183,6 +184,9 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct 
kvm_vcpu *vcpu,
kvmppc_set_gpr(vcpu, get_rt(inst), t);
}
break;
+   case OP_31_XOP_DCBA:
+   /* Gets treated as NOP */
+   break;
case OP_31_XOP_DCBZ:
{
ulong rb = kvmppc_get_gpr(vcpu, get_rb(inst));
-- 
1.6.0.2

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 14/21] KVM: PPC: Add OSI hypercall interface

2010-03-24 Thread Alexander Graf
MOL uses its own hypercall interface to call back into userspace when
the guest wants to do something.

So let's implement that as an exit reason, specify it with a CAP and
only really use it when userspace wants us to.

The only user of it so far is MOL.

Signed-off-by: Alexander Graf ag...@suse.de

---

v1 -> v2:

  - Add documentation for OSI exit struct

v2 -> v3:

  - Document that EXIT_OSI is not migration safe
---
 Documentation/kvm/api.txt |   19 ---
 arch/powerpc/include/asm/kvm_book3s.h |5 +
 arch/powerpc/include/asm/kvm_host.h   |2 ++
 arch/powerpc/kvm/book3s.c |   24 ++--
 arch/powerpc/kvm/powerpc.c|   12 
 include/linux/kvm.h   |6 ++
 6 files changed, 59 insertions(+), 9 deletions(-)

diff --git a/Documentation/kvm/api.txt b/Documentation/kvm/api.txt
index 3da2240..f0a9337 100644
--- a/Documentation/kvm/api.txt
+++ b/Documentation/kvm/api.txt
@@ -895,9 +895,9 @@ executed a memory-mapped I/O instruction which could not be 
satisfied
 by kvm.  The 'data' member contains the written data if 'is_write' is
 true, and should be filled by application code otherwise.
 
-NOTE: For KVM_EXIT_IO and KVM_EXIT_MMIO, the corresponding operations
-are complete (and guest state is consistent) only after userspace has
-re-entered the kernel with KVM_RUN.  The kernel side will first finish
+NOTE: For KVM_EXIT_IO, KVM_EXIT_MMIO and KVM_EXIT_OSI, the corresponding
+operations are complete (and guest state is consistent) only after userspace
+has re-entered the kernel with KVM_RUN.  The kernel side will first finish
 incomplete operations and then check for pending signals.  Userspace
 can re-enter the guest with an unmasked signal pending to complete
 pending operations.
@@ -952,6 +952,19 @@ s390 specific.
 
 powerpc specific.
 
+   /* KVM_EXIT_OSI */
+   struct {
+   __u64 gprs[32];
+   } osi;
+
+MOL uses a special hypercall interface it calls 'OSI'. To enable it, we catch
+hypercalls and exit with this exit struct that contains all the guest gprs.
+
+If exit_reason is KVM_EXIT_OSI, then the vcpu has triggered such a hypercall.
+Userspace can now handle the hypercall and when it's done modify the gprs as
+necessary. Upon guest entry all guest GPRs will then be replaced by the values
+in this struct.
+
/* Fix the size of the union. */
char padding[256];
};
diff --git a/arch/powerpc/include/asm/kvm_book3s.h 
b/arch/powerpc/include/asm/kvm_book3s.h
index bea7637..7e243b2 100644
--- a/arch/powerpc/include/asm/kvm_book3s.h
+++ b/arch/powerpc/include/asm/kvm_book3s.h
@@ -148,6 +148,11 @@ static inline ulong dsisr(void)
 
 extern void kvm_return_point(void);
 
+/* Magic register values loaded into r3 and r4 before the 'sc' assembly
+ * instruction for the OSI hypercalls */
+#define OSI_SC_MAGIC_R30x113724FA
+#define OSI_SC_MAGIC_R40x77810F9B
+
 #define INS_DCBZ   0x7c0007ec
 
 #endif /* __ASM_KVM_BOOK3S_H__ */
diff --git a/arch/powerpc/include/asm/kvm_host.h 
b/arch/powerpc/include/asm/kvm_host.h
index 0ebda67..486f1ca 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -273,6 +273,8 @@ struct kvm_vcpu_arch {
u8 mmio_sign_extend;
u8 dcr_needed;
u8 dcr_is_write;
+   u8 osi_needed;
+   u8 osi_enabled;
 
u32 cpr0_cfgaddr; /* holds the last set cpr0_cfgaddr */
 
diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c
index 130a9a1..d6105d9 100644
--- a/arch/powerpc/kvm/book3s.c
+++ b/arch/powerpc/kvm/book3s.c
@@ -871,12 +871,24 @@ program_interrupt:
break;
}
case BOOK3S_INTERRUPT_SYSCALL:
-#ifdef EXIT_DEBUG
-   printk(KERN_INFO Syscall Nr %d\n, (int)kvmppc_get_gpr(vcpu, 
0));
-#endif
-   vcpu-stat.syscall_exits++;
-   kvmppc_book3s_queue_irqprio(vcpu, exit_nr);
-   r = RESUME_GUEST;
+   // XXX make user settable
+   if (vcpu-arch.osi_enabled 
+   (((u32)kvmppc_get_gpr(vcpu, 3)) == OSI_SC_MAGIC_R3) 
+   (((u32)kvmppc_get_gpr(vcpu, 4)) == OSI_SC_MAGIC_R4)) {
+   u64 *gprs = run-osi.gprs;
+   int i;
+
+   run-exit_reason = KVM_EXIT_OSI;
+   for (i = 0; i  32; i++)
+   gprs[i] = kvmppc_get_gpr(vcpu, i);
+   vcpu-arch.osi_needed = 1;
+   r = RESUME_HOST_NV;
+
+   } else {
+   vcpu-stat.syscall_exits++;
+   kvmppc_book3s_queue_irqprio(vcpu, exit_nr);
+   r = RESUME_GUEST;
+   }
break;
case BOOK3S_INTERRUPT_FP_UNAVAIL:
case BOOK3S_INTERRUPT_ALTIVEC:
diff --git 

[PATCH 13/21] KVM: Add support for enabling capabilities per-vcpu

2010-03-24 Thread Alexander Graf
Sometimes we don't want all capabilities to be available to all
our vcpus. One example for that is the OSI interface, implemented
in the next patch.

In order to have a generic mechanism in how to enable capabilities
individually, this patch introduces a new ioctl that can be used
for this purpose. That way features we don't want in all guests or
userspace configurations can just not be enabled and we're good.

Signed-off-by: Alexander Graf ag...@suse.de

---

v1 -> v2:

  - Add flags to enable_cap
  - Update documentation for kvm_enable_cap

v2 -> v3:

  - Add CAP for ENABLE_CAP
  - Improve documentation for ENABLE_CAP
---
 Documentation/kvm/api.txt  |   35 +++
 arch/powerpc/kvm/powerpc.c |   27 +++
 include/linux/kvm.h|   12 
 3 files changed, 74 insertions(+), 0 deletions(-)

diff --git a/Documentation/kvm/api.txt b/Documentation/kvm/api.txt
index d170cb4..3da2240 100644
--- a/Documentation/kvm/api.txt
+++ b/Documentation/kvm/api.txt
@@ -749,6 +749,41 @@ Writes debug registers into the vcpu.
 See KVM_GET_DEBUGREGS for the data structure. The flags field is unused
 yet and must be cleared on entry.
 
+4.34 KVM_ENABLE_CAP
+
+Capability: KVM_CAP_ENABLE_CAP
+Architectures: ppc
+Type: vcpu ioctl
+Parameters: struct kvm_enable_cap (in)
+Returns: 0 on success; -1 on error
+
+Not all extensions are enabled by default. Using this ioctl the application
+can enable an extension, making it available to the guest.
+
+On systems that do not support this ioctl, it always fails. On systems that
+do support it, it only works for extensions that are supported for enablement.
+
+To check if a capability can be enabled, the KVM_CHECK_EXTENSION ioctl should
+be used.
+
+struct kvm_enable_cap {
+   /* in */
+   __u32 cap;
+
+The capability that is supposed to get enabled.
+
+   __u32 flags;
+
+A bitfield indicating future enhancements. Has to be 0 for now.
+
+   __u64 args[4];
+
+Arguments for enabling a feature. If a feature needs initial values to
+function properly, this is the place to put them.
+
+   __u8  pad[64];
+};
+
 
 5. The kvm_run structure
 
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index af873d9..2092157 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -149,6 +149,7 @@ int kvm_dev_ioctl_check_extension(long ext)
case KVM_CAP_PPC_SEGSTATE:
case KVM_CAP_PPC_PAIRED_SINGLES:
case KVM_CAP_PPC_UNSET_IRQ:
+   case KVM_CAP_ENABLE_CAP:
r = 1;
break;
case KVM_CAP_COALESCED_MMIO:
@@ -464,6 +465,23 @@ int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, struct 
kvm_interrupt *irq)
return 0;
 }
 
+static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
+struct kvm_enable_cap *cap)
+{
+   int r;
+
+   if (cap-flags)
+   return -EINVAL;
+
+   switch (cap-cap) {
+   default:
+   r = -EINVAL;
+   break;
+   }
+
+   return r;
+}
+
 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
 struct kvm_mp_state *mp_state)
 {
@@ -492,6 +510,15 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
r = kvm_vcpu_ioctl_interrupt(vcpu, irq);
break;
}
+   case KVM_ENABLE_CAP:
+   {
+   struct kvm_enable_cap cap;
+   r = -EFAULT;
+   if (copy_from_user(cap, argp, sizeof(cap)))
+   goto out;
+   r = kvm_vcpu_ioctl_enable_cap(vcpu, cap);
+   break;
+   }
default:
r = -EINVAL;
}
diff --git a/include/linux/kvm.h b/include/linux/kvm.h
index c36d093..d9e920e 100644
--- a/include/linux/kvm.h
+++ b/include/linux/kvm.h
@@ -400,6 +400,15 @@ struct kvm_ioeventfd {
__u8  pad[36];
 };
 
+/* for KVM_ENABLE_CAP */
+struct kvm_enable_cap {
+   /* in */
+   __u32 cap;
+   __u32 flags;
+   __u64 args[4];
+   __u8  pad[64];
+};
+
 #define KVMIO 0xAE
 
 /*
@@ -508,6 +517,7 @@ struct kvm_ioeventfd {
 #endif
 #define KVM_CAP_X86_ROBUST_SINGLESTEP 51
 #define KVM_CAP_PPC_UNSET_IRQ 53
+#define KVM_CAP_ENABLE_CAP 54
 
 #ifdef KVM_CAP_IRQ_ROUTING
 
@@ -697,6 +707,8 @@ struct kvm_clock_data {
 /* Available with KVM_CAP_DEBUGREGS */
 #define KVM_GET_DEBUGREGS _IOR(KVMIO,  0xa1, struct kvm_debugregs)
 #define KVM_SET_DEBUGREGS _IOW(KVMIO,  0xa2, struct kvm_debugregs)
+/* No need for CAP, because then it just always fails */
+#define KVM_ENABLE_CAP_IOW(KVMIO,  0xa3, struct kvm_enable_cap)
 
 #define KVM_DEV_ASSIGN_ENABLE_IOMMU(1  0)
 
-- 
1.6.0.2

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 09/21] KVM: PPC: Implement BAT reads

2010-03-24 Thread Alexander Graf
BATs can not only be written to, they can also be read out!
So let's implement emulation for reading BAT values again.

While at it, I also made BAT setting flush the segment cache,
so we're absolutely sure there's no MMU state left when writing
BATs.

Signed-off-by: Alexander Graf ag...@suse.de
---
 arch/powerpc/kvm/book3s_64_emulate.c |   35 ++
 1 files changed, 35 insertions(+), 0 deletions(-)

diff --git a/arch/powerpc/kvm/book3s_64_emulate.c 
b/arch/powerpc/kvm/book3s_64_emulate.c
index 8d7a78d..39d5003 100644
--- a/arch/powerpc/kvm/book3s_64_emulate.c
+++ b/arch/powerpc/kvm/book3s_64_emulate.c
@@ -239,6 +239,34 @@ void kvmppc_set_bat(struct kvm_vcpu *vcpu, struct 
kvmppc_bat *bat, bool upper,
}
 }
 
+static u32 kvmppc_read_bat(struct kvm_vcpu *vcpu, int sprn)
+{
+   struct kvmppc_vcpu_book3s *vcpu_book3s = to_book3s(vcpu);
+   struct kvmppc_bat *bat;
+
+   switch (sprn) {
+   case SPRN_IBAT0U ... SPRN_IBAT3L:
+   bat = vcpu_book3s-ibat[(sprn - SPRN_IBAT0U) / 2];
+   break;
+   case SPRN_IBAT4U ... SPRN_IBAT7L:
+   bat = vcpu_book3s-ibat[4 + ((sprn - SPRN_IBAT4U) / 2)];
+   break;
+   case SPRN_DBAT0U ... SPRN_DBAT3L:
+   bat = vcpu_book3s-dbat[(sprn - SPRN_DBAT0U) / 2];
+   break;
+   case SPRN_DBAT4U ... SPRN_DBAT7L:
+   bat = vcpu_book3s-dbat[4 + ((sprn - SPRN_DBAT4U) / 2)];
+   break;
+   default:
+   BUG();
+   }
+
+   if (sprn % 2)
+   return bat-raw  32;
+   else
+   return bat-raw;
+}
+
 static void kvmppc_write_bat(struct kvm_vcpu *vcpu, int sprn, u32 val)
 {
struct kvmppc_vcpu_book3s *vcpu_book3s = to_book3s(vcpu);
@@ -290,6 +318,7 @@ int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int 
sprn, int rs)
/* BAT writes happen so rarely that we're ok to flush
 * everything here */
kvmppc_mmu_pte_flush(vcpu, 0, 0);
+   kvmppc_mmu_flush_segments(vcpu);
break;
case SPRN_HID0:
to_book3s(vcpu)-hid[0] = spr_val;
@@ -373,6 +402,12 @@ int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int 
sprn, int rt)
int emulated = EMULATE_DONE;
 
switch (sprn) {
+   case SPRN_IBAT0U ... SPRN_IBAT3L:
+   case SPRN_IBAT4U ... SPRN_IBAT7L:
+   case SPRN_DBAT0U ... SPRN_DBAT3L:
+   case SPRN_DBAT4U ... SPRN_DBAT7L:
+   kvmppc_set_gpr(vcpu, rt, kvmppc_read_bat(vcpu, sprn));
+   break;
case SPRN_SDR1:
kvmppc_set_gpr(vcpu, rt, to_book3s(vcpu)-sdr1);
break;
-- 
1.6.0.2

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 10/21] KVM: PPC: Make XER load 32 bit

2010-03-24 Thread Alexander Graf
We have a 32 bit value in the PACA to store XER in. We also do an stw
when storing XER in there. But then we load it with ld, completely
screwing it up on every entry.

Welcome to the Big Endian world.

Signed-off-by: Alexander Graf ag...@suse.de
---
 arch/powerpc/kvm/book3s_64_slb.S |2 +-
 1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/arch/powerpc/kvm/book3s_64_slb.S b/arch/powerpc/kvm/book3s_64_slb.S
index 35b7627..0919679 100644
--- a/arch/powerpc/kvm/book3s_64_slb.S
+++ b/arch/powerpc/kvm/book3s_64_slb.S
@@ -145,7 +145,7 @@ slb_do_enter:
lwz r11, (PACA_KVM_CR)(r13)
mtcrr11
 
-   ld  r11, (PACA_KVM_XER)(r13)
+   lwz r11, (PACA_KVM_XER)(r13)
mtxer   r11
 
ld  r11, (PACA_KVM_R11)(r13)
-- 
1.6.0.2

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 04/21] KVM: PPC: Book3S_32 guest MMU fixes

2010-03-24 Thread Alexander Graf
This patch makes the VSID of mapped pages always reflect all the special cases
we have, like split mode.

It also changes the tlbie mask to 0x0ffff000 according to the spec. The mask
we used before was incorrect.

Signed-off-by: Alexander Graf ag...@suse.de
---
 arch/powerpc/include/asm/kvm_book3s.h |1 +
 arch/powerpc/kvm/book3s_32_mmu.c  |   30 +++---
 2 files changed, 24 insertions(+), 7 deletions(-)

diff --git a/arch/powerpc/include/asm/kvm_book3s.h 
b/arch/powerpc/include/asm/kvm_book3s.h
index 9f5a992..b47b2f5 100644
--- a/arch/powerpc/include/asm/kvm_book3s.h
+++ b/arch/powerpc/include/asm/kvm_book3s.h
@@ -44,6 +44,7 @@ struct kvmppc_sr {
bool Ks;
bool Kp;
bool nx;
+   bool valid;
 };
 
 struct kvmppc_bat {
diff --git a/arch/powerpc/kvm/book3s_32_mmu.c b/arch/powerpc/kvm/book3s_32_mmu.c
index 1483a9b..7071e22 100644
--- a/arch/powerpc/kvm/book3s_32_mmu.c
+++ b/arch/powerpc/kvm/book3s_32_mmu.c
@@ -57,6 +57,8 @@ static inline bool check_debug_ip(struct kvm_vcpu *vcpu)
 
 static int kvmppc_mmu_book3s_32_xlate_bat(struct kvm_vcpu *vcpu, gva_t eaddr,
  struct kvmppc_pte *pte, bool data);
+static int kvmppc_mmu_book3s_32_esid_to_vsid(struct kvm_vcpu *vcpu, u64 esid,
+u64 *vsid);
 
 static struct kvmppc_sr *find_sr(struct kvmppc_vcpu_book3s *vcpu_book3s, gva_t 
eaddr)
 {
@@ -66,13 +68,14 @@ static struct kvmppc_sr *find_sr(struct kvmppc_vcpu_book3s 
*vcpu_book3s, gva_t e
 static u64 kvmppc_mmu_book3s_32_ea_to_vp(struct kvm_vcpu *vcpu, gva_t eaddr,
 bool data)
 {
-   struct kvmppc_sr *sre = find_sr(to_book3s(vcpu), eaddr);
+   u64 vsid;
struct kvmppc_pte pte;
 
if (!kvmppc_mmu_book3s_32_xlate_bat(vcpu, eaddr, pte, data))
return pte.vpage;
 
-   return (((u64)eaddr  12)  0x) | (((u64)sre-vsid)  16);
+   kvmppc_mmu_book3s_32_esid_to_vsid(vcpu, eaddr  SID_SHIFT, vsid);
+   return (((u64)eaddr  12)  0x) | (vsid  16);
 }
 
 static void kvmppc_mmu_book3s_32_reset_msr(struct kvm_vcpu *vcpu)
@@ -142,8 +145,13 @@ static int kvmppc_mmu_book3s_32_xlate_bat(struct kvm_vcpu 
*vcpu, gva_t eaddr,
bat-bepi_mask);
}
if ((eaddr  bat-bepi_mask) == bat-bepi) {
+   u64 vsid;
+   kvmppc_mmu_book3s_32_esid_to_vsid(vcpu,
+   eaddr  SID_SHIFT, vsid);
+   vsid = 16;
+   pte-vpage = (((u64)eaddr  12)  0x) | vsid;
+
pte-raddr = bat-brpn | (eaddr  ~bat-bepi_mask);
-   pte-vpage = (eaddr  12) | VSID_BAT;
pte-may_read = bat-pp;
pte-may_write = bat-pp  1;
pte-may_execute = true;
@@ -302,6 +310,7 @@ static void kvmppc_mmu_book3s_32_mtsrin(struct kvm_vcpu 
*vcpu, u32 srnum,
/* And then put in the new SR */
sre-raw = value;
sre-vsid = (value  0x0fff);
+   sre-valid = (value  0x8000) ? false : true;
sre-Ks = (value  0x4000) ? true : false;
sre-Kp = (value  0x2000) ? true : false;
sre-nx = (value  0x1000) ? true : false;
@@ -312,7 +321,7 @@ static void kvmppc_mmu_book3s_32_mtsrin(struct kvm_vcpu 
*vcpu, u32 srnum,
 
 static void kvmppc_mmu_book3s_32_tlbie(struct kvm_vcpu *vcpu, ulong ea, bool 
large)
 {
-   kvmppc_mmu_pte_flush(vcpu, ea, ~0xFFFULL);
+   kvmppc_mmu_pte_flush(vcpu, ea, 0x0000);
 }
 
 static int kvmppc_mmu_book3s_32_esid_to_vsid(struct kvm_vcpu *vcpu, u64 esid,
@@ -333,15 +342,22 @@ static int kvmppc_mmu_book3s_32_esid_to_vsid(struct 
kvm_vcpu *vcpu, u64 esid,
break;
case MSR_DR|MSR_IR:
{
-   ulong ea;
-   ea = esid  SID_SHIFT;
-   *vsid = find_sr(to_book3s(vcpu), ea)-vsid;
+   ulong ea = esid  SID_SHIFT;
+   struct kvmppc_sr *sr = find_sr(to_book3s(vcpu), ea);
+
+   if (!sr-valid)
+   return -1;
+
+   *vsid = sr-vsid;
break;
}
default:
BUG();
}
 
+   if (vcpu-arch.msr  MSR_PR)
+   *vsid |= VSID_PR;
+
return 0;
 }
 
-- 
1.6.0.2

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 19/21] KVM: PPC: Use ULL for big numbers

2010-03-24 Thread Alexander Graf
Some constants were bigger than ints. Let's mark them as such so we don't
accidentally truncate them.

Signed-off-by: Alexander Graf ag...@suse.de
---
 arch/powerpc/include/asm/kvm_book3s.h |   12 ++--
 1 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/arch/powerpc/include/asm/kvm_book3s.h 
b/arch/powerpc/include/asm/kvm_book3s.h
index 7e243b2..8a6b4c5 100644
--- a/arch/powerpc/include/asm/kvm_book3s.h
+++ b/arch/powerpc/include/asm/kvm_book3s.h
@@ -100,12 +100,12 @@ struct kvmppc_vcpu_book3s {
 #define CONTEXT_GUEST  1
 #define CONTEXT_GUEST_END  2
 
-#define VSID_REAL_DR   0x7ff0
-#define VSID_REAL_IR   0x7fe0
-#define VSID_SPLIT_MASK0x7fe0
-#define VSID_REAL  0x7fc0
-#define VSID_BAT   0x7fb0
-#define VSID_PR0x8000
+#define VSID_REAL_DR   0x7ff0ULL
+#define VSID_REAL_IR   0x7fe0ULL
+#define VSID_SPLIT_MASK0x7fe0ULL
+#define VSID_REAL  0x7fc0ULL
+#define VSID_BAT   0x7fb0ULL
+#define VSID_PR0x8000ULL
 
 extern void kvmppc_mmu_pte_flush(struct kvm_vcpu *vcpu, u64 ea, u64 ea_mask);
 extern void kvmppc_mmu_pte_vflush(struct kvm_vcpu *vcpu, u64 vp, u64 vp_mask);
-- 
1.6.0.2

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 07/21] KVM: PPC: Load VCPU for register fetching

2010-03-24 Thread Alexander Graf
When trying to read or store vcpu register data, we should also make
sure the vcpu is actually loaded, so we're 100% sure we get the correct
values.

Signed-off-by: Alexander Graf ag...@suse.de
---
 arch/powerpc/kvm/book3s.c |8 
 1 files changed, 8 insertions(+), 0 deletions(-)

diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c
index 029e1be..585dc91 100644
--- a/arch/powerpc/kvm/book3s.c
+++ b/arch/powerpc/kvm/book3s.c
@@ -955,6 +955,8 @@ int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, 
struct kvm_regs *regs)
 {
int i;
 
+   vcpu_load(vcpu);
+
regs-pc = vcpu-arch.pc;
regs-cr = kvmppc_get_cr(vcpu);
regs-ctr = vcpu-arch.ctr;
@@ -975,6 +977,8 @@ int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, 
struct kvm_regs *regs)
for (i = 0; i  ARRAY_SIZE(regs-gpr); i++)
regs-gpr[i] = kvmppc_get_gpr(vcpu, i);
 
+   vcpu_put(vcpu);
+
return 0;
 }
 
@@ -982,6 +986,8 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, 
struct kvm_regs *regs)
 {
int i;
 
+   vcpu_load(vcpu);
+
vcpu-arch.pc = regs-pc;
kvmppc_set_cr(vcpu, regs-cr);
vcpu-arch.ctr = regs-ctr;
@@ -1001,6 +1007,8 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, 
struct kvm_regs *regs)
for (i = 0; i  ARRAY_SIZE(regs-gpr); i++)
kvmppc_set_gpr(vcpu, i, regs-gpr[i]);
 
+   vcpu_put(vcpu);
+
return 0;
 }
 
-- 
1.6.0.2

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 12/21] KVM: PPC: Implement alignment interrupt

2010-03-24 Thread Alexander Graf
Mac OS X has some applications - namely the Finder - that require alignment
interrupts to work properly. So we need to implement them.

But the spec for 970 and 750 also looks different. While 750 requires the
DSISR and DAR fields to reflect some instruction bits (DSISR) and the fault
address (DAR), the 970 declares this as an optional feature. So we need
to reconstruct DSISR and DAR manually.

Signed-off-by: Alexander Graf ag...@suse.de

---

v2 -> v3:

 - add emulation for stfs, stfd, lfd
 - add DAR emulation
---
 arch/powerpc/include/asm/kvm_book3s.h |2 +
 arch/powerpc/kvm/book3s.c |   10 
 arch/powerpc/kvm/book3s_64_emulate.c  |   75 +
 3 files changed, 87 insertions(+), 0 deletions(-)

diff --git a/arch/powerpc/include/asm/kvm_book3s.h 
b/arch/powerpc/include/asm/kvm_book3s.h
index b47b2f5..bea7637 100644
--- a/arch/powerpc/include/asm/kvm_book3s.h
+++ b/arch/powerpc/include/asm/kvm_book3s.h
@@ -131,6 +131,8 @@ extern void kvmppc_rmcall(ulong srr0, ulong srr1);
 extern void kvmppc_load_up_fpu(void);
 extern void kvmppc_load_up_altivec(void);
 extern void kvmppc_load_up_vsx(void);
+extern u32 kvmppc_alignment_dsisr(struct kvm_vcpu *vcpu, unsigned int inst);
+extern ulong kvmppc_alignment_dar(struct kvm_vcpu *vcpu, unsigned int inst);
 
 static inline struct kvmppc_vcpu_book3s *to_book3s(struct kvm_vcpu *vcpu)
 {
diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c
index 585dc91..130a9a1 100644
--- a/arch/powerpc/kvm/book3s.c
+++ b/arch/powerpc/kvm/book3s.c
@@ -905,6 +905,16 @@ program_interrupt:
}
break;
}
+   case BOOK3S_INTERRUPT_ALIGNMENT:
+   if (kvmppc_read_inst(vcpu) == EMULATE_DONE) {
+   to_book3s(vcpu)-dsisr = kvmppc_alignment_dsisr(vcpu,
+   vcpu-arch.last_inst);
+   vcpu-arch.dear = kvmppc_alignment_dar(vcpu,
+   vcpu-arch.last_inst);
+   kvmppc_book3s_queue_irqprio(vcpu, exit_nr);
+   }
+   r = RESUME_GUEST;
+   break;
case BOOK3S_INTERRUPT_MACHINE_CHECK:
case BOOK3S_INTERRUPT_TRACE:
kvmppc_book3s_queue_irqprio(vcpu, exit_nr);
diff --git a/arch/powerpc/kvm/book3s_64_emulate.c 
b/arch/powerpc/kvm/book3s_64_emulate.c
index 39d5003..1e5cf8d 100644
--- a/arch/powerpc/kvm/book3s_64_emulate.c
+++ b/arch/powerpc/kvm/book3s_64_emulate.c
@@ -44,6 +44,11 @@
 /* DCBZ is actually 1014, but we patch it to 1010 so we get a trap */
 #define OP_31_XOP_DCBZ 1010
 
+#define OP_LFS 48
+#define OP_LFD 50
+#define OP_STFS52
+#define OP_STFD54
+
 #define SPRN_GQR0  912
 #define SPRN_GQR1  913
 #define SPRN_GQR2  914
@@ -474,3 +479,73 @@ int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int 
sprn, int rt)
return emulated;
 }
 
+u32 kvmppc_alignment_dsisr(struct kvm_vcpu *vcpu, unsigned int inst)
+{
+   u32 dsisr = 0;
+
+   /*
+* This is what the spec says about DSISR bits (not mentioned = 0):
+*
+* 12:13[DS]Set to bits 30:31
+* 15:16[X] Set to bits 29:30
+* 17   [X] Set to bit 25
+*  [D/DS]  Set to bit 5
+* 18:21[X] Set to bits 21:24
+*  [D/DS]  Set to bits 1:4
+* 22:26Set to bits 6:10 (RT/RS/FRT/FRS)
+* 27:31Set to bits 11:15 (RA)
+*/
+
+   switch (get_op(inst)) {
+   /* D-form */
+   case OP_LFS:
+   case OP_LFD:
+   case OP_STFD:
+   case OP_STFS:
+   dsisr |= (inst  12)  0x4000; /* bit 17 */
+   dsisr |= (inst  17)  0x3c00; /* bits 18:21 */
+   break;
+   /* X-form */
+   case 31:
+   dsisr |= (inst  14)  0x18000; /* bits 15:16 */
+   dsisr |= (inst  8)   0x04000; /* bit 17 */
+   dsisr |= (inst  3)   0x03c00; /* bits 18:21 */
+   break;
+   default:
+   printk(KERN_INFO KVM: Unaligned instruction 0x%x\n, inst);
+   break;
+   }
+
+   dsisr |= (inst  16)  0x03ff; /* bits 22:31 */
+
+   return dsisr;
+}
+
+ulong kvmppc_alignment_dar(struct kvm_vcpu *vcpu, unsigned int inst)
+{
+   ulong dar = 0;
+   ulong ra;
+
+   switch (get_op(inst)) {
+   case OP_LFS:
+   case OP_LFD:
+   case OP_STFD:
+   case OP_STFS:
+   ra = get_ra(inst);
+   if (ra)
+   dar = kvmppc_get_gpr(vcpu, ra);
+   dar += (s32)((s16)inst);
+   break;
+   case 31:
+   ra = get_ra(inst);
+   if (ra)
+   dar = kvmppc_get_gpr(vcpu, ra);
+   dar += 

[PATCH 00/21] KVM: PPC: MOL bringup patches v3

2010-03-24 Thread Alexander Graf
Mac-on-Linux has always lacked PPC64 host support. This is going to
change now!

This patchset contains minor patches to enable MOL, but is mostly about
bug fixes that came out of running Mac OS X. With this set and the
current svn version of MOL I have 10.4.11 running as a guest on a 970MP
as well as a PS3 host.


v1 -> v2:

 - Add documentation for EXIT_OSI and ENABLE_CAP
 - Add flags to enable_cap
 - Add build fix for !CONFIG_VSX
 - Remove in-paca register check

v2 -> v3:

 - Document that EXIT_OSI is not migration safe
 - Add CAP for ENABLE_CAP
 - Improve documentation for ENABLE_CAP
 - Add alignment emulation for stfs, stfd, lfd
 - Add alignment DAR emulation
 - Add CAP for unset irq
 - new: Fix dcbz emulation
 - new: Add emulation for dcba
 - new: Add check if pte was mapped secondary (PS3 fix)
 - new: Use ULL for big numbers
 - new: Make bools bitfields
 - new: Disable MSR_FEx for Cell hosts (PS3 speedup)


Alexander Graf (21):
  KVM: PPC: Ensure split mode works
  KVM: PPC: Allow userspace to unset the IRQ line
  KVM: PPC: Make DSISR 32 bits wide
  KVM: PPC: Book3S_32 guest MMU fixes
  KVM: PPC: Split instruction reading out
  KVM: PPC: Don't reload FPU with invalid values
  KVM: PPC: Load VCPU for register fetching
  KVM: PPC: Implement mfsr emulation
  KVM: PPC: Implement BAT reads
  KVM: PPC: Make XER load 32 bit
  KVM: PPC: Implement emulation for lbzux and lhax
  KVM: PPC: Implement alignment interrupt
  KVM: Add support for enabling capabilities per-vcpu
  KVM: PPC: Add OSI hypercall interface
  KVM: PPC: Make build work without CONFIG_VSX/ALTIVEC
  KVM: PPC: Fix dcbz emulation
  KVM: PPC: Add emulation for dcba
  KVM: PPC: Add check if pte was mapped secondary
  KVM: PPC: Use ULL for big numbers
  KVM: PPC: Make bools bitfields
  KVM: PPC: Disable MSR_FEx for Cell hosts

 Documentation/kvm/api.txt   |   54 -
 arch/powerpc/include/asm/kvm.h  |3 +
 arch/powerpc/include/asm/kvm_book3s.h   |   47 +---
 arch/powerpc/include/asm/kvm_host.h |   10 +-
 arch/powerpc/include/asm/kvm_ppc.h  |2 +
 arch/powerpc/kvm/book3s.c   |  191 +++---
 arch/powerpc/kvm/book3s_32_mmu.c|   30 -
 arch/powerpc/kvm/book3s_64_emulate.c|  146 +++-
 arch/powerpc/kvm/book3s_64_interrupts.S |2 +-
 arch/powerpc/kvm/book3s_64_mmu_host.c   |7 +
 arch/powerpc/kvm/book3s_64_slb.S|2 +-
 arch/powerpc/kvm/emulate.c  |   20 
 arch/powerpc/kvm/powerpc.c  |   45 +++-
 include/linux/kvm.h |   19 +++
 14 files changed, 468 insertions(+), 110 deletions(-)

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 02/21] KVM: PPC: Allow userspace to unset the IRQ line

2010-03-24 Thread Alexander Graf
Userspace can tell us that it wants to trigger an interrupt. But
so far it can't tell us that it wants to stop triggering one.

So let's interpret the parameter to the ioctl that we have anyways
to tell us if we want to raise or lower the interrupt line.

Signed-off-by: Alexander Graf ag...@suse.de

v2 -> v3:

 - Add CAP for unset irq
---
 arch/powerpc/include/asm/kvm.h |3 +++
 arch/powerpc/include/asm/kvm_ppc.h |2 ++
 arch/powerpc/kvm/book3s.c  |6 ++
 arch/powerpc/kvm/powerpc.c |6 +-
 include/linux/kvm.h|1 +
 5 files changed, 17 insertions(+), 1 deletions(-)

diff --git a/arch/powerpc/include/asm/kvm.h b/arch/powerpc/include/asm/kvm.h
index 19bae31..6c5547d 100644
--- a/arch/powerpc/include/asm/kvm.h
+++ b/arch/powerpc/include/asm/kvm.h
@@ -84,4 +84,7 @@ struct kvm_guest_debug_arch {
 #define KVM_REG_QPR0x0040
 #define KVM_REG_FQPR   0x0060
 
+#define KVM_INTERRUPT_SET  -1U
+#define KVM_INTERRUPT_UNSET-2U
+
 #endif /* __LINUX_KVM_POWERPC_H */
diff --git a/arch/powerpc/include/asm/kvm_ppc.h 
b/arch/powerpc/include/asm/kvm_ppc.h
index c7fcdd7..6a2464e 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -92,6 +92,8 @@ extern void kvmppc_core_queue_dec(struct kvm_vcpu *vcpu);
 extern void kvmppc_core_dequeue_dec(struct kvm_vcpu *vcpu);
 extern void kvmppc_core_queue_external(struct kvm_vcpu *vcpu,
struct kvm_interrupt *irq);
+extern void kvmppc_core_dequeue_external(struct kvm_vcpu *vcpu,
+ struct kvm_interrupt *irq);
 
 extern int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
   unsigned int op, int *advance);
diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c
index c2ffb91..9e0bc47 100644
--- a/arch/powerpc/kvm/book3s.c
+++ b/arch/powerpc/kvm/book3s.c
@@ -230,6 +230,12 @@ void kvmppc_core_queue_external(struct kvm_vcpu *vcpu,
kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_EXTERNAL);
 }
 
+void kvmppc_core_dequeue_external(struct kvm_vcpu *vcpu,
+  struct kvm_interrupt *irq)
+{
+   kvmppc_book3s_dequeue_irqprio(vcpu, BOOK3S_INTERRUPT_EXTERNAL);
+}
+
 int kvmppc_book3s_irqprio_deliver(struct kvm_vcpu *vcpu, unsigned int priority)
 {
int deliver = 1;
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index a0e3172..af873d9 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -148,6 +148,7 @@ int kvm_dev_ioctl_check_extension(long ext)
switch (ext) {
case KVM_CAP_PPC_SEGSTATE:
case KVM_CAP_PPC_PAIRED_SINGLES:
+   case KVM_CAP_PPC_UNSET_IRQ:
r = 1;
break;
case KVM_CAP_COALESCED_MMIO:
@@ -450,7 +451,10 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct 
kvm_run *run)
 
 int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, struct kvm_interrupt *irq)
 {
-   kvmppc_core_queue_external(vcpu, irq);
+   if (irq-irq == KVM_INTERRUPT_UNSET)
+   kvmppc_core_dequeue_external(vcpu, irq);
+   else
+   kvmppc_core_queue_external(vcpu, irq);
 
if (waitqueue_active(vcpu-wq)) {
wake_up_interruptible(vcpu-wq);
diff --git a/include/linux/kvm.h b/include/linux/kvm.h
index ce28767..c36d093 100644
--- a/include/linux/kvm.h
+++ b/include/linux/kvm.h
@@ -507,6 +507,7 @@ struct kvm_ioeventfd {
 #define KVM_CAP_DEBUGREGS 50
 #endif
 #define KVM_CAP_X86_ROBUST_SINGLESTEP 51
+#define KVM_CAP_PPC_UNSET_IRQ 53
 
 #ifdef KVM_CAP_IRQ_ROUTING
 
-- 
1.6.0.2

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 01/21] KVM: PPC: Ensure split mode works

2010-03-24 Thread Alexander Graf
On PowerPC we can go into MMU Split Mode. That means that either
data relocation is on but instruction relocation is off or vice
versa.

That mode didn't work properly, as we weren't always flushing
entries when going into a new split mode, potentially mapping
different code or data than we're supposed to.

Signed-off-by: Alexander Graf ag...@suse.de
---
 arch/powerpc/include/asm/kvm_book3s.h |9 +++---
 arch/powerpc/kvm/book3s.c |   46 +---
 2 files changed, 29 insertions(+), 26 deletions(-)

diff --git a/arch/powerpc/include/asm/kvm_book3s.h 
b/arch/powerpc/include/asm/kvm_book3s.h
index e6ea974..14d0262 100644
--- a/arch/powerpc/include/asm/kvm_book3s.h
+++ b/arch/powerpc/include/asm/kvm_book3s.h
@@ -99,10 +99,11 @@ struct kvmppc_vcpu_book3s {
 #define CONTEXT_GUEST  1
 #define CONTEXT_GUEST_END  2
 
-#define VSID_REAL  0xfff0
-#define VSID_REAL_DR   0xffe0
-#define VSID_REAL_IR   0xffd0
-#define VSID_BAT   0xffc0
+#define VSID_REAL_DR   0x7ff0
+#define VSID_REAL_IR   0x7fe0
+#define VSID_SPLIT_MASK0x7fe0
+#define VSID_REAL  0x7fc0
+#define VSID_BAT   0x7fb0
 #define VSID_PR0x8000
 
 extern void kvmppc_mmu_pte_flush(struct kvm_vcpu *vcpu, u64 ea, u64 ea_mask);
diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c
index 94c229d..c2ffb91 100644
--- a/arch/powerpc/kvm/book3s.c
+++ b/arch/powerpc/kvm/book3s.c
@@ -133,6 +133,14 @@ void kvmppc_set_msr(struct kvm_vcpu *vcpu, u64 msr)
 
if (((vcpu-arch.msr  (MSR_IR|MSR_DR)) != (old_msr  (MSR_IR|MSR_DR))) 
||
(vcpu-arch.msr  MSR_PR) != (old_msr  MSR_PR)) {
+   bool dr = (vcpu-arch.msr  MSR_DR) ? true : false;
+   bool ir = (vcpu-arch.msr  MSR_IR) ? true : false;
+
+   /* Flush split mode PTEs */
+   if (dr != ir)
+   kvmppc_mmu_pte_vflush(vcpu, VSID_SPLIT_MASK,
+ VSID_SPLIT_MASK);
+
kvmppc_mmu_flush_segments(vcpu);
kvmppc_mmu_map_segment(vcpu, vcpu-arch.pc);
}
@@ -395,15 +403,7 @@ static int kvmppc_xlate(struct kvm_vcpu *vcpu, ulong 
eaddr, bool data,
} else {
pte-eaddr = eaddr;
pte-raddr = eaddr  0x;
-   pte-vpage = eaddr  12;
-   switch (vcpu-arch.msr  (MSR_DR|MSR_IR)) {
-   case 0:
-   pte-vpage |= VSID_REAL;
-   case MSR_DR:
-   pte-vpage |= VSID_REAL_DR;
-   case MSR_IR:
-   pte-vpage |= VSID_REAL_IR;
-   }
+   pte-vpage = VSID_REAL | eaddr  12;
pte-may_read = true;
pte-may_write = true;
pte-may_execute = true;
@@ -512,12 +512,10 @@ int kvmppc_handle_pagefault(struct kvm_run *run, struct 
kvm_vcpu *vcpu,
int page_found = 0;
struct kvmppc_pte pte;
bool is_mmio = false;
+   bool dr = (vcpu-arch.msr  MSR_DR) ? true : false;
+   bool ir = (vcpu-arch.msr  MSR_IR) ? true : false;
 
-   if ( vec == BOOK3S_INTERRUPT_DATA_STORAGE ) {
-   relocated = (vcpu-arch.msr  MSR_DR);
-   } else {
-   relocated = (vcpu-arch.msr  MSR_IR);
-   }
+   relocated = data ? dr : ir;
 
/* Resolve real address if translation turned on */
if (relocated) {
@@ -529,14 +527,18 @@ int kvmppc_handle_pagefault(struct kvm_run *run, struct 
kvm_vcpu *vcpu,
pte.raddr = eaddr  0x;
pte.eaddr = eaddr;
pte.vpage = eaddr  12;
-   switch (vcpu-arch.msr  (MSR_DR|MSR_IR)) {
-   case 0:
-   pte.vpage |= VSID_REAL;
-   case MSR_DR:
-   pte.vpage |= VSID_REAL_DR;
-   case MSR_IR:
-   pte.vpage |= VSID_REAL_IR;
-   }
+   }
+
+   switch (vcpu-arch.msr  (MSR_DR|MSR_IR)) {
+   case 0:
+   pte.vpage |= VSID_REAL;
+   break;
+   case MSR_DR:
+   pte.vpage |= VSID_REAL_DR;
+   break;
+   case MSR_IR:
+   pte.vpage |= VSID_REAL_IR;
+   break;
}
 
if (vcpu-arch.mmu.is_dcbz32(vcpu) 
-- 
1.6.0.2

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 08/21] KVM: PPC: Implement mfsr emulation

2010-03-24 Thread Alexander Graf
We emulate the mfsrin instruction already, that passes the SR number
in a register value. But we lacked support for mfsr that encoded the
SR number in the opcode.

So let's implement it.

Signed-off-by: Alexander Graf ag...@suse.de
---
 arch/powerpc/kvm/book3s_64_emulate.c |   13 +
 1 files changed, 13 insertions(+), 0 deletions(-)

diff --git a/arch/powerpc/kvm/book3s_64_emulate.c 
b/arch/powerpc/kvm/book3s_64_emulate.c
index c989214..8d7a78d 100644
--- a/arch/powerpc/kvm/book3s_64_emulate.c
+++ b/arch/powerpc/kvm/book3s_64_emulate.c
@@ -35,6 +35,7 @@
 #define OP_31_XOP_SLBMTE   402
 #define OP_31_XOP_SLBIE434
 #define OP_31_XOP_SLBIA498
+#define OP_31_XOP_MFSR 595
 #define OP_31_XOP_MFSRIN   659
 #define OP_31_XOP_SLBMFEV  851
 #define OP_31_XOP_EIOIO854
@@ -90,6 +91,18 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct 
kvm_vcpu *vcpu,
case OP_31_XOP_MTMSR:
kvmppc_set_msr(vcpu, kvmppc_get_gpr(vcpu, 
get_rs(inst)));
break;
+   case OP_31_XOP_MFSR:
+   {
+   int srnum;
+
+   srnum = kvmppc_get_field(inst, 12 + 32, 15 + 32);
+   if (vcpu-arch.mmu.mfsrin) {
+   u32 sr;
+   sr = vcpu-arch.mmu.mfsrin(vcpu, srnum);
+   kvmppc_set_gpr(vcpu, get_rt(inst), sr);
+   }
+   break;
+   }
case OP_31_XOP_MFSRIN:
{
int srnum;
-- 
1.6.0.2

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 11/21] KVM: PPC: Implement emulation for lbzux and lhax

2010-03-24 Thread Alexander Graf
We get MMIOs with the weirdest instructions. But every time we do,
we need to improve our emulator to implement them.

So let's do that - this time it's lbzux and lhax's round.

Signed-off-by: Alexander Graf ag...@suse.de
---
 arch/powerpc/kvm/emulate.c |   20 
 1 files changed, 20 insertions(+), 0 deletions(-)

diff --git a/arch/powerpc/kvm/emulate.c b/arch/powerpc/kvm/emulate.c
index 2410ec2..dbb5d68 100644
--- a/arch/powerpc/kvm/emulate.c
+++ b/arch/powerpc/kvm/emulate.c
@@ -38,10 +38,12 @@
 #define OP_31_XOP_LBZX  87
 #define OP_31_XOP_STWX  151
 #define OP_31_XOP_STBX  215
+#define OP_31_XOP_LBZUX 119
 #define OP_31_XOP_STBUX 247
 #define OP_31_XOP_LHZX  279
 #define OP_31_XOP_LHZUX 311
 #define OP_31_XOP_MFSPR 339
+#define OP_31_XOP_LHAX  343
 #define OP_31_XOP_STHX  407
 #define OP_31_XOP_STHUX 439
 #define OP_31_XOP_MTSPR 467
@@ -173,6 +175,19 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct 
kvm_vcpu *vcpu)
emulated = kvmppc_handle_load(run, vcpu, rt, 1, 1);
break;
 
+   case OP_31_XOP_LBZUX:
+   rt = get_rt(inst);
+   ra = get_ra(inst);
+   rb = get_rb(inst);
+
+   ea = kvmppc_get_gpr(vcpu, rb);
+   if (ra)
+   ea += kvmppc_get_gpr(vcpu, ra);
+
+   emulated = kvmppc_handle_load(run, vcpu, rt, 1, 1);
+   kvmppc_set_gpr(vcpu, ra, ea);
+   break;
+
case OP_31_XOP_STWX:
rs = get_rs(inst);
emulated = kvmppc_handle_store(run, vcpu,
@@ -202,6 +217,11 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct 
kvm_vcpu *vcpu)
kvmppc_set_gpr(vcpu, rs, ea);
break;
 
+   case OP_31_XOP_LHAX:
+   rt = get_rt(inst);
+   emulated = kvmppc_handle_loads(run, vcpu, rt, 2, 1);
+   break;
+
case OP_31_XOP_LHZX:
rt = get_rt(inst);
emulated = kvmppc_handle_load(run, vcpu, rt, 2, 1);
-- 
1.6.0.2

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 06/21] KVM: PPC: Don't reload FPU with invalid values

2010-03-24 Thread Alexander Graf
When the guest activates the FPU, we load it up. That's fine when
it wasn't activated before on the host, but if it was we end up
reloading FPU values from last time the FPU was deactivated on the
host without writing the proper values back to the vcpu struct.

This patch checks if the FPU is enabled already and if so just doesn't
bother activating it, making FPU operations survive guest context switches.

Signed-off-by: Alexander Graf ag...@suse.de
---
 arch/powerpc/kvm/book3s.c |5 +
 1 files changed, 5 insertions(+), 0 deletions(-)

diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c
index 400ae0a..029e1be 100644
--- a/arch/powerpc/kvm/book3s.c
+++ b/arch/powerpc/kvm/book3s.c
@@ -701,6 +701,11 @@ static int kvmppc_handle_ext(struct kvm_vcpu *vcpu, 
unsigned int exit_nr,
return RESUME_GUEST;
}
 
+   /* We already own the ext */
+   if (vcpu-arch.guest_owned_ext  msr) {
+   return RESUME_GUEST;
+   }
+
 #ifdef DEBUG_EXT
printk(KERN_INFO Loading up ext 0x%lx\n, msr);
 #endif
-- 
1.6.0.2

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[patch 8/8] test: long rmap chains

2010-03-24 Thread Marcelo Tosatti
Signed-off-by: Marcelo Tosatti mtosa...@redhat.com

Index: qemu-kvm/kvm/user/config-x86-common.mak
===
--- qemu-kvm.orig/kvm/user/config-x86-common.mak
+++ qemu-kvm/kvm/user/config-x86-common.mak
@@ -45,6 +45,9 @@ $(TEST_DIR)/vmexit.flat: $(cstart.o) $(T
 
 $(TEST_DIR)/slot_deletion.flat: $(cstart.o) $(TEST_DIR)/slot_deletion.o \
$(TEST_DIR)/print.o $(TEST_DIR)/vm.o
+
+$(TEST_DIR)/rmap_chain.flat: $(cstart.o) $(TEST_DIR)/rmap_chain.o \
+   $(TEST_DIR)/print.o $(TEST_DIR)/vm.o
  
 $(TEST_DIR)/test32.flat: $(TEST_DIR)/test32.o
 
Index: qemu-kvm/kvm/user/config-x86_64.mak
===
--- qemu-kvm.orig/kvm/user/config-x86_64.mak
+++ qemu-kvm/kvm/user/config-x86_64.mak
@@ -8,6 +8,6 @@ tests = $(TEST_DIR)/access.flat $(TEST_D
   $(TEST_DIR)/simple.flat $(TEST_DIR)/stringio.flat \
   $(TEST_DIR)/memtest1.flat $(TEST_DIR)/emulator.flat \
   $(TEST_DIR)/hypercall.flat $(TEST_DIR)/apic.flat \
-  $(TEST_DIR)/slot_deletion.flat
+  $(TEST_DIR)/slot_deletion.flat $(TEST_DIR)/rmap_chain.flat
 
 include config-x86-common.mak
Index: qemu-kvm/kvm/user/test/x86/rmap_chain.c
===
--- /dev/null
+++ qemu-kvm/kvm/user/test/x86/rmap_chain.c
@@ -0,0 +1,53 @@
+/* test long rmap chains */
+
+#include libcflat.h
+#include vm.h
+#include smp.h
+
+void print(const char *s);
+
+static unsigned int inl(unsigned short port)
+{
+unsigned int val;
+asm volatile (inl %w1, %0:=a (val):Nd (port));
+return val;
+}
+
+int main (void)
+{
+int i;
+int nr_pages;
+void *target_page, *virt_addr;
+
+setup_vm();
+
+nr_pages = inl(0xd1) / PAGE_SIZE;
+nr_pages -= 1000;
+target_page = alloc_page();
+
+virt_addr = (void *) 0xfa000;
+for (i = 0; i  nr_pages; i++) {
+install_page(phys_to_virt(read_cr3()), virt_to_phys(target_page),
+ virt_addr);
+virt_addr += PAGE_SIZE;
+}
+printf(created %d mappings\n, nr_pages);
+
+virt_addr = (void *) 0xfa000;
+for (i = 0; i  nr_pages; i++) {
+unsigned long *touch = virt_addr;
+
+*touch = 0;
+virt_addr += PAGE_SIZE;
+}
+printf(instantiated mappings\n);
+
+virt_addr += PAGE_SIZE;
+install_pte(phys_to_virt(read_cr3()), 1, virt_addr,
+0 | PTE_PRESENT | PTE_WRITE, target_page);
+
+*(unsigned long *)virt_addr = 0;
+printf(SUCCESS\n);
+
+return 0;
+}


--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[patch 4/8] test: export vm helpers

2010-03-24 Thread Marcelo Tosatti
To be used by next patches. Also make install_pte take an argument 
indicating physical location of pagetable.

Signed-off-by: Marcelo Tosatti mtosa...@redhat.com
 
Index: qemu-kvm/kvm/user/test/x86/vm.c
===
--- qemu-kvm.orig/kvm/user/test/x86/vm.c
+++ qemu-kvm/kvm/user/test/x86/vm.c
@@ -3,22 +3,9 @@
 
 void print(const char *s);
 
-#define PAGE_SIZE 4096ul
-#define LARGE_PAGE_SIZE (512 * PAGE_SIZE)
-
 static void *free = 0;
 static void *vfree_top = 0;
 
-static unsigned long virt_to_phys(const void *virt) 
-{ 
-return (unsigned long)virt;
-}
-
-static void *phys_to_virt(unsigned long phys)
-{
-return (void *)phys;
-}
-
 void *memset(void *data, int c, unsigned long len)
 {
 char *s = data;
@@ -61,15 +48,11 @@ void free_page(void *page)
 extern char edata;
 static unsigned long end_of_memory;
 
-#define PTE_PRESENT (1ull  0)
-#define PTE_PSE (1ull  7)
-#define PTE_WRITE   (1ull  1)
-#define PTE_ADDR(0xff000ull)
-
-static void install_pte(unsigned long *cr3, 
-   int pte_level, 
+void install_pte(unsigned long *cr3,
+   int pte_level,
void *virt,
-   unsigned long pte)
+   unsigned long pte,
+   unsigned long *pt_page)
 {
 int level;
 unsigned long *pt = cr3;
@@ -78,7 +61,11 @@ static void install_pte(unsigned long *c
 for (level = 4; level  pte_level; --level) {
offset = ((unsigned long)virt  ((level-1) * 9 + 12))  511;
if (!(pt[offset]  PTE_PRESENT)) {
-   unsigned long *new_pt = alloc_page();
+   unsigned long *new_pt = pt_page;
+if (!new_pt)
+new_pt = alloc_page();
+else
+pt_page = 0;
memset(new_pt, 0, PAGE_SIZE);
pt[offset] = virt_to_phys(new_pt) | PTE_PRESENT | PTE_WRITE;
}
@@ -108,58 +95,20 @@ static unsigned long get_pte(unsigned lo
 return pte;
 }
 
-static void install_large_page(unsigned long *cr3, 
-  unsigned long phys,
-  void *virt)
+void install_large_page(unsigned long *cr3,
+  unsigned long phys,
+  void *virt)
 {
-install_pte(cr3, 2, virt, phys | PTE_PRESENT | PTE_WRITE | PTE_PSE);
+install_pte(cr3, 2, virt, phys | PTE_PRESENT | PTE_WRITE | PTE_PSE, 0);
 }
 
-static void install_page(unsigned long *cr3, 
-unsigned long phys,
-void *virt)
+void install_page(unsigned long *cr3,
+  unsigned long phys,
+  void *virt)
 {
-install_pte(cr3, 1, virt, phys | PTE_PRESENT | PTE_WRITE);
-}
-
-static inline void load_cr3(unsigned long cr3)
-{
-asm ( mov %0, %%cr3 : : r(cr3) );
-}
-
-static inline unsigned long read_cr3()
-{
-unsigned long cr3;
-
-asm volatile ( mov %%cr3, %0 : =r(cr3) );
-return cr3;
+install_pte(cr3, 1, virt, phys | PTE_PRESENT | PTE_WRITE, 0);
 }
 
-static inline void load_cr0(unsigned long cr0)
-{
-asm volatile ( mov %0, %%cr0 : : r(cr0) );
-}
-
-static inline unsigned long read_cr0()
-{
-unsigned long cr0;
-
-asm volatile ( mov %%cr0, %0 : =r(cr0) );
-return cr0;
-}
-
-static inline void load_cr4(unsigned long cr4)
-{
-asm volatile ( mov %0, %%cr4 : : r(cr4) );
-}
-
-static inline unsigned long read_cr4()
-{
-unsigned long cr4;
-
-asm volatile ( mov %%cr4, %0 : =r(cr4) );
-return cr4;
-}
 
 struct gdt_table_descr
 {
Index: qemu-kvm/kvm/user/test/x86/vm.h
===
--- qemu-kvm.orig/kvm/user/test/x86/vm.h
+++ qemu-kvm/kvm/user/test/x86/vm.h
@@ -1,10 +1,72 @@
 #ifndef VM_H
 #define VM_H
 
+#define PAGE_SIZE 4096ul
+#define LARGE_PAGE_SIZE (512 * PAGE_SIZE)
+
+#define PTE_PRESENT (1ull  0)
+#define PTE_PSE (1ull  7)
+#define PTE_WRITE   (1ull  1)
+#define PTE_ADDR(0xff000ull)
+
 void setup_vm();
 
 void *vmalloc(unsigned long size);
 void vfree(void *mem);
 void *vmap(unsigned long long phys, unsigned long size);
 
+void install_pte(unsigned long *cr3,
+int pte_level,
+void *virt,
+unsigned long pte,
+unsigned long *pt_page);
+
+void *alloc_page();
+
+void install_large_page(unsigned long *cr3,unsigned long phys,
+   void *virt);
+void install_page(unsigned long *cr3, unsigned long phys, void *virt);
+
+static inline unsigned long virt_to_phys(const void *virt)
+{
+return (unsigned long)virt;
+}
+
+static inline void *phys_to_virt(unsigned long phys)
+{
+return (void *)phys;
+}
+
+
+static inline void load_cr3(unsigned long cr3)
+{
+asm ( mov %0, %%cr3 : : r(cr3) );
+}
+
+static inline unsigned long read_cr3()
+{
+unsigned long cr3;
+
+asm volatile ( mov %%cr3, %0 : 

[patch 7/8] test: bump max vcpus to 64

2010-03-24 Thread Marcelo Tosatti
Signed-off-by: Marcelo Tosatti mtosa...@redhat.com

Index: qemu-kvm/kvm/user/test/x86/cstart64.S
===
--- qemu-kvm.orig/kvm/user/test/x86/cstart64.S
+++ qemu-kvm/kvm/user/test/x86/cstart64.S
@@ -6,7 +6,7 @@ boot_idt = 0
 
 ipi_vector = 0x20
 
-max_cpus = 4
+max_cpus = 64
 
 .bss
 


--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[patch 6/8] test: parallel faults vs slot deletion

2010-03-24 Thread Marcelo Tosatti
Signed-off-by: Marcelo Tosatti mtosa...@redhat.com

Index: qemu-kvm/kvm/user/config-x86-common.mak
===
--- qemu-kvm.orig/kvm/user/config-x86-common.mak
+++ qemu-kvm/kvm/user/config-x86-common.mak
@@ -42,6 +42,9 @@ $(TEST_DIR)/sieve.flat: $(cstart.o) $(TE
$(TEST_DIR)/print.o $(TEST_DIR)/vm.o
  
 $(TEST_DIR)/vmexit.flat: $(cstart.o) $(TEST_DIR)/vmexit.o
+
+$(TEST_DIR)/slot_deletion.flat: $(cstart.o) $(TEST_DIR)/slot_deletion.o \
+   $(TEST_DIR)/print.o $(TEST_DIR)/vm.o
  
 $(TEST_DIR)/test32.flat: $(TEST_DIR)/test32.o
 
Index: qemu-kvm/kvm/user/config-x86_64.mak
===
--- qemu-kvm.orig/kvm/user/config-x86_64.mak
+++ qemu-kvm/kvm/user/config-x86_64.mak
@@ -7,6 +7,7 @@ CFLAGS += -D__x86_64__
 tests = $(TEST_DIR)/access.flat $(TEST_DIR)/sieve.flat \
   $(TEST_DIR)/simple.flat $(TEST_DIR)/stringio.flat \
   $(TEST_DIR)/memtest1.flat $(TEST_DIR)/emulator.flat \
-  $(TEST_DIR)/hypercall.flat $(TEST_DIR)/apic.flat
+  $(TEST_DIR)/hypercall.flat $(TEST_DIR)/apic.flat \
+  $(TEST_DIR)/slot_deletion.flat
 
 include config-x86-common.mak
Index: qemu-kvm/kvm/user/test/x86/slot_deletion.c
===
--- /dev/null
+++ qemu-kvm/kvm/user/test/x86/slot_deletion.c
@@ -0,0 +1,130 @@
+/* test parallel faults vs slot deletion */
+
+#include "libcflat.h"
+#include "vm.h"
+#include "smp.h"
+
+static unsigned int inl(unsigned short port)
+{
+unsigned int val;
+asm volatile ("inl %w1, %0":"=a" (val):"Nd" (port));
+return val;
+}
+
+static void outl(unsigned int data, unsigned short port)
+{
+asm volatile ("outl %0, %1"::"a" (data), "d" (port));
+}
+
+static int write_mem_slot (unsigned long start, unsigned long end)
+{
+outl(start, 0x2018);
+outl((unsigned long) start >> 32, 0x201c);
+
+outl(end, 0x2020);
+outl((unsigned long) end >> 32, 0x2024);
+return 0;
+}
+
+#define CMD_CREATE_SLOT 0x0
+#define CMD_DELETE_SLOT 0x1
+
+int create_mem_slot(unsigned long start, unsigned long end)
+{
+write_mem_slot (start, end);
+outl(CMD_CREATE_SLOT, 0x2028);
+return 0;
+}
+
+int delete_mem_slot(unsigned long start, unsigned long end)
+{
+write_mem_slot (start, end);
+outl(CMD_DELETE_SLOT, 0x2028);
+return 0;
+}
+
+void map_addr_with_pte_phys(void *virt_addr, unsigned long pte_phys,
+   void *target_page)
+{
+/* 1:1 map the pagetable inside memslot */
+install_page(phys_to_virt(read_cr3()), pte_phys, (void *)pte_phys);
+install_pte(phys_to_virt(read_cr3()), 1, virt_addr,
+   virt_to_phys(target_page) | PTE_PRESENT | PTE_WRITE,
+   (void *) pte_phys);
+}
+
+#define define_barrier(x) int count_##x = 0;\
+static void barrier_##x(void) { \
+count_##x++;\
+while (count_##x < cpu_count());\
+}
+
+define_barrier(cr3);
+define_barrier(fault);
+define_barrier(done);
+
+static void fault_vaddr(void *data)
+{
+unsigned long *target_map = data;
+
+barrier_fault();
+*target_map = 0;
+barrier_done();
+}
+
+void run_test(void)
+{
+unsigned long start, end, pte_phys;
+void *target_page, *virt_addr;
+int i;
+
+start = inl(0xd1);
+end = start + (PAGE_SIZE * 1000);
+create_mem_slot(start, end);
+target_page = alloc_page();
+
+pte_phys = start;
+virt_addr = (void *) 0xfa000;
+for (i = 2; i <= cpu_count(); i++) {
+map_addr_with_pte_phys(virt_addr, pte_phys, target_page);
+pte_phys += PAGE_SIZE;
+virt_addr += PAGE_SIZE * 512;
+}
+
+count_fault = 0;
+count_done = 0;
+pte_phys = start;
+virt_addr = (void *)0xfa000;
+for (i = 2; i <= cpu_count(); i++) {
+on_cpu_noipi(i-1, fault_vaddr, virt_addr);
+pte_phys += PAGE_SIZE;
+virt_addr += PAGE_SIZE * 512;
+}
+
+barrier_fault();
+delete_mem_slot(start, end);
+barrier_done();
+}
+
+static void setup_cr3 (void *cr3)
+{
+load_cr3(virt_to_phys(cr3));
+barrier_cr3();
+}
+
+int main (void)
+{
+int i;
+
+setup_vm();
+for (i = 2; i <= cpu_count(); i++)
+on_cpu_noipi(i-1, setup_cr3, (void *)read_cr3());
+
+barrier_cr3();
+
+for (i = 0; i < 100; i++)
+run_test();
+
+printf("SUCCESS\n");
+return 0;
+}


--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


  1   2   >