Re: [patch 10/12] [PATCH] kvm-s390: storage key interface

2011-12-15 Thread Carsten Otte
New version below. Changes:
- __pmdp_for_addr and ptep_for_addr now take a vma as argument
- the check whether a vma exists has moved to gmap_fault and kvm_s390_keyop
- kvm_s390_keyop verifies that a vma is writable so that it's safe to
  set the SWC bit

Subject: [PATCH] kvm-s390: storage key interface
From: Carsten Otte co...@de.ibm.com

This patch introduces an interface to access the guest visible
storage keys. It supports three operations that model the behavior
that SSKE/ISKE/RRBE instructions would have if they were issued by
the guest. These instructions are all documented in the z architecture
principles of operation book.

Signed-off-by: Carsten Otte co...@de.ibm.com
---
---
 Documentation/virtual/kvm/api.txt |   38 ++
 arch/s390/include/asm/kvm_host.h  |4 +
 arch/s390/include/asm/pgtable.h   |1 
 arch/s390/kvm/kvm-s390.c  |  103 --
 arch/s390/mm/pgtable.c|   70 +++--
 include/linux/kvm.h   |7 ++
 6 files changed, 202 insertions(+), 21 deletions(-)

Index: linux-2.5-cecsim/Documentation/virtual/kvm/api.txt
===
--- linux-2.5-cecsim.orig/Documentation/virtual/kvm/api.txt
+++ linux-2.5-cecsim/Documentation/virtual/kvm/api.txt
@@ -1494,6 +1494,44 @@ table upfront. This is useful to handle 
 controlled virtual machines to fault in the virtual cpu's lowcore pages
 prior to calling the KVM_RUN ioctl.
 
+4.67 KVM_S390_KEYOP
+
+Capability: KVM_CAP_S390_UCONTROL
+Architectures: s390
+Type: vm ioctl
+Parameters: struct kvm_s390_keyop (in+out)
+Returns: 0 in case of success
+
+The parameter looks like this:
+   struct kvm_s390_keyop {
+   __u64 user_addr;
+   __u8  key;
+   __u8  operation;
+   };
+
+user_addr  contains the userspace address of a memory page
+key        contains the guest visible storage key as defined by the
+   z Architecture Principles of Operation book, including key
+   value for key controlled storage protection, the fetch
+   protection bit, and the reference and change indicator bits
+operation  indicates the key operation that should be performed
+
+The following operations are supported:
+KVM_S390_KEYOP_SSKE:
+   This operation behaves just like the set storage key extended (SSKE)
+   instruction would, if it were issued by the guest. The storage key
+   provided in key is placed in the guest visible storage key.
+KVM_S390_KEYOP_ISKE:
+   This operation behaves just like the insert storage key extended (ISKE)
+   instruction would, if it were issued by the guest. After this call,
+   the guest visible storage key is presented in the key field.
+KVM_S390_KEYOP_RRBE:
+   This operation behaves just like the reset referenced bit extended
+   (RRBE) instruction would, if it were issued by the guest. The guest
+   visible reference bit is cleared, and the value presented in the key
+   field after this call has the reference bit set to 1 in case the
+   guest view of the reference bit was 1 prior to this call.
+
 5. The kvm_run structure
 
 Application code obtains a pointer to the kvm_run structure by
Index: linux-2.5-cecsim/arch/s390/include/asm/kvm_host.h
===
--- linux-2.5-cecsim.orig/arch/s390/include/asm/kvm_host.h
+++ linux-2.5-cecsim/arch/s390/include/asm/kvm_host.h
@@ -24,6 +24,10 @@
 /* memory slots that does not exposed to userspace */
 #define KVM_PRIVATE_MEM_SLOTS 4
 
+#define KVM_S390_KEYOP_SSKE 0x01
+#define KVM_S390_KEYOP_ISKE 0x02
+#define KVM_S390_KEYOP_RRBE 0x03
+
 struct sca_entry {
atomic_t scn;
__u32   reserved;
Index: linux-2.5-cecsim/arch/s390/include/asm/pgtable.h
===
--- linux-2.5-cecsim.orig/arch/s390/include/asm/pgtable.h
+++ linux-2.5-cecsim/arch/s390/include/asm/pgtable.h
@@ -1237,6 +1237,7 @@ static inline pte_t mk_swap_pte(unsigned
 extern int vmem_add_mapping(unsigned long start, unsigned long size);
 extern int vmem_remove_mapping(unsigned long start, unsigned long size);
 extern int s390_enable_sie(void);
+extern pte_t *ptep_for_addr(unsigned long addr, struct vm_area_struct *);
 
 /*
  * No page table caches to initialise
Index: linux-2.5-cecsim/arch/s390/kvm/kvm-s390.c
===
--- linux-2.5-cecsim.orig/arch/s390/kvm/kvm-s390.c
+++ linux-2.5-cecsim/arch/s390/kvm/kvm-s390.c
@@ -112,13 +112,127 @@ void kvm_arch_exit(void)
 {
 }
 
+static long kvm_s390_keyop(struct kvm_s390_keyop *kop)
+{
+   struct vm_area_struct *vma;
+   unsigned long addr = kop-user_addr;
+   pte_t *ptep;
+   pgste_t pgste;
+   int r;
+   unsigned long skey;
+   unsigned long bits;
+
+   /* make sure this process is a hypervisor */
+   r = -EINVAL;
+  

Re: [patch 10/12] [PATCH] kvm-s390: storage key interface

2011-12-15 Thread Heiko Carstens
On Thu, Dec 15, 2011 at 11:28:03AM +0100, Carsten Otte wrote:
 New version below. Changes:
 - __pmdp_for_addr and ptep_for_addr now take a vma as argument
 - check if a vma exists has moved to gmap_fault and kvm_s390_keyop
 - kvm_s390_keyop verifies that a vma is writable so that it's safe to
   set the SWC bit

oh.. cool.

[...]

 + spin_lock(current-mm-page_table_lock);
 + pgste = pgste_get_lock(ptep);
 +
 + switch (kop-operation) {
 + case KVM_S390_KEYOP_SSKE:
 + if (!(vma-vm_flags  (VM_WRITE | VM_MAYWRITE))) {
 + r = -EACCES;
 + break;
 + }

Why again is this needed? Or put in other words: what prevents a guest from
changing the storage key contents via sske of a page that is mapped read-only
into the guest address space?
As far as I can see: nothing. Interestingly I could -in theory- do some nice
stuff like:
- map a file from a read-only filesystem (which doesn't have a writepage
  aops function) into guest address space
- let the guest set the change bit in the storage key of a page that belongs
  to that file mapping via sske
- watch the fun that happens when the host tries to write the page back

But of course I could be totally wrong ;)

This doesn't have anything to do with your patch, it's just that I think
you shouldn't check if the vma is writable or not. It doesn't matter.

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [patch 10/12] [PATCH] kvm-s390: storage key interface

2011-12-15 Thread Christian Borntraeger
On 15/12/11 17:11, Heiko Carstens wrote:
 Why again is this needed? Or put in other words: what prevents a guest to
 change the storage key contents via sske of a page that is mapped read-only
 into the guest address space?
 As far as I can see: nothing. Interestingly I could -in theory- do some nice
 stuff like:
 - map a file from a read-only filesystem (which doesn't have a writepage
   aops function) into guest address space
 - let the guest set the change bit in the storage key of a page that belongs
   to that file mapping via sske
 - watch the fun that happens when the host tries to write the page back

Huh?
The guest itself can neither set the dirty bit of the real storage key nor
set the dirty bit (the host change bit) of the pgste via guest SSKE. The
transition 0->1 will only be done in the guest change bit of the pgste.
(Otherwise we would not have a separate guest/host view of change/referenced.)

This interface here is for userspace (to change the guest storage key on behalf
of the guest, e.g. for live guest relocation). Since we might have to touch the
real storage key and this is host code, millicode will not protect us from doing
stupid things like it does for guest code, so we better check before we touch the
real storage key.

Or did I misread your question?

Christian 

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [patch 10/12] [PATCH] kvm-s390: storage key interface

2011-12-15 Thread Martin Schwidefsky
On Thu, 15 Dec 2011 11:28:03 +0100
Carsten Otte co...@de.ibm.com wrote:

 + case KVM_S390_KEYOP_SSKE:
 + if (!(vma-vm_flags  (VM_WRITE | VM_MAYWRITE))) {
 + r = -EACCES;
 + break;
 + }

Unfortunately I just realized while discussing with Heiko that a check
for VM_WRITE is not enough. We could still have a read-only pte that
points to a file backed page which is purely read-only. A write access
is allowed but would cause copy-on-write. But we set the storage key
of the original page which would make the read-only page dirty.
We need to solve the race on the dirty bit in a clean way, otherwise
there always will be a corner case.

-- 
blue skies,
   Martin.

Reality continues to ruin my life. - Calvin.

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [patch 10/12] [PATCH] kvm-s390: storage key interface

2011-12-15 Thread Heiko Carstens
On Thu, Dec 15, 2011 at 05:49:19PM +0100, Christian Borntraeger wrote:
 On 15/12/11 17:11, Heiko Carstens wrote:
  Why again is this needed? Or put in other words: what prevents a guest to
  change the storage key contents via sske of a page that is mapped read-only
  into the guest address space?
  As far as I can see: nothing. Interestingly I could -in theory- do some nice
  stuff like:
  - map a file from a read-only filesystem (which doesn't have a writepage
aops function) into guest address space
  - let the guest set the change bit in the storage key of a page that belongs
to that file mapping via sske
  - watch the fun that happens when the host tries to write the page back
 
 Huh?
 The guest itself can neither set the dirty bit of the real storage key nor
 set the dirty bit the host change bit of the pgste via guest SSKE. The 
 transition 
 0-1 will only be done in the guest change bit of the pgste. (Otherwise
 we would not have a separate guest/host view of change/referenced)

Yeah, I had a major braino..

 This interface here is for userspace (to change the guest storage key on 
 behalf
 of the guest, e.g. for life guest relocation). Since we might have to touch 
 the
 real storage key and this is host code millicode will not protect us from 
 doing
 stupid things like it does for guest code, we better check before we touch 
 the 
 real storage key.
 
 Or did I misread your question?

No, you did not. However, I still think it's wrong to have an early exit if
the vma is not writable, since the guest can set the guest change bit, but that
is not possible with this interface. But I can see now that the purpose was
to avoid an overindication of the change bit.
oh well...

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[patch 10/12] [PATCH] kvm-s390: storage key interface

2011-12-14 Thread Carsten Otte
This patch introduces an interface to access the guest visible
storage keys. It supports three operations that model the behavior
that SSKE/ISKE/RRBE instructions would have if they were issued by
the guest. These instructions are all documented in the z architecture
principles of operation book.

Signed-off-by: Carsten Otte co...@de.ibm.com
---
---
 Documentation/virtual/kvm/api.txt |   38 +
 arch/s390/include/asm/kvm_host.h  |4 +
 arch/s390/include/asm/pgtable.h   |1 
 arch/s390/kvm/kvm-s390.c  |  110 --
 arch/s390/mm/pgtable.c|   64 +++---
 include/linux/kvm.h   |7 ++
 6 files changed, 203 insertions(+), 21 deletions(-)

--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -1514,6 +1514,44 @@ table upfront. This is useful to handle
 controlled virtual machines to fault in the virtual cpu's lowcore pages
 prior to calling the KVM_RUN ioctl.
 
+4.67 KVM_S390_KEYOP
+
+Capability: KVM_CAP_S390_UCONTROL
+Architectures: s390
+Type: vm ioctl
+Parameters: struct kvm_s390_keyop (in+out)
+Returns: 0 in case of success
+
+The parameter looks like this:
+   struct kvm_s390_keyop {
+   __u64 user_addr;
+   __u8  key;
+   __u8  operation;
+   };
+
+user_addr  contains the userspace address of a memory page
+key        contains the guest visible storage key as defined by the
+   z Architecture Principles of Operation book, including key
+   value for key controlled storage protection, the fetch
+   protection bit, and the reference and change indicator bits
+operation  indicates the key operation that should be performed
+
+The following operations are supported:
+KVM_S390_KEYOP_SSKE:
+   This operation behaves just like the set storage key extended (SSKE)
+   instruction would, if it were issued by the guest. The storage key
+   provided in key is placed in the guest visible storage key.
+KVM_S390_KEYOP_ISKE:
+   This operation behaves just like the insert storage key extended (ISKE)
+   instruction would, if it were issued by the guest. After this call,
+   the guest visible storage key is presented in the key field.
+KVM_S390_KEYOP_RRBE:
+   This operation behaves just like the reset referenced bit extended
+   (RRBE) instruction would, if it were issued by the guest. The guest
+   visible reference bit is cleared, and the value presented in the key
+   field after this call has the reference bit set to 1 in case the
+   guest view of the reference bit was 1 prior to this call.
+
 5. The kvm_run structure
 
 Application code obtains a pointer to the kvm_run structure by
--- a/arch/s390/include/asm/kvm_host.h
+++ b/arch/s390/include/asm/kvm_host.h
@@ -24,6 +24,10 @@
 /* memory slots that does not exposed to userspace */
 #define KVM_PRIVATE_MEM_SLOTS 4
 
+#define KVM_S390_KEYOP_SSKE 0x01
+#define KVM_S390_KEYOP_ISKE 0x02
+#define KVM_S390_KEYOP_RRBE 0x03
+
 struct sca_entry {
atomic_t scn;
__u32   reserved;
--- a/arch/s390/include/asm/pgtable.h
+++ b/arch/s390/include/asm/pgtable.h
@@ -1254,6 +1254,7 @@ static inline pte_t mk_swap_pte(unsigned
 extern int vmem_add_mapping(unsigned long start, unsigned long size);
 extern int vmem_remove_mapping(unsigned long start, unsigned long size);
 extern int s390_enable_sie(void);
+extern pte_t *ptep_for_addr(unsigned long addr);
 
 /*
  * No page table caches to initialise
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -112,13 +112,117 @@ void kvm_arch_exit(void)
 {
 }
 
+static long kvm_s390_keyop(struct kvm_s390_keyop *kop)
+{
+   unsigned long addr = kop-user_addr;
+   pte_t *ptep;
+   pgste_t pgste;
+   int r;
+   unsigned long skey;
+   unsigned long bits;
+
+   /* make sure this process is a hypervisor */
+   r = -EINVAL;
+   if (!mm_has_pgste(current-mm))
+   goto out;
+
+   r = -EFAULT;
+   if (addr = PGDIR_SIZE)
+   goto out;
+
+   down_read(current-mm-mmap_sem);
+   ptep = ptep_for_addr(addr);
+   if (IS_ERR(ptep)) {
+   r = PTR_ERR(ptep);
+   goto out_unlock;
+   }
+
+   spin_lock(current-mm-page_table_lock);
+   pgste = pgste_get_lock(ptep);
+
+   switch (kop-operation) {
+   case KVM_S390_KEYOP_SSKE:
+   pgste = pgste_update_all(ptep, pgste);
+   /* set the real key back w/o rc bits */
+   skey = kop-key  (_PAGE_ACC_BITS | _PAGE_FP_BIT);
+   if (pte_present(*ptep)) {
+   page_set_storage_key(pte_val(*ptep), skey, 1);
+   /* avoid race clobbering changed bit */
+   pte_val(*ptep) |= _PAGE_SWC;
+   }
+   /* put acc+f plus guest referenced and changed into the pgste */
+   pgste_val(pgste) = 

Re: [patch 10/12] [PATCH] kvm-s390: storage key interface

2011-12-12 Thread Martin Schwidefsky
On Sat, 10 Dec 2011 13:35:39 +0100
Carsten Otte co...@de.ibm.com wrote:

 --- a/arch/s390/mm/pgtable.c
 +++ b/arch/s390/mm/pgtable.c
 @@ -393,6 +393,33 @@ out_unmap:
  }
  EXPORT_SYMBOL_GPL(gmap_map_segment);
 
 +static pmd_t *__pmdp_for_addr(struct mm_struct *mm, unsigned long addr)
 +{
 + struct vm_area_struct *vma;
 + pgd_t *pgd;
 + pud_t *pud;
 + pmd_t *pmd;
 +
 + vma = find_vma(mm, addr);
 + if (!vma || (vma-vm_start  addr))
 + return ERR_PTR(-EFAULT);
 +
 + pgd = pgd_offset(mm, addr);
 + pud = pud_alloc(mm, pgd, addr);
 + if (!pud)
 + return ERR_PTR(-ENOMEM);
 +
 + pmd = pmd_alloc(mm, pud, addr);
 + if (!pmd)
 + return ERR_PTR(-ENOMEM);
 +
 + if (!pmd_present(*pmd) 
 + __pte_alloc(mm, vma, pmd, addr))
 + return ERR_PTR(-ENOMEM);
 +
 + return pmd;
 +}
 +
  /*
   * this function is assumed to be called with mmap_sem held
   */

The __pmdp_for_addr function is fine for the usage in __gmap_fault.

 @@ -806,6 +820,26 @@ int s390_enable_sie(void)
  }
  EXPORT_SYMBOL_GPL(s390_enable_sie);
 
 +pte_t *ptep_for_addr(unsigned long addr)
 +{
 + pmd_t *pmd;
 + pte_t *pte;
 +
 + down_read(current-mm-mmap_sem);
 +
 + pmd = __pmdp_for_addr(current-mm, addr);
 + if (IS_ERR(pmd)) {
 + pte = (pte_t *)pmd;
 + goto up_out;
 + }
 +
 + pte = pte_offset(pmd, addr);
 +up_out:
 + up_read(current-mm-mmap_sem);
 + return pte;
 +}
 +EXPORT_SYMBOL_GPL(ptep_for_addr);
 +
  #if defined(CONFIG_DEBUG_PAGEALLOC)  defined(CONFIG_HIBERNATION)
  bool kernel_page_present(struct page *page)
  {

There is a fundamental locking snafu. The pointer to the page table
entry is only valid until the mmap_sem is released. The down_read/up_read
has to be done in the caller.

-- 
blue skies,
   Martin.

Reality continues to ruin my life. - Calvin.

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [patch 10/12] [PATCH] kvm-s390: storage key interface

2011-12-11 Thread Heiko Carstens
On Sat, Dec 10, 2011 at 01:35:39PM +0100, Carsten Otte wrote:
 This patch introduces an interface to access the guest visible
 storage keys. It supports three operations that model the behavior
 that SSKE/ISKE/RRBE instructions would have if they were issued by
 the guest. These instructions are all documented in the z architecture
 principles of operation book.
 
 Signed-off-by: Carsten Otte co...@de.ibm.com

[...]

 --- a/arch/s390/kvm/kvm-s390.c
 +++ b/arch/s390/kvm/kvm-s390.c
 @@ -112,13 +112,115 @@ void kvm_arch_exit(void)
  {
  }
 
 +static long kvm_s390_keyop(struct kvm_s390_keyop *kop)
 +{
 + unsigned long addr = kop-user_addr;
 + pte_t *ptep;
 + pgste_t pgste;
 + int r;
 + unsigned long skey;
 + unsigned long bits;
 +
 + /* make sure this process is a hypervisor */
 + r = -EINVAL;
 + if (!mm_has_pgste(current-mm))
 + goto out;
 +
 + r = -EFAULT;
 + if (addr = PGDIR_SIZE)
 + goto out;
 +
 + spin_lock(current-mm-page_table_lock);
 + ptep = ptep_for_addr(addr);

Locking is broken; the following order is possible:

kvm_s390_keyop()    -> spin_lock(&current->mm->page_table_lock)
-> ptep_for_addr()  -> down_read(&current->mm->mmap_sem)
  <--- Bug 1, we might schedule here
-> __pmdp_for_addr()
-> __pte_alloc()    -> spin_lock(&mm->page_table_lock)
  <--- Bug 2, deadlock

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [patch 10/12] [PATCH] kvm-s390: storage key interface

2011-12-10 Thread Carsten Otte

On 09.12.2011 14:46, heica...@linux.vnet.ibm.com wrote:

On Fri, Dec 09, 2011 at 01:49:35PM +0100, Carsten Otte wrote:

This patch introduces an interface to access the guest visible
storage keys. It supports three operations that model the behavior
that SSKE/ISKE/RRBE instructions would have if they were issued by
the guest. These instructions are all documented in the z architecture
principles of operation book.

Signed-off-by: Carsten Otte co...@de.ibm.com
---


[...]


+   spin_lock(current-mm-page_table_lock);
+   ptep = ptep_for_addr(addr);
+   if (!ptep)
+   goto out_unlock;


FWIW, this is also a bit odd: if the guest would perform a storage key
operation on such an address it would succeed. If the host will do it,
it will fail (which doesn't match your description above).
No?

Good catch, will fix.

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[patch 10/12] [PATCH] kvm-s390: storage key interface

2011-12-10 Thread Carsten Otte
This patch introduces an interface to access the guest visible
storage keys. It supports three operations that model the behavior
that SSKE/ISKE/RRBE instructions would have if they were issued by
the guest. These instructions are all documented in the z architecture
principles of operation book.

Signed-off-by: Carsten Otte co...@de.ibm.com
---
---
 Documentation/virtual/kvm/api.txt |   38 +
 arch/s390/include/asm/kvm_host.h  |4 +
 arch/s390/include/asm/pgtable.h   |1 
 arch/s390/kvm/kvm-s390.c  |  108 --
 arch/s390/mm/pgtable.c|   70 ++--
 include/linux/kvm.h   |7 ++
 6 files changed, 207 insertions(+), 21 deletions(-)

--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -1514,6 +1514,44 @@ table upfront. This is useful to handle
 controlled virtual machines to fault in the virtual cpu's lowcore pages
 prior to calling the KVM_RUN ioctl.
 
+4.67 KVM_S390_KEYOP
+
+Capability: KVM_CAP_UCONTROL
+Architectures: s390
+Type: vm ioctl
+Parameters: struct kvm_s390_keyop (in+out)
+Returns: 0 in case of success
+
+The parameter looks like this:
+   struct kvm_s390_keyop {
+   __u64 user_addr;
+   __u8  key;
+   __u8  operation;
+   };
+
+user_addr  contains the userspace address of a memory page
+key        contains the guest visible storage key as defined by the
+   z Architecture Principles of Operation book, including key
+   value for key controlled storage protection, the fetch
+   protection bit, and the reference and change indicator bits
+operation  indicates the key operation that should be performed
+
+The following operations are supported:
+KVM_S390_KEYOP_SSKE:
+   This operation behaves just like the set storage key extended (SSKE)
+   instruction would, if it were issued by the guest. The storage key
+   provided in key is placed in the guest visible storage key.
+KVM_S390_KEYOP_ISKE:
+   This operation behaves just like the insert storage key extended (ISKE)
+   instruction would, if it were issued by the guest. After this call,
+   the guest visible storage key is presented in the key field.
+KVM_S390_KEYOP_RRBE:
+   This operation behaves just like the reset referenced bit extended
+   (RRBE) instruction would, if it were issued by the guest. The guest
+   visible reference bit is cleared, and the value presented in the key
+   field after this call has the reference bit set to 1 in case the
+   guest view of the reference bit was 1 prior to this call.
+
 5. The kvm_run structure
 
 Application code obtains a pointer to the kvm_run structure by
--- a/arch/s390/include/asm/kvm_host.h
+++ b/arch/s390/include/asm/kvm_host.h
@@ -24,6 +24,10 @@
 /* memory slots that does not exposed to userspace */
 #define KVM_PRIVATE_MEM_SLOTS 4
 
+#define KVM_S390_KEYOP_SSKE 0x01
+#define KVM_S390_KEYOP_ISKE 0x02
+#define KVM_S390_KEYOP_RRBE 0x03
+
 struct sca_entry {
atomic_t scn;
__u32   reserved;
--- a/arch/s390/include/asm/pgtable.h
+++ b/arch/s390/include/asm/pgtable.h
@@ -1254,6 +1254,7 @@ static inline pte_t mk_swap_pte(unsigned
 extern int vmem_add_mapping(unsigned long start, unsigned long size);
 extern int vmem_remove_mapping(unsigned long start, unsigned long size);
 extern int s390_enable_sie(void);
+extern pte_t *ptep_for_addr(unsigned long addr);
 
 /*
  * No page table caches to initialise
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -112,13 +112,115 @@ void kvm_arch_exit(void)
 {
 }
 
+static long kvm_s390_keyop(struct kvm_s390_keyop *kop)
+{
+   unsigned long addr = kop-user_addr;
+   pte_t *ptep;
+   pgste_t pgste;
+   int r;
+   unsigned long skey;
+   unsigned long bits;
+
+   /* make sure this process is a hypervisor */
+   r = -EINVAL;
+   if (!mm_has_pgste(current-mm))
+   goto out;
+
+   r = -EFAULT;
+   if (addr = PGDIR_SIZE)
+   goto out;
+
+   spin_lock(current-mm-page_table_lock);
+   ptep = ptep_for_addr(addr);
+   if (IS_ERR(ptep)) {
+   r = PTR_ERR(ptep);
+   goto out_unlock;
+   }
+
+   pgste = pgste_get_lock(ptep);
+
+   switch (kop-operation) {
+   case KVM_S390_KEYOP_SSKE:
+   pgste = pgste_update_all(ptep, pgste);
+   /* set the real key back w/o rc bits */
+   skey = kop-key  (_PAGE_ACC_BITS | _PAGE_FP_BIT);
+   if (pte_present(*ptep)) {
+   page_set_storage_key(pte_val(*ptep), skey, 1);
+   /* avoid race clobbering changed bit */
+   pte_val(*ptep) |= _PAGE_SWC;
+   }
+   /* put acc+f plus guest referenced and changed into the pgste */
+   pgste_val(pgste) = ~(RCP_ACC_BITS | RCP_FP_BIT | RCP_GR_BIT
+  

[patch 10/12] [PATCH] kvm-s390: storage key interface

2011-12-09 Thread Carsten Otte
This patch introduces an interface to access the guest visible
storage keys. It supports three operations that model the behavior
that SSKE/ISKE/RRBE instructions would have if they were issued by
the guest. These instructions are all documented in the z architecture
principles of operation book.

Signed-off-by: Carsten Otte co...@de.ibm.com
---
---
 Documentation/virtual/kvm/api.txt |   38 +
 arch/s390/include/asm/kvm_host.h  |4 +
 arch/s390/include/asm/pgtable.h   |1 
 arch/s390/kvm/kvm-s390.c  |  106 --
 arch/s390/mm/pgtable.c|   70 ++---
 include/linux/kvm.h   |7 ++
 6 files changed, 205 insertions(+), 21 deletions(-)

--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -1514,6 +1514,44 @@ table upfront. This is useful to handle
 controlled virtual machines to fault in the virtual cpu's lowcore pages
 prior to calling the KVM_RUN ioctl.
 
+4.67 KVM_S390_KEYOP
+
+Capability: KVM_CAP_UCONTROL
+Architectures: s390
+Type: vm ioctl
+Parameters: struct kvm_s390_keyop (in+out)
+Returns: 0 in case of success
+
+The parameter looks like this:
+   struct kvm_s390_keyop {
+   __u64 user_addr;
+   __u8  key;
+   __u8  operation;
+   };
+
+user_addr  contains the userspace address of a memory page
+key        contains the guest visible storage key as defined by the
+   z Architecture Principles of Operation book, including key
+   value for key controlled storage protection, the fetch
+   protection bit, and the reference and change indicator bits
+operation  indicates the key operation that should be performed
+
+The following operations are supported:
+KVM_S390_KEYOP_SSKE:
+   This operation behaves just like the set storage key extended (SSKE)
+   instruction would, if it were issued by the guest. The storage key
+   provided in key is placed in the guest visible storage key.
+KVM_S390_KEYOP_ISKE:
+   This operation behaves just like the insert storage key extended (ISKE)
+   instruction would, if it were issued by the guest. After this call,
+   the guest visible storage key is presented in the key field.
+KVM_S390_KEYOP_RRBE:
+   This operation behaves just like the reset referenced bit extended
+   (RRBE) instruction would, if it were issued by the guest. The guest
+   visible reference bit is cleared, and the value presented in the key
+   field after this call has the reference bit set to 1 in case the
+   guest view of the reference bit was 1 prior to this call.
+
 5. The kvm_run structure
 
 Application code obtains a pointer to the kvm_run structure by
--- a/arch/s390/include/asm/kvm_host.h
+++ b/arch/s390/include/asm/kvm_host.h
@@ -24,6 +24,10 @@
 /* memory slots that does not exposed to userspace */
 #define KVM_PRIVATE_MEM_SLOTS 4
 
+#define KVM_S390_KEYOP_SSKE 0x01
+#define KVM_S390_KEYOP_ISKE 0x02
+#define KVM_S390_KEYOP_RRBE 0x03
+
 struct sca_entry {
atomic_t scn;
__u32   reserved;
--- a/arch/s390/include/asm/pgtable.h
+++ b/arch/s390/include/asm/pgtable.h
@@ -1254,6 +1254,7 @@ static inline pte_t mk_swap_pte(unsigned
 extern int vmem_add_mapping(unsigned long start, unsigned long size);
 extern int vmem_remove_mapping(unsigned long start, unsigned long size);
 extern int s390_enable_sie(void);
+extern pte_t *ptep_for_addr(unsigned long addr);
 
 /*
  * No page table caches to initialise
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -112,13 +112,113 @@ void kvm_arch_exit(void)
 {
 }
 
+static long kvm_s390_keyop(struct kvm_s390_keyop *kop)
+{
+   unsigned long addr = kop-user_addr;
+   pte_t *ptep;
+   pgste_t pgste;
+   int r;
+   unsigned long skey;
+   unsigned long bits;
+
+   /* make sure this process is a hypervisor */
+   r = -EINVAL;
+   if (!mm_has_pgste(current-mm))
+   goto out;
+
+   r = -ENXIO;
+   if (addr = PGDIR_SIZE)
+   goto out;
+
+   spin_lock(current-mm-page_table_lock);
+   ptep = ptep_for_addr(addr);
+   if (!ptep)
+   goto out_unlock;
+
+   pgste = pgste_get_lock(ptep);
+
+   switch (kop-operation) {
+   case KVM_S390_KEYOP_SSKE:
+   pgste = pgste_update_all(ptep, pgste);
+   /* set the real key back w/o rc bits */
+   skey = kop-key  (_PAGE_ACC_BITS | _PAGE_FP_BIT);
+   if (pte_present(*ptep)) {
+   page_set_storage_key(pte_val(*ptep), skey, 1);
+   /* avoid race clobbering changed bit */
+   pte_val(*ptep) |= _PAGE_SWC;
+   }
+   /* put acc+f plus guest referenced and changed into the pgste */
+   pgste_val(pgste) = ~(RCP_ACC_BITS | RCP_FP_BIT | RCP_GR_BIT
+| RCP_GC_BIT);
+ 

Re: [patch 10/12] [PATCH] kvm-s390: storage key interface

2011-12-09 Thread Heiko Carstens
On Fri, Dec 09, 2011 at 12:23:36PM +0100, Carsten Otte wrote:
 This patch introduces an interface to access the guest visible
 storage keys. It supports three operations that model the behavior
 that SSKE/ISKE/RRBE instructions would have if they were issued by
 the guest. These instructions are all documented in the z architecture
 principles of operation book.
 
 Signed-off-by: Carsten Otte co...@de.ibm.com

[...]

 +static long kvm_s390_keyop(struct kvm_s390_keyop *kop)
 +{
 + unsigned long addr = kop-user_addr;
 + pte_t *ptep;
 + pgste_t pgste;
 + int r;
 + unsigned long skey;
 + unsigned long bits;
 +
 + /* make sure this process is a hypervisor */
 + r = -EINVAL;
 + if (!mm_has_pgste(current-mm))
 + goto out;
 +
 + r = -ENXIO;
 + if (addr = PGDIR_SIZE)
 + goto out;

imho this should be -EFAULT.

 + spin_lock(current-mm-page_table_lock);
 + ptep = ptep_for_addr(addr);
 + if (!ptep)
 + goto out_unlock;

ptep is a pointer and may contain an error code, like you implemented it
below. Therefore you need to check for IS_ERR() here.

 +static pmd_t *__pmdp_for_addr(struct mm_struct *mm, unsigned long addr)
 +{
 + struct vm_area_struct *vma;
 + pgd_t *pgd;
 + pud_t *pud;
 + pmd_t *pmd;
 +
 + vma = find_vma(mm, addr);
 + if (!vma)
 + return ERR_PTR(-EINVAL);

-EFAULT imho.

Also, why is this check good enough? As far as I remember find_vma() only
guarantees that addr < vma_end (if vma != NULL), but it does not guarantee
that addr >= vma_start.

 - vma = find_vma(mm, vmaddr);
 - if (!vma || vma-vm_start  vmaddr)
 - return -EFAULT;

... you used to check for that and also used the proper return code, btw.
Or is there a different reason why the above code is correct?

 +pte_t *ptep_for_addr(unsigned long addr)
 +{
 + pmd_t *pmd;
 + pte_t *rc;

Would you mind renaming rc into pte?

 +
 + down_read(current-mm-mmap_sem);
 +
 + pmd = __pmdp_for_addr(current-mm, addr);
 + if (IS_ERR(pmd)) {
 + rc = (pte_t *)pmd;
 + goto up_out;
 + }
 +
 + rc = pte_offset(pmd, addr);
 +up_out:
 + up_read(current-mm-mmap_sem);
 + return rc;

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[patch 10/12] [PATCH] kvm-s390: storage key interface

2011-12-09 Thread Carsten Otte
This patch introduces an interface to access the guest visible
storage keys. It supports three operations that model the behavior
that SSKE/ISKE/RRBE instructions would have if they were issued by
the guest. These instructions are all documented in the z architecture
principles of operation book.

Signed-off-by: Carsten Otte co...@de.ibm.com
---
---
 Documentation/virtual/kvm/api.txt |   38 +
 arch/s390/include/asm/kvm_host.h  |4 +
 arch/s390/include/asm/pgtable.h   |1 
 arch/s390/kvm/kvm-s390.c  |  110 --
 arch/s390/mm/pgtable.c|   70 +---
 include/linux/kvm.h   |7 ++
 6 files changed, 209 insertions(+), 21 deletions(-)

--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -1514,6 +1514,44 @@ table upfront. This is useful to handle
 controlled virtual machines to fault in the virtual cpu's lowcore pages
 prior to calling the KVM_RUN ioctl.
 
+4.67 KVM_S390_KEYOP
+
+Capability: KVM_CAP_UCONTROL
+Architectures: s390
+Type: vm ioctl
+Parameters: struct kvm_s390_keyop (in+out)
+Returns: 0 in case of success
+
+The parameter looks like this:
+   struct kvm_s390_keyop {
+   __u64 user_addr;
+   __u8  key;
+   __u8  operation;
+   };
+
+user_addr  contains the userspace address of a memory page
+keycontains the guest visible storage key as defined by the
+   z Architecture Principles of Operation book, including key
+   value for key controlled storage protection, the fetch
+   protection bit, and the reference and change indicator bits
+operation  indicates the key operation that should be performed
+
+The following operations are supported:
+KVM_S390_KEYOP_SSKE:
+   This operation behaves just like the set storage key extended (SSKE)
+   instruction would, if it were issued by the guest. The storage key
+   provided in key is placed in the guest visible storage key.
+KVM_S390_KEYOP_ISKE:
+   This operation behaves just like the insert storage key extended (ISKE)
+   instruction would, if it were issued by the guest. After this call,
+   the guest visible storage key is presented in the key field.
+KVM_S390_KEYOP_RRBE:
+   This operation behaves just like the reset referenced bit extended
+   (RRBE) instruction would, if it were issued by the guest. The guest
+   visible reference bit is cleared, and the value presented in the key
+   field after this call has the reference bit set to 1 in case the
+   guest view of the reference bit was 1 prior to this call.
+
 5. The kvm_run structure
 
 Application code obtains a pointer to the kvm_run structure by
--- a/arch/s390/include/asm/kvm_host.h
+++ b/arch/s390/include/asm/kvm_host.h
@@ -24,6 +24,10 @@
 /* memory slots that does not exposed to userspace */
 #define KVM_PRIVATE_MEM_SLOTS 4
 
+#define KVM_S390_KEYOP_SSKE 0x01
+#define KVM_S390_KEYOP_ISKE 0x02
+#define KVM_S390_KEYOP_RRBE 0x03
+
 struct sca_entry {
atomic_t scn;
__u32   reserved;
--- a/arch/s390/include/asm/pgtable.h
+++ b/arch/s390/include/asm/pgtable.h
@@ -1254,6 +1254,7 @@ static inline pte_t mk_swap_pte(unsigned
 extern int vmem_add_mapping(unsigned long start, unsigned long size);
 extern int vmem_remove_mapping(unsigned long start, unsigned long size);
 extern int s390_enable_sie(void);
+extern pte_t *ptep_for_addr(unsigned long addr);
 
 /*
  * No page table caches to initialise
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -112,13 +112,117 @@ void kvm_arch_exit(void)
 {
 }
 
+static long kvm_s390_keyop(struct kvm_s390_keyop *kop)
+{
+   unsigned long addr = kop->user_addr;
+   pte_t *ptep;
+   pgste_t pgste;
+   int r;
+   unsigned long skey;
+   unsigned long bits;
+
+   /* make sure this process is a hypervisor */
+   r = -EINVAL;
+   if (!mm_has_pgste(current->mm))
+   goto out;
+
+   r = -EFAULT;
+   if (addr >= PGDIR_SIZE)
+   goto out;
+
+   spin_lock(&current->mm->page_table_lock);
+   ptep = ptep_for_addr(addr);
+   if (!ptep)
+   goto out_unlock;
+   if (IS_ERR(ptep)) {
+   r = PTR_ERR(ptep);
+   goto out_unlock;
+   }
+
+   pgste = pgste_get_lock(ptep);
+
+   switch (kop->operation) {
+   case KVM_S390_KEYOP_SSKE:
+   pgste = pgste_update_all(ptep, pgste);
+   /* set the real key back w/o rc bits */
+   skey = kop->key & (_PAGE_ACC_BITS | _PAGE_FP_BIT);
+   if (pte_present(*ptep)) {
+   page_set_storage_key(pte_val(*ptep), skey, 1);
+   /* avoid race clobbering changed bit */
+   pte_val(*ptep) |= _PAGE_SWC;
+   }
+   /* put acc+f plus guest refereced and changed into the pgste */
+   pgste_val(pgste) = 

Re: [patch 10/12] [PATCH] kvm-s390: storage key interface

2011-12-09 Thread Carsten Otte

On 09.12.2011 13:52, Joachim von Buttlar wrote:

Shouldn't it be:   page_set_storage_key(pte_val(*ptep), skey |
_PAGE_CHANGED, 1);

+/* avoid race clobbering changed bit */
+pte_val(*ptep) |= _PAGE_SWC;

No, the guest GR/GC bits get set to the value userspace wants down
below (this is set storage key after all), and for the host we turn on
Martin's _PAGE_SWC software bit in the pte to make sure we don't
under-indicate changed. As far as I can tell, this should be just fine.



Typo:/* put acc+f plus guest referenced and changed into the

will fix.

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [patch 10/12] [PATCH] kvm-s390: storage key interface

2011-12-09 Thread Heiko Carstens
On Fri, Dec 09, 2011 at 01:49:35PM +0100, Carsten Otte wrote:
 This patch introduces an interface to access the guest visible
 storage keys. It supports three operations that model the behavior
 that SSKE/ISKE/RRBE instructions would have if they were issued by
 the guest. These instructions are all documented in the z architecture
 principles of operation book.
 
 Signed-off-by: Carsten Otte co...@de.ibm.com
 ---

[...]

 + spin_lock(&current->mm->page_table_lock);
 + ptep = ptep_for_addr(addr);
 + if (!ptep)
 + goto out_unlock;

FWIW, this is also a bit odd: if the guest would perform a storage key
operation on such an address it would succeed. If the host will do it,
it will fail (which doesn't match your description above).
No?

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[patch 10/12] [PATCH] kvm-s390: storage key interface

2011-12-08 Thread Carsten Otte
This patch introduces an interface to access the guest visible
storage keys. It supports three operations that model the behavior
that SSKE/ISKE/RRBE instructions would have if they were issued by
the guest. These instructions are all documented in the z architecture
principles of operation book.

Signed-off-by: Carsten Otte co...@de.ibm.com
---
---
 Documentation/virtual/kvm/api.txt |   38 ++
 arch/s390/include/asm/kvm_host.h  |4 +
 arch/s390/include/asm/pgtable.h   |1 
 arch/s390/kvm/kvm-s390.c  |  103 --
 arch/s390/mm/pgtable.c|   70 +++--
 include/linux/kvm.h   |7 ++
 6 files changed, 202 insertions(+), 21 deletions(-)

--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -1514,6 +1514,44 @@ table upfront. This is useful to handle
 controlled virtual machines to fault in the virtual cpu's lowcore pages
 prior to calling the KVM_RUN ioctl.
 
+4.67 KVM_S390_KEYOP
+
+Capability: KVM_CAP_UCONTROL
+Architectures: s390
+Type: vm ioctl
+Parameters: struct kvm_s390_keyop (in+out)
+Returns: 0 in case of success
+
+The parameter looks like this:
+   struct kvm_s390_keyop {
+   __u64 user_addr;
+   __u8  key;
+   __u8  operation;
+   };
+
+user_addr  contains the userspace address of a memory page
+keycontains the guest visible storage key as defined by the
+   z Architecture Principles of Operation book, including key
+   value for key controlled storage protection, the fetch
+   protection bit, and the reference and change indicator bits
+operation  indicates the key operation that should be performed
+
+The following operations are supported:
+KVM_S390_KEYOP_SSKE:
+   This operation behaves just like the set storage key extended (SSKE)
+   instruction would, if it were issued by the guest. The storage key
+   provided in key is placed in the guest visible storage key.
+KVM_S390_KEYOP_ISKE:
+   This operation behaves just like the insert storage key extended (ISKE)
+   instruction would, if it were issued by the guest. After this call,
+   the guest visible storage key is presented in the key field.
+KVM_S390_KEYOP_RRBE:
+   This operation behaves just like the reset referenced bit extended
+   (RRBE) instruction would, if it were issued by the guest. The guest
+   visible reference bit is cleared, and the value presented in the key
+   field after this call has the reference bit set to 1 in case the
+   guest view of the reference bit was 1 prior to this call.
+
 5. The kvm_run structure
 
 Application code obtains a pointer to the kvm_run structure by
--- a/arch/s390/include/asm/kvm_host.h
+++ b/arch/s390/include/asm/kvm_host.h
@@ -24,6 +24,10 @@
 /* memory slots that does not exposed to userspace */
 #define KVM_PRIVATE_MEM_SLOTS 4
 
+#define KVM_S390_KEYOP_SSKE 0x01
+#define KVM_S390_KEYOP_ISKE 0x02
+#define KVM_S390_KEYOP_RRBE 0x03
+
 struct sca_entry {
atomic_t scn;
__u32   reserved;
--- a/arch/s390/include/asm/pgtable.h
+++ b/arch/s390/include/asm/pgtable.h
@@ -1254,6 +1254,7 @@ static inline pte_t mk_swap_pte(unsigned
 extern int vmem_add_mapping(unsigned long start, unsigned long size);
 extern int vmem_remove_mapping(unsigned long start, unsigned long size);
 extern int s390_enable_sie(void);
+extern pte_t *ptep_for_addr(unsigned long addr);
 
 /*
  * No page table caches to initialise
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -112,13 +112,110 @@ void kvm_arch_exit(void)
 {
 }
 
+static long kvm_s390_keyop(struct kvm_s390_keyop *kop)
+{
+   unsigned long addr = kop->user_addr;
+   pte_t *ptep;
+   pgste_t pgste;
+   int r;
+   unsigned long skey;
+   unsigned long bits;
+
+   /* make sure this process is a hypervisor */
+   r = -EINVAL;
+   if (!mm_has_pgste(current->mm))
+   goto out;
+
+   r = -ENXIO;
+   if (addr >= PGDIR_SIZE)
+   goto out;
+
+   spin_lock(&current->mm->page_table_lock);
+   ptep = ptep_for_addr(addr);
+   if (!ptep)
+   goto out_unlock;
+
+   pgste = pgste_get_lock(ptep);
+
+   switch (kop->operation) {
+   case KVM_S390_KEYOP_SSKE:
+   pgste = pgste_update_all(ptep, pgste);
+   /* set the real key back w/o rc bits */
+   skey = kop->key & (_PAGE_ACC_BITS | _PAGE_FP_BIT);
+   if (pte_present(*ptep))
+   page_set_storage_key(pte_val(*ptep), skey, 1);
+   /* put acc+f plus guest refereced and changed into the pgste */
+   pgste_val(pgste) &= ~(RCP_ACC_BITS | RCP_FP_BIT | RCP_GR_BIT
+| RCP_GC_BIT);
+   bits = (kop->key & (_PAGE_ACC_BITS | _PAGE_FP_BIT));
+   pgste_val(pgste) |= bits << 56;
+   bits = (kop-key  

[patch 10/12] [PATCH] kvm-s390: storage key interface

2011-12-01 Thread Carsten Otte
This patch introduces an interface to access the guest visible
storage keys. It supports three operations that model the behavior
that SSKE/ISKE/RRBE instructions would have if they were issued by
the guest. These instructions are all documented in the z architecture
principles of operation book.

Signed-off-by: Carsten Otte co...@de.ibm.com
---
Index: linux-2.5-cecsim/arch/s390/include/asm/kvm_host.h
===
--- linux-2.5-cecsim.orig/arch/s390/include/asm/kvm_host.h
+++ linux-2.5-cecsim/arch/s390/include/asm/kvm_host.h
@@ -25,6 +25,9 @@
 #define KVM_PRIVATE_MEM_SLOTS 4
 
 #define KVM_SIE_PAGE_OFFSET 1
+#define KVM_S390_KEYOP_SSKE 0x01
+#define KVM_S390_KEYOP_ISKE 0x02
+#define KVM_S390_KEYOP_RRBE 0x03
 
 struct sca_entry {
atomic_t scn;
Index: linux-2.5-cecsim/arch/s390/kvm/kvm-s390.c
===
--- linux-2.5-cecsim.orig/arch/s390/kvm/kvm-s390.c
+++ linux-2.5-cecsim/arch/s390/kvm/kvm-s390.c
@@ -112,13 +112,144 @@ void kvm_arch_exit(void)
 {
 }
 
+pte_t *ptep_for_addr(unsigned long addr)
+{
+   struct vm_area_struct *vma;
+   pgd_t *pgd;
+   pud_t *pud;
+   pmd_t *pmd;
+   pte_t *rc;
+
+   down_read(&current->mm->mmap_sem);
+
+   rc = NULL;
+   vma = find_vma(current->mm, addr);
+   if (!vma)
+   goto up_out;
+
+   pgd = pgd_offset(current->mm, addr);
+   pud = pud_alloc(current->mm, pgd, addr);
+   if (!pud)
+   goto up_out;
+
+   pmd = pmd_alloc(current->mm, pud, addr);
+   if (!pmd)
+   goto up_out;
+
+   if (!pmd_present(*pmd) &&
+   __pte_alloc(current->mm, vma, pmd, addr))
+   goto up_out;
+
+   rc = pte_offset(pmd, addr);
+up_out:
+   up_read(&current->mm->mmap_sem);
+   return rc;
+}
+
+static long kvm_s390_keyop(struct kvm_s390_keyop *kop)
+{
+   unsigned long addr = kop->user_addr;
+   pte_t *ptep;
+   pgste_t pgste;
+   int r;
+   unsigned long skey;
+   unsigned long bits;
+
+   /* make sure this process is a hypervisor */
+   r = -EINVAL;
+   if (!mm_has_pgste(current->mm))
+   goto out;
+
+   r = -ENXIO;
+   if (addr >= PGDIR_SIZE)
+   goto out;
+
+   spin_lock(&current->mm->page_table_lock);
+   ptep = ptep_for_addr(addr);
+   if (!ptep)
+   goto out_unlock;
+
+   pgste = pgste_get_lock(ptep);
+
+   switch (kop->operation) {
+   case KVM_S390_KEYOP_SSKE:
+   pgste = pgste_update_all(ptep, pgste);
+   /* set the real key back w/o rc bits */
+   skey = kop->key & (_PAGE_ACC_BITS | _PAGE_FP_BIT);
+   if (pte_present(*ptep))
+   page_set_storage_key(pte_val(*ptep), skey, 1);
+   /* put acc+f plus guest refereced and changed into the pgste */
+   pgste_val(pgste) &= ~(RCP_ACC_BITS | RCP_FP_BIT | RCP_GR_BIT
+| RCP_GC_BIT);
+   bits = (kop->key & (_PAGE_ACC_BITS | _PAGE_FP_BIT));
+   pgste_val(pgste) |= bits << 56;
+   bits = (kop->key & (_PAGE_CHANGED | _PAGE_REFERENCED));
+   pgste_val(pgste) |= bits << 48;
+   r = 0;
+   break;
+   case KVM_S390_KEYOP_ISKE:
+   if (pte_present(*ptep)) {
+   skey = page_get_storage_key(pte_val(*ptep));
+   kop->key = skey & (_PAGE_ACC_BITS | _PAGE_FP_BIT);
+   } else {
+   skey = 0;
+   kop->key = (pgste_val(pgste) >> 56) &
+  (_PAGE_ACC_BITS | _PAGE_FP_BIT);
+   }
+   kop->key |= skey & (_PAGE_CHANGED | _PAGE_REFERENCED);
+   kop->key |= (pgste_val(pgste) >> 48) &
+   (_PAGE_CHANGED | _PAGE_REFERENCED);
+   r = 0;
+   break;
+   case KVM_S390_KEYOP_RRBE:
+   pgste = pgste_update_all(ptep, pgste);
+   kop->key = 0;
+   if (pgste_val(pgste) & RCP_GR_BIT)
+   kop->key |= _PAGE_REFERENCED;
+   pgste_val(pgste) &= ~RCP_GR_BIT;
+   r = 0;
+   break;
+   default:
+   r = -EINVAL;
+   }
+   pgste_set_unlock(ptep, pgste);
+
+out_unlock:
+   spin_unlock(&current->mm->page_table_lock);
+out:
+   return r;
+}
+
 /* Section: device related */
 long kvm_arch_dev_ioctl(struct file *filp,
unsigned int ioctl, unsigned long arg)
 {
-   if (ioctl == KVM_S390_ENABLE_SIE)
-   return s390_enable_sie();
-   return -EINVAL;
+   void __user *argp = (void __user *)arg;
+   int r;
+
+   switch (ioctl) {
+   case KVM_S390_ENABLE_SIE:
+   r = s390_enable_sie();
+   break;
+   case KVM_S390_KEYOP: {
+   struct kvm_s390_keyop kop;
+   r =