Re: [PATCH 8/8] KVM: PPC: Add hugepage support for IOMMU in-kernel handling

2013-07-11 Thread Alexey Kardashevskiy
On 07/10/2013 03:32 AM, Alexander Graf wrote:
 On 07/06/2013 05:07 PM, Alexey Kardashevskiy wrote:
 This adds special support for huge pages (16MB).  The reference
 counting cannot be easily done for such pages in real mode (when
 MMU is off) so we added a list of huge pages.  It is populated in
 virtual mode and get_page is called just once per a huge page.
 Real mode handlers check if the requested page is huge and in the list,
 then no reference counting is done, otherwise an exit to virtual mode
 happens.  The list is released at KVM exit.  At the moment the fastest
 card available for tests uses up to 9 huge pages so walking through this
 list is not very expensive.  However this can change and we may want
 to optimize this.

 Signed-off-by: Paul Mackerraspau...@samba.org
 Signed-off-by: Alexey Kardashevskiya...@ozlabs.ru

 ---

 Changes:
 2013/06/27:
 * list of huge pages replaced with a hashtable for better performance
 
 So the only thing your patch description really talks about is not true
 anymore?
 
 * spinlock removed from real mode and only protects insertion of new
 huge pages descriptors into the hashtable

 2013/06/05:
 * fixed compile error when CONFIG_IOMMU_API=n

 2013/05/20:
 * the real mode handler now searches for a huge page by gpa (used to be pte)
 * the virtual mode handler prints warning if it is called twice for the same
 huge page as the real mode handler is expected to fail just once - when a
 huge
 page is not in the list yet.
 * the huge page is refcounted twice - when added to the hugepage list and
 when used in the virtual mode hcall handler (can be optimized but it will
 make the patch less nice).

 Signed-off-by: Alexey Kardashevskiya...@ozlabs.ru
 ---
   arch/powerpc/include/asm/kvm_host.h |  25 +
   arch/powerpc/kernel/iommu.c |   6 ++-
   arch/powerpc/kvm/book3s_64_vio.c| 104
 +---
   arch/powerpc/kvm/book3s_64_vio_hv.c |  21 ++--
   4 files changed, 146 insertions(+), 10 deletions(-)

 diff --git a/arch/powerpc/include/asm/kvm_host.h
 b/arch/powerpc/include/asm/kvm_host.h
 index 53e61b2..a7508cf 100644
 --- a/arch/powerpc/include/asm/kvm_host.h
 +++ b/arch/powerpc/include/asm/kvm_host.h
 @@ -30,6 +30,7 @@
   #includelinux/kvm_para.h
   #includelinux/list.h
   #includelinux/atomic.h
 +#includelinux/hashtable.h
   #includeasm/kvm_asm.h
   #includeasm/processor.h
   #includeasm/page.h
 @@ -182,10 +183,34 @@ struct kvmppc_spapr_tce_table {
   u32 window_size;
   struct iommu_group *grp;/* used for IOMMU groups */
   struct vfio_group *vfio_grp;/* used for IOMMU groups */
 +DECLARE_HASHTABLE(hash_tab, ilog2(64));/* used for IOMMU groups */
 +spinlock_t hugepages_write_lock;/* used for IOMMU groups */
   struct { struct { unsigned long put, indir, stuff; } rm, vm; } stat;
   struct page *pages[0];
   };

 +/*
 + * The KVM guest can be backed with 16MB pages.
 + * In this case, we cannot do page counting from the real mode
 + * as the compound pages are used - they are linked in a list
 + * with pointers as virtual addresses which are inaccessible
 + * in real mode.
 + *
 + * The code below keeps a 16MB pages list and uses page struct
 + * in real mode if it is already locked in RAM and inserted into
 + * the list or switches to the virtual mode where it can be
 + * handled in a usual manner.
 + */
 +#define KVMPPC_SPAPR_HUGEPAGE_HASH(gpa)hash_32(gpa  24, 32)
 +
 +struct kvmppc_spapr_iommu_hugepage {
 +struct hlist_node hash_node;
 +unsigned long gpa;/* Guest physical address */
 +unsigned long hpa;/* Host physical address */
 +struct page *page;/* page struct of the very first subpage */
 +unsigned long size;/* Huge page size (always 16MB at the moment) */
 +};
 +
   struct kvmppc_linear_info {
   void*base_virt;
   unsigned long base_pfn;
 diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c
 index 51678ec..e0b6eca 100644
 --- a/arch/powerpc/kernel/iommu.c
 +++ b/arch/powerpc/kernel/iommu.c
 @@ -999,7 +999,8 @@ int iommu_free_tces(struct iommu_table *tbl, unsigned
 long entry,
   if (!pg) {
   ret = -EAGAIN;
   } else if (PageCompound(pg)) {
 -ret = -EAGAIN;
 +/* Hugepages will be released at KVM exit */
 +ret = 0;
   } else {
   if (oldtce  TCE_PCI_WRITE)
   SetPageDirty(pg);
 @@ -1009,6 +1010,9 @@ int iommu_free_tces(struct iommu_table *tbl,
 unsigned long entry,
   struct page *pg = pfn_to_page(oldtce  PAGE_SHIFT);
   if (!pg) {
   ret = -EAGAIN;
 +} else if (PageCompound(pg)) {
 +/* Hugepages will be released at KVM exit */
 +ret = 0;
   } else {
   if (oldtce  TCE_PCI_WRITE)
   SetPageDirty(pg);
 diff --git 

Re: [PATCH 8/8] KVM: PPC: Add hugepage support for IOMMU in-kernel handling

2013-07-11 Thread Alexander Graf

On 11.07.2013, at 10:57, Alexey Kardashevskiy wrote:

 On 07/10/2013 03:32 AM, Alexander Graf wrote:
 On 07/06/2013 05:07 PM, Alexey Kardashevskiy wrote:
 This adds special support for huge pages (16MB).  The reference
 counting cannot be easily done for such pages in real mode (when
 MMU is off) so we added a list of huge pages.  It is populated in
 virtual mode and get_page is called just once per a huge page.
 Real mode handlers check if the requested page is huge and in the list,
 then no reference counting is done, otherwise an exit to virtual mode
 happens.  The list is released at KVM exit.  At the moment the fastest
 card available for tests uses up to 9 huge pages so walking through this
 list is not very expensive.  However this can change and we may want
 to optimize this.
 
 Signed-off-by: Paul Mackerraspau...@samba.org
 Signed-off-by: Alexey Kardashevskiya...@ozlabs.ru
 
 ---
 
 Changes:
 2013/06/27:
 * list of huge pages replaced with a hashtable for better performance
 
 So the only thing your patch description really talks about is not true
 anymore?
 
 * spinlock removed from real mode and only protects insertion of new
 huge pages descriptors into the hashtable
 
 2013/06/05:
 * fixed compile error when CONFIG_IOMMU_API=n
 
 2013/05/20:
 * the real mode handler now searches for a huge page by gpa (used to be pte)
 * the virtual mode handler prints warning if it is called twice for the same
 huge page as the real mode handler is expected to fail just once - when a
 huge
 page is not in the list yet.
 * the huge page is refcounted twice - when added to the hugepage list and
 when used in the virtual mode hcall handler (can be optimized but it will
 make the patch less nice).
 
 Signed-off-by: Alexey Kardashevskiya...@ozlabs.ru
 ---
  arch/powerpc/include/asm/kvm_host.h |  25 +
  arch/powerpc/kernel/iommu.c |   6 ++-
  arch/powerpc/kvm/book3s_64_vio.c| 104
 +---
  arch/powerpc/kvm/book3s_64_vio_hv.c |  21 ++--
  4 files changed, 146 insertions(+), 10 deletions(-)
 
 diff --git a/arch/powerpc/include/asm/kvm_host.h
 b/arch/powerpc/include/asm/kvm_host.h
 index 53e61b2..a7508cf 100644
 --- a/arch/powerpc/include/asm/kvm_host.h
 +++ b/arch/powerpc/include/asm/kvm_host.h
 @@ -30,6 +30,7 @@
  #includelinux/kvm_para.h
  #includelinux/list.h
  #includelinux/atomic.h
 +#includelinux/hashtable.h
  #includeasm/kvm_asm.h
  #includeasm/processor.h
  #includeasm/page.h
 @@ -182,10 +183,34 @@ struct kvmppc_spapr_tce_table {
  u32 window_size;
  struct iommu_group *grp;/* used for IOMMU groups */
  struct vfio_group *vfio_grp;/* used for IOMMU groups */
 +DECLARE_HASHTABLE(hash_tab, ilog2(64));/* used for IOMMU groups */
 +spinlock_t hugepages_write_lock;/* used for IOMMU groups */
  struct { struct { unsigned long put, indir, stuff; } rm, vm; } stat;
  struct page *pages[0];
  };
 
 +/*
 + * The KVM guest can be backed with 16MB pages.
 + * In this case, we cannot do page counting from the real mode
 + * as the compound pages are used - they are linked in a list
 + * with pointers as virtual addresses which are inaccessible
 + * in real mode.
 + *
 + * The code below keeps a 16MB pages list and uses page struct
 + * in real mode if it is already locked in RAM and inserted into
 + * the list or switches to the virtual mode where it can be
 + * handled in a usual manner.
 + */
 +#define KVMPPC_SPAPR_HUGEPAGE_HASH(gpa)hash_32(gpa  24, 32)
 +
 +struct kvmppc_spapr_iommu_hugepage {
 +struct hlist_node hash_node;
 +unsigned long gpa;/* Guest physical address */
 +unsigned long hpa;/* Host physical address */
 +struct page *page;/* page struct of the very first subpage */
 +unsigned long size;/* Huge page size (always 16MB at the moment) */
 +};
 +
  struct kvmppc_linear_info {
  void*base_virt;
  unsigned long base_pfn;
 diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c
 index 51678ec..e0b6eca 100644
 --- a/arch/powerpc/kernel/iommu.c
 +++ b/arch/powerpc/kernel/iommu.c
 @@ -999,7 +999,8 @@ int iommu_free_tces(struct iommu_table *tbl, unsigned
 long entry,
  if (!pg) {
  ret = -EAGAIN;
  } else if (PageCompound(pg)) {
 -ret = -EAGAIN;
 +/* Hugepages will be released at KVM exit */
 +ret = 0;
  } else {
  if (oldtce  TCE_PCI_WRITE)
  SetPageDirty(pg);
 @@ -1009,6 +1010,9 @@ int iommu_free_tces(struct iommu_table *tbl,
 unsigned long entry,
  struct page *pg = pfn_to_page(oldtce  PAGE_SHIFT);
  if (!pg) {
  ret = -EAGAIN;
 +} else if (PageCompound(pg)) {
 +/* Hugepages will be released at KVM exit */
 +ret = 0;
  } else {
  if (oldtce  TCE_PCI_WRITE)
  

Re: [PATCH 8/8] KVM: PPC: Add hugepage support for IOMMU in-kernel handling

2013-07-11 Thread Benjamin Herrenschmidt
On Thu, 2013-07-11 at 11:52 +0200, Alexander Graf wrote:
  Where exactly (it is rather SPAPR_TCE_IOMMU but does not really
 matter)?
  Select it on KVM_BOOK3S_64? CONFIG_KVM_BOOK3S_64_HV?
  CONFIG_KVM_BOOK3S_64_PR? PPC_BOOK3S_64?
 
 I'd say the most logical choice would be to check the Makefile and see
 when it gets compiled. For those cases we want it enabled.

What *what* gets compiled ? You know our Makefile, it's crap :-)

We enable built-in things when CONFIG_KVM=m (which means you cannot take
a kernel build with CONFIG_KVM not set, enable CONFIG_KVM=m, and just
build the module, it won't work).

We could use KVM_BOOK3S_64 maybe ?

  I am trying to imagine a configuration where we really do not want
  IOMMU_API. Ben mentioned PPC32 and embedded PPC64 and that's it so
 any of
  BOOK3S (KVM_BOOK3S_64 is the best) should be fine, no?
 
 book3s_32 doesn't want this, but any book3s_64 implementation could
 potentially use it, yes. That's pretty much what the Makefile tells
 you too :).

Not really no. But that would do. You could have given a more useful
answer in the first place though rather than stringing him along.

Cheers,
Ben.


___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


Re: [PATCH 8/8] KVM: PPC: Add hugepage support for IOMMU in-kernel handling

2013-07-11 Thread Alexander Graf

On 11.07.2013, at 14:37, Benjamin Herrenschmidt wrote:

 On Thu, 2013-07-11 at 11:52 +0200, Alexander Graf wrote:
 Where exactly (it is rather SPAPR_TCE_IOMMU but does not really
 matter)?
 Select it on KVM_BOOK3S_64? CONFIG_KVM_BOOK3S_64_HV?
 CONFIG_KVM_BOOK3S_64_PR? PPC_BOOK3S_64?
 
 I'd say the most logical choice would be to check the Makefile and see
 when it gets compiled. For those cases we want it enabled.
 
 What *what* gets compiled ? You know our Makefile, it's crap :-)
 
 We enable built-in things when CONFIG_KVM=m (which means you cannot take
 a kernel build with CONFIG_KVM not set, enable CONFIG_KVM=m, and just
 build the module, it won't work).
 
 We could use KVM_BOOK3S_64 maybe ?

If either a =m or a =y option selects a =y option, it gets selected regardless, 
no? So it shouldn't really matter where we attach it FWIW.

 
 I am trying to imagine a configuration where we really do not want
 IOMMU_API. Ben mentioned PPC32 and embedded PPC64 and that's it so
 any of
 BOOK3S (KVM_BOOK3S_64 is the best) should be fine, no?
 
 book3s_32 doesn't want this, but any book3s_64 implementation could
 potentially use it, yes. That's pretty much what the Makefile tells
 you too :).
 
 Not really no. But that would do. You could have given a more useful
 answer in the first place though rather than stringing him along.

Sorry, I figured it was obvious.


Alex

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


Re: [PATCH 8/8] KVM: PPC: Add hugepage support for IOMMU in-kernel handling

2013-07-11 Thread Benjamin Herrenschmidt
On Thu, 2013-07-11 at 14:50 +0200, Alexander Graf wrote:
  Not really no. But that would do. You could have given a more useful
  answer in the first place though rather than stringing him along.
 
 Sorry, I figured it was obvious.

It wasn't no, because of the mess with modules and the nasty Makefile we
have in there. Even I had to scratch my head for a bit :-)

Ben.


___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


Re: [PATCH 8/8] KVM: PPC: Add hugepage support for IOMMU in-kernel handling

2013-07-11 Thread Alexander Graf

On 11.07.2013, at 15:41, chandrashekar shastri wrote:

 Hi All,
 
 I compiled the latest kernel 3.10.0+ pulled from the git on top of
 3.10.0-rc5+ by enabling the new Virtualization features. The compilation was
 successful, when I rebooted the machine it fails to boot with error as
 systemd [1] : Failed to mount /dev : no such device.
 
 Is it problem with the KVM module?

Very unlikely. You're probably missing generic config options in your .config 
file. But this is very off topic for a) this thread and b) these mailing lists.


Alex

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


Re: [PATCH 8/8] KVM: PPC: Add hugepage support for IOMMU in-kernel handling

2013-07-11 Thread Alexey Kardashevskiy
On 07/11/2013 11:41 PM, chandrashekar shastri wrote:
 Hi All,
 
 I compiled the latest kernel 3.10.0+ pulled from the git on top of
 3.10.0-rc5+ by enabling the new Virtualization features. The compilation
 was successful, when I rebooted the machine it fails to boot with error as
  systemd [1] : Failed to mount /dev : no such device.
 
 Is it problem with the KVM module?


Wrong thread actually, would be better if you started the new one.

And you may want to try this - http://patchwork.ozlabs.org/patch/256027/


-- 
Alexey
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


Re: [PATCH 8/8] KVM: PPC: Add hugepage support for IOMMU in-kernel handling

2013-07-11 Thread chandrashekar shastri

Hi All,

I compiled the latest kernel 3.10.0+ pulled from the git on top of
3.10.0-rc5+ by enabling the new Virtualization features. The compilation
was successful, when I rebooted the machine it fails to boot with error
as  systemd [1] : Failed to mount /dev : no such device.


Is it problem with the KVM module?

Thanks,
Shastri

On 07/11/2013 06:26 PM, Benjamin Herrenschmidt wrote:

On Thu, 2013-07-11 at 14:50 +0200, Alexander Graf wrote:

Not really no. But that would do. You could have given a more useful
answer in the first place though rather than stringing him along.

Sorry, I figured it was obvious.

It wasn't no, because of the mess with modules and the nasty Makefile we
have in there. Even I had to scratch my head for a bit :-)

Ben.


--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/



___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


Re: [PATCH 8/8] KVM: PPC: Add hugepage support for IOMMU in-kernel handling

2013-07-10 Thread Alexander Graf

On 10.07.2013, at 01:29, Alexey Kardashevskiy wrote:

 On 07/10/2013 03:32 AM, Alexander Graf wrote:
 On 07/06/2013 05:07 PM, Alexey Kardashevskiy wrote:
 This adds special support for huge pages (16MB).  The reference
 counting cannot be easily done for such pages in real mode (when
 MMU is off) so we added a list of huge pages.  It is populated in
 virtual mode and get_page is called just once per a huge page.
 Real mode handlers check if the requested page is huge and in the list,
 then no reference counting is done, otherwise an exit to virtual mode
 happens.  The list is released at KVM exit.  At the moment the fastest
 card available for tests uses up to 9 huge pages so walking through this
 list is not very expensive.  However this can change and we may want
 to optimize this.
 
 Signed-off-by: Paul Mackerraspau...@samba.org
 Signed-off-by: Alexey Kardashevskiya...@ozlabs.ru
 
 ---
 
 Changes:
 2013/06/27:
 * list of huge pages replaced with a hashtable for better performance
 
 So the only thing your patch description really talks about is not true
 anymore?
 
 * spinlock removed from real mode and only protects insertion of new
 huge pages descriptors into the hashtable
 
 2013/06/05:
 * fixed compile error when CONFIG_IOMMU_API=n
 
 2013/05/20:
 * the real mode handler now searches for a huge page by gpa (used to be pte)
 * the virtual mode handler prints warning if it is called twice for the same
 huge page as the real mode handler is expected to fail just once - when a
 huge
 page is not in the list yet.
 * the huge page is refcounted twice - when added to the hugepage list and
 when used in the virtual mode hcall handler (can be optimized but it will
 make the patch less nice).
 
 Signed-off-by: Alexey Kardashevskiya...@ozlabs.ru
 ---
  arch/powerpc/include/asm/kvm_host.h |  25 +
  arch/powerpc/kernel/iommu.c |   6 ++-
  arch/powerpc/kvm/book3s_64_vio.c| 104
 +---
  arch/powerpc/kvm/book3s_64_vio_hv.c |  21 ++--
  4 files changed, 146 insertions(+), 10 deletions(-)
 
 diff --git a/arch/powerpc/include/asm/kvm_host.h
 b/arch/powerpc/include/asm/kvm_host.h
 index 53e61b2..a7508cf 100644
 --- a/arch/powerpc/include/asm/kvm_host.h
 +++ b/arch/powerpc/include/asm/kvm_host.h
 @@ -30,6 +30,7 @@
  #includelinux/kvm_para.h
  #includelinux/list.h
  #includelinux/atomic.h
 +#includelinux/hashtable.h
  #includeasm/kvm_asm.h
  #includeasm/processor.h
  #includeasm/page.h
 @@ -182,10 +183,34 @@ struct kvmppc_spapr_tce_table {
  u32 window_size;
  struct iommu_group *grp;/* used for IOMMU groups */
  struct vfio_group *vfio_grp;/* used for IOMMU groups */
 +DECLARE_HASHTABLE(hash_tab, ilog2(64));/* used for IOMMU groups */
 +spinlock_t hugepages_write_lock;/* used for IOMMU groups */
  struct { struct { unsigned long put, indir, stuff; } rm, vm; } stat;
  struct page *pages[0];
  };
 
 +/*
 + * The KVM guest can be backed with 16MB pages.
 + * In this case, we cannot do page counting from the real mode
 + * as the compound pages are used - they are linked in a list
 + * with pointers as virtual addresses which are inaccessible
 + * in real mode.
 + *
 + * The code below keeps a 16MB pages list and uses page struct
 + * in real mode if it is already locked in RAM and inserted into
 + * the list or switches to the virtual mode where it can be
 + * handled in a usual manner.
 + */
 +#define KVMPPC_SPAPR_HUGEPAGE_HASH(gpa)hash_32(gpa  24, 32)
 +
 +struct kvmppc_spapr_iommu_hugepage {
 +struct hlist_node hash_node;
 +unsigned long gpa;/* Guest physical address */
 +unsigned long hpa;/* Host physical address */
 +struct page *page;/* page struct of the very first subpage */
 +unsigned long size;/* Huge page size (always 16MB at the moment) */
 +};
 +
  struct kvmppc_linear_info {
  void*base_virt;
  unsigned long base_pfn;
 diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c
 index 51678ec..e0b6eca 100644
 --- a/arch/powerpc/kernel/iommu.c
 +++ b/arch/powerpc/kernel/iommu.c
 @@ -999,7 +999,8 @@ int iommu_free_tces(struct iommu_table *tbl, unsigned
 long entry,
  if (!pg) {
  ret = -EAGAIN;
  } else if (PageCompound(pg)) {
 -ret = -EAGAIN;
 +/* Hugepages will be released at KVM exit */
 +ret = 0;
  } else {
  if (oldtce  TCE_PCI_WRITE)
  SetPageDirty(pg);
 @@ -1009,6 +1010,9 @@ int iommu_free_tces(struct iommu_table *tbl,
 unsigned long entry,
  struct page *pg = pfn_to_page(oldtce  PAGE_SHIFT);
  if (!pg) {
  ret = -EAGAIN;
 +} else if (PageCompound(pg)) {
 +/* Hugepages will be released at KVM exit */
 +ret = 0;
  } else {
  if (oldtce  TCE_PCI_WRITE)
  

Re: [PATCH 8/8] KVM: PPC: Add hugepage support for IOMMU in-kernel handling

2013-07-10 Thread Benjamin Herrenschmidt
On Wed, 2013-07-10 at 12:33 +0200, Alexander Graf wrote:
 
 It's not exactly obvious that you're calling it with writing == 1 :).
 Can you create a new local variable is_write in the calling
 function, set that to 1 before the call to get_user_pages_fast and
 pass it in instead of the 1? The compiler should easily optimize all
 of that away, but it makes the code by far easier to read.

Ugh ?

Nobody else does that  (look at futex :-)

Ben.


___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


Re: [PATCH 8/8] KVM: PPC: Add hugepage support for IOMMU in-kernel handling

2013-07-10 Thread Alexander Graf

On 10.07.2013, at 12:39, Benjamin Herrenschmidt wrote:

 On Wed, 2013-07-10 at 12:33 +0200, Alexander Graf wrote:
 
 It's not exactly obvious that you're calling it with writing == 1 :).
 Can you create a new local variable is_write in the calling
 function, set that to 1 before the call to get_user_pages_fast and
 pass it in instead of the 1? The compiler should easily optimize all
 of that away, but it makes the code by far easier to read.
 
 Ugh ?
 
 Nobody else does that  (look at futex :-)

Yeah, that's fortunately code that I don't have to read :).


Alex

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


Re: [PATCH 8/8] KVM: PPC: Add hugepage support for IOMMU in-kernel handling

2013-07-10 Thread Alexander Graf

On 10.07.2013, at 12:40, Alexander Graf wrote:

 
 On 10.07.2013, at 12:39, Benjamin Herrenschmidt wrote:
 
 On Wed, 2013-07-10 at 12:33 +0200, Alexander Graf wrote:
 
 It's not exactly obvious that you're calling it with writing == 1 :).
 Can you create a new local variable is_write in the calling
 function, set that to 1 before the call to get_user_pages_fast and
 pass it in instead of the 1? The compiler should easily optimize all
 of that away, but it makes the code by far easier to read.
 
 Ugh ?
 
 Nobody else does that  (look at futex :-)
 
 Yeah, that's fortunately code that I don't have to read :).

The proper alternative would be to pass an enum for read/write into the 
function rather than an int. But that'd be a pretty controversial, big change 
that I'd rather not put on Alexey. With a local variable we're nicely 
self-contained readable ;)


Alex

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


Re: [PATCH 8/8] KVM: PPC: Add hugepage support for IOMMU in-kernel handling

2013-07-09 Thread Alexander Graf

On 07/06/2013 05:07 PM, Alexey Kardashevskiy wrote:

This adds special support for huge pages (16MB).  The reference
counting cannot be easily done for such pages in real mode (when
MMU is off) so we added a list of huge pages.  It is populated in
virtual mode and get_page is called just once per a huge page.
Real mode handlers check if the requested page is huge and in the list,
then no reference counting is done, otherwise an exit to virtual mode
happens.  The list is released at KVM exit.  At the moment the fastest
card available for tests uses up to 9 huge pages so walking through this
list is not very expensive.  However this can change and we may want
to optimize this.

Signed-off-by: Paul Mackerraspau...@samba.org
Signed-off-by: Alexey Kardashevskiya...@ozlabs.ru

---

Changes:
2013/06/27:
* list of huge pages replaced with a hashtable for better performance


So the only thing your patch description really talks about is not true 
anymore?



* spinlock removed from real mode and only protects insertion of new
huge pages descriptors into the hashtable

2013/06/05:
* fixed compile error when CONFIG_IOMMU_API=n

2013/05/20:
* the real mode handler now searches for a huge page by gpa (used to be pte)
* the virtual mode handler prints warning if it is called twice for the same
huge page as the real mode handler is expected to fail just once - when a huge
page is not in the list yet.
* the huge page is refcounted twice - when added to the hugepage list and
when used in the virtual mode hcall handler (can be optimized but it will
make the patch less nice).

Signed-off-by: Alexey Kardashevskiya...@ozlabs.ru
---
  arch/powerpc/include/asm/kvm_host.h |  25 +
  arch/powerpc/kernel/iommu.c |   6 ++-
  arch/powerpc/kvm/book3s_64_vio.c| 104 +---
  arch/powerpc/kvm/book3s_64_vio_hv.c |  21 ++--
  4 files changed, 146 insertions(+), 10 deletions(-)

diff --git a/arch/powerpc/include/asm/kvm_host.h 
b/arch/powerpc/include/asm/kvm_host.h
index 53e61b2..a7508cf 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -30,6 +30,7 @@
  #includelinux/kvm_para.h
  #includelinux/list.h
  #includelinux/atomic.h
+#includelinux/hashtable.h
  #includeasm/kvm_asm.h
  #includeasm/processor.h
  #includeasm/page.h
@@ -182,10 +183,34 @@ struct kvmppc_spapr_tce_table {
u32 window_size;
struct iommu_group *grp;/* used for IOMMU groups */
struct vfio_group *vfio_grp;/* used for IOMMU groups */
+   DECLARE_HASHTABLE(hash_tab, ilog2(64)); /* used for IOMMU groups */
+   spinlock_t hugepages_write_lock;/* used for IOMMU groups */
struct { struct { unsigned long put, indir, stuff; } rm, vm; } stat;
struct page *pages[0];
  };

+/*
+ * The KVM guest can be backed with 16MB pages.
+ * In this case, we cannot do page counting from the real mode
+ * as the compound pages are used - they are linked in a list
+ * with pointers as virtual addresses which are inaccessible
+ * in real mode.
+ *
+ * The code below keeps a 16MB pages list and uses page struct
+ * in real mode if it is already locked in RAM and inserted into
+ * the list or switches to the virtual mode where it can be
+ * handled in a usual manner.
+ */
+#define KVMPPC_SPAPR_HUGEPAGE_HASH(gpa)hash_32(gpa  24, 32)
+
+struct kvmppc_spapr_iommu_hugepage {
+   struct hlist_node hash_node;
+   unsigned long gpa;  /* Guest physical address */
+   unsigned long hpa;  /* Host physical address */
+   struct page *page;  /* page struct of the very first subpage */
+   unsigned long size; /* Huge page size (always 16MB at the moment) */
+};
+
  struct kvmppc_linear_info {
void*base_virt;
unsigned longbase_pfn;
diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c
index 51678ec..e0b6eca 100644
--- a/arch/powerpc/kernel/iommu.c
+++ b/arch/powerpc/kernel/iommu.c
@@ -999,7 +999,8 @@ int iommu_free_tces(struct iommu_table *tbl, unsigned long 
entry,
if (!pg) {
ret = -EAGAIN;
} else if (PageCompound(pg)) {
-   ret = -EAGAIN;
+   /* Hugepages will be released at KVM exit */
+   ret = 0;
} else {
if (oldtce  TCE_PCI_WRITE)
SetPageDirty(pg);
@@ -1009,6 +1010,9 @@ int iommu_free_tces(struct iommu_table *tbl, unsigned 
long entry,
struct page *pg = pfn_to_page(oldtce  PAGE_SHIFT);
if (!pg) {
ret = -EAGAIN;
+   } else if (PageCompound(pg)) {
+   /* Hugepages will be released at KVM exit */
+   ret = 0;
} 

Re: [PATCH 8/8] KVM: PPC: Add hugepage support for IOMMU in-kernel handling

2013-07-09 Thread Alexey Kardashevskiy
On 07/10/2013 03:32 AM, Alexander Graf wrote:
 On 07/06/2013 05:07 PM, Alexey Kardashevskiy wrote:
 This adds special support for huge pages (16MB).  The reference
 counting cannot be easily done for such pages in real mode (when
 MMU is off) so we added a list of huge pages.  It is populated in
 virtual mode and get_page is called just once per a huge page.
 Real mode handlers check if the requested page is huge and in the list,
 then no reference counting is done, otherwise an exit to virtual mode
 happens.  The list is released at KVM exit.  At the moment the fastest
 card available for tests uses up to 9 huge pages so walking through this
 list is not very expensive.  However this can change and we may want
 to optimize this.

 Signed-off-by: Paul Mackerraspau...@samba.org
 Signed-off-by: Alexey Kardashevskiya...@ozlabs.ru

 ---

 Changes:
 2013/06/27:
 * list of huge pages replaced with a hashtable for better performance
 
 So the only thing your patch description really talks about is not true
 anymore?
 
 * spinlock removed from real mode and only protects insertion of new
 huge pages descriptors into the hashtable

 2013/06/05:
 * fixed compile error when CONFIG_IOMMU_API=n

 2013/05/20:
 * the real mode handler now searches for a huge page by gpa (used to be pte)
 * the virtual mode handler prints warning if it is called twice for the same
 huge page as the real mode handler is expected to fail just once - when a
 huge
 page is not in the list yet.
 * the huge page is refcounted twice - when added to the hugepage list and
 when used in the virtual mode hcall handler (can be optimized but it will
 make the patch less nice).

 Signed-off-by: Alexey Kardashevskiya...@ozlabs.ru
 ---
   arch/powerpc/include/asm/kvm_host.h |  25 +
   arch/powerpc/kernel/iommu.c |   6 ++-
   arch/powerpc/kvm/book3s_64_vio.c| 104
 +---
   arch/powerpc/kvm/book3s_64_vio_hv.c |  21 ++--
   4 files changed, 146 insertions(+), 10 deletions(-)

 diff --git a/arch/powerpc/include/asm/kvm_host.h
 b/arch/powerpc/include/asm/kvm_host.h
 index 53e61b2..a7508cf 100644
 --- a/arch/powerpc/include/asm/kvm_host.h
 +++ b/arch/powerpc/include/asm/kvm_host.h
 @@ -30,6 +30,7 @@
   #includelinux/kvm_para.h
   #includelinux/list.h
   #includelinux/atomic.h
 +#includelinux/hashtable.h
   #includeasm/kvm_asm.h
   #includeasm/processor.h
   #includeasm/page.h
 @@ -182,10 +183,34 @@ struct kvmppc_spapr_tce_table {
   u32 window_size;
   struct iommu_group *grp;/* used for IOMMU groups */
   struct vfio_group *vfio_grp;/* used for IOMMU groups */
 +DECLARE_HASHTABLE(hash_tab, ilog2(64));/* used for IOMMU groups */
 +spinlock_t hugepages_write_lock;/* used for IOMMU groups */
   struct { struct { unsigned long put, indir, stuff; } rm, vm; } stat;
   struct page *pages[0];
   };

 +/*
 + * The KVM guest can be backed with 16MB pages.
 + * In this case, we cannot do page counting from the real mode
 + * as the compound pages are used - they are linked in a list
 + * with pointers as virtual addresses which are inaccessible
 + * in real mode.
 + *
 + * The code below keeps a 16MB pages list and uses page struct
 + * in real mode if it is already locked in RAM and inserted into
 + * the list or switches to the virtual mode where it can be
 + * handled in a usual manner.
 + */
 +#define KVMPPC_SPAPR_HUGEPAGE_HASH(gpa)hash_32(gpa  24, 32)
 +
 +struct kvmppc_spapr_iommu_hugepage {
 +struct hlist_node hash_node;
 +unsigned long gpa;/* Guest physical address */
 +unsigned long hpa;/* Host physical address */
 +struct page *page;/* page struct of the very first subpage */
 +unsigned long size;/* Huge page size (always 16MB at the moment) */
 +};
 +
   struct kvmppc_linear_info {
   void*base_virt;
   unsigned long base_pfn;
 diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c
 index 51678ec..e0b6eca 100644
 --- a/arch/powerpc/kernel/iommu.c
 +++ b/arch/powerpc/kernel/iommu.c
 @@ -999,7 +999,8 @@ int iommu_free_tces(struct iommu_table *tbl, unsigned
 long entry,
   if (!pg) {
   ret = -EAGAIN;
   } else if (PageCompound(pg)) {
 -ret = -EAGAIN;
 +/* Hugepages will be released at KVM exit */
 +ret = 0;
   } else {
   if (oldtce  TCE_PCI_WRITE)
   SetPageDirty(pg);
 @@ -1009,6 +1010,9 @@ int iommu_free_tces(struct iommu_table *tbl,
 unsigned long entry,
   struct page *pg = pfn_to_page(oldtce  PAGE_SHIFT);
   if (!pg) {
   ret = -EAGAIN;
 +} else if (PageCompound(pg)) {
 +/* Hugepages will be released at KVM exit */
 +ret = 0;
   } else {
   if (oldtce  TCE_PCI_WRITE)
   SetPageDirty(pg);
 diff --git 

[PATCH 8/8] KVM: PPC: Add hugepage support for IOMMU in-kernel handling

2013-07-06 Thread Alexey Kardashevskiy
This adds special support for huge pages (16MB).  The reference
counting cannot be easily done for such pages in real mode (when
MMU is off) so we added a list of huge pages.  It is populated in
virtual mode and get_page is called just once per a huge page.
Real mode handlers check if the requested page is huge and in the list,
then no reference counting is done, otherwise an exit to virtual mode
happens.  The list is released at KVM exit.  At the moment the fastest
card available for tests uses up to 9 huge pages so walking through this
list is not very expensive.  However this can change and we may want
to optimize this.

Signed-off-by: Paul Mackerras pau...@samba.org
Signed-off-by: Alexey Kardashevskiy a...@ozlabs.ru

---

Changes:
2013/06/27:
* list of huge pages replaced with a hashtable for better performance
* spinlock removed from real mode and only protects insertion of new
huge pages descriptors into the hashtable

2013/06/05:
* fixed compile error when CONFIG_IOMMU_API=n

2013/05/20:
* the real mode handler now searches for a huge page by gpa (used to be pte)
* the virtual mode handler prints warning if it is called twice for the same
huge page as the real mode handler is expected to fail just once - when a huge
page is not in the list yet.
* the huge page is refcounted twice - when added to the hugepage list and
when used in the virtual mode hcall handler (can be optimized but it will
make the patch less nice).

Signed-off-by: Alexey Kardashevskiy a...@ozlabs.ru
---
 arch/powerpc/include/asm/kvm_host.h |  25 +
 arch/powerpc/kernel/iommu.c |   6 ++-
 arch/powerpc/kvm/book3s_64_vio.c| 104 +---
 arch/powerpc/kvm/book3s_64_vio_hv.c |  21 ++--
 4 files changed, 146 insertions(+), 10 deletions(-)

diff --git a/arch/powerpc/include/asm/kvm_host.h 
b/arch/powerpc/include/asm/kvm_host.h
index 53e61b2..a7508cf 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -30,6 +30,7 @@
 #include <linux/kvm_para.h>
 #include <linux/list.h>
 #include <linux/atomic.h>
+#include <linux/hashtable.h>
 #include <asm/kvm_asm.h>
 #include <asm/processor.h>
 #include <asm/page.h>
@@ -182,10 +183,34 @@ struct kvmppc_spapr_tce_table {
u32 window_size;
struct iommu_group *grp;/* used for IOMMU groups */
struct vfio_group *vfio_grp;/* used for IOMMU groups */
+   DECLARE_HASHTABLE(hash_tab, ilog2(64)); /* used for IOMMU groups */
+   spinlock_t hugepages_write_lock;/* used for IOMMU groups */
struct { struct { unsigned long put, indir, stuff; } rm, vm; } stat;
struct page *pages[0];
 };
 
+/*
+ * The KVM guest can be backed with 16MB pages.
+ * In this case, we cannot do page counting from the real mode
+ * as the compound pages are used - they are linked in a list
+ * with pointers as virtual addresses which are inaccessible
+ * in real mode.
+ *
+ * The code below keeps a 16MB pages list and uses page struct
+ * in real mode if it is already locked in RAM and inserted into
+ * the list or switches to the virtual mode where it can be
+ * handled in a usual manner.
+ */
+#define KVMPPC_SPAPR_HUGEPAGE_HASH(gpa)	hash_32(gpa >> 24, 32)
+
+struct kvmppc_spapr_iommu_hugepage {
+   struct hlist_node hash_node;
+   unsigned long gpa;  /* Guest physical address */
+   unsigned long hpa;  /* Host physical address */
+   struct page *page;  /* page struct of the very first subpage */
+   unsigned long size; /* Huge page size (always 16MB at the moment) */
+};
+
 struct kvmppc_linear_info {
void*base_virt;
unsigned longbase_pfn;
diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c
index 51678ec..e0b6eca 100644
--- a/arch/powerpc/kernel/iommu.c
+++ b/arch/powerpc/kernel/iommu.c
@@ -999,7 +999,8 @@ int iommu_free_tces(struct iommu_table *tbl, unsigned long 
entry,
if (!pg) {
ret = -EAGAIN;
} else if (PageCompound(pg)) {
-   ret = -EAGAIN;
+   /* Hugepages will be released at KVM exit */
+   ret = 0;
} else {
if (oldtce & TCE_PCI_WRITE)
SetPageDirty(pg);
@@ -1009,6 +1010,9 @@ int iommu_free_tces(struct iommu_table *tbl, unsigned 
long entry,
struct page *pg = pfn_to_page(oldtce >> PAGE_SHIFT);
if (!pg) {
ret = -EAGAIN;
+   } else if (PageCompound(pg)) {
+   /* Hugepages will be released at KVM exit */
+   ret = 0;
} else {
if (oldtce & TCE_PCI_WRITE)
SetPageDirty(pg);
diff --git 

Re: [PATCH 8/8] KVM: PPC: Add hugepage support for IOMMU in-kernel handling

2013-06-27 Thread Scott Wood

On 06/27/2013 12:02:36 AM, Alexey Kardashevskiy wrote:

+/*
+ * The KVM guest can be backed with 16MB pages.
+ * In this case, we cannot do page counting from the real mode
+ * as the compound pages are used - they are linked in a list
+ * with pointers as virtual addresses which are inaccessible
+ * in real mode.
+ *
+ * The code below keeps a 16MB pages list and uses page struct
+ * in real mode if it is already locked in RAM and inserted into
+ * the list or switches to the virtual mode where it can be
+ * handled in a usual manner.
+ */
+#define KVMPPC_HUGEPAGE_HASH(gpa)  hash_32(gpa >> 24, 32)
+
+struct kvmppc_iommu_hugepage {
+   struct hlist_node hash_node;
+   unsigned long gpa;  /* Guest physical address */
+   unsigned long hpa;  /* Host physical address */
+	struct page *page;	/* page struct of the very first  
subpage */
+	unsigned long size;	/* Huge page size (always 16MB at the  
moment) */

+};


Shouldn't this be namespaced to something like book3s or spapr?

-Scott
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


[PATCH 8/8] KVM: PPC: Add hugepage support for IOMMU in-kernel handling

2013-06-26 Thread Alexey Kardashevskiy
This adds special support for huge pages (16MB).  The reference
counting cannot be easily done for such pages in real mode (when
MMU is off) so we added a list of huge pages.  It is populated in
virtual mode and get_page is called just once per a huge page.
Real mode handlers check if the requested page is huge and in the list,
then no reference counting is done, otherwise an exit to virtual mode
happens.  The list is released at KVM exit.  At the moment the fastest
card available for tests uses up to 9 huge pages so walking through this
list is not very expensive.  However this can change and we may want
to optimize this.

Signed-off-by: Paul Mackerras pau...@samba.org
Signed-off-by: Alexey Kardashevskiy a...@ozlabs.ru

---

Changes:
2013/06/27:
* list of huge pages replaced with a hashtable for better performance
* spinlock removed from real mode and only protects insertion of new
huge page descriptors into the hashtable

2013/06/05:
* fixed compile error when CONFIG_IOMMU_API=n

2013/05/20:
* the real mode handler now searches for a huge page by gpa (used to be pte)
* the virtual mode handler prints warning if it is called twice for the same
huge page as the real mode handler is expected to fail just once - when a huge
page is not in the list yet.
* the huge page is refcounted twice - when added to the hugepage list and
when used in the virtual mode hcall handler (can be optimized but it will
make the patch less nice).
---
 arch/powerpc/include/asm/kvm_host.h |   25 +
 arch/powerpc/kvm/book3s_64_vio.c|   95 +--
 arch/powerpc/kvm/book3s_64_vio_hv.c |   24 +++--
 3 files changed, 138 insertions(+), 6 deletions(-)

diff --git a/arch/powerpc/include/asm/kvm_host.h 
b/arch/powerpc/include/asm/kvm_host.h
index 716ab18..0ad6189 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -30,6 +30,7 @@
 #include <linux/kvm_para.h>
 #include <linux/list.h>
 #include <linux/atomic.h>
+#include <linux/hashtable.h>
 #include <asm/kvm_asm.h>
 #include <asm/processor.h>
 #include <asm/page.h>
@@ -182,9 +183,33 @@ struct kvmppc_spapr_tce_table {
u32 window_size;
struct iommu_group *grp;/* used for IOMMU groups */
struct file *vfio_filp; /* used for IOMMU groups */
+   DECLARE_HASHTABLE(hash_tab, ilog2(64)); /* used for IOMMU groups */
+   spinlock_t hugepages_write_lock;/* used for IOMMU groups */
struct page *pages[0];
 };
 
+/*
+ * The KVM guest can be backed with 16MB pages.
+ * In this case, we cannot do page counting from the real mode
+ * as the compound pages are used - they are linked in a list
+ * with pointers as virtual addresses which are inaccessible
+ * in real mode.
+ *
+ * The code below keeps a 16MB pages list and uses page struct
+ * in real mode if it is already locked in RAM and inserted into
+ * the list or switches to the virtual mode where it can be
+ * handled in a usual manner.
+ */
+#define KVMPPC_HUGEPAGE_HASH(gpa)  hash_32(gpa >> 24, 32)
+
+struct kvmppc_iommu_hugepage {
+   struct hlist_node hash_node;
+   unsigned long gpa;  /* Guest physical address */
+   unsigned long hpa;  /* Host physical address */
+   struct page *page;  /* page struct of the very first subpage */
+   unsigned long size; /* Huge page size (always 16MB at the moment) */
+};
+
 struct kvmppc_linear_info {
void*base_virt;
unsigned longbase_pfn;
diff --git a/arch/powerpc/kvm/book3s_64_vio.c b/arch/powerpc/kvm/book3s_64_vio.c
index a5d0195..6cedfe9 100644
--- a/arch/powerpc/kvm/book3s_64_vio.c
+++ b/arch/powerpc/kvm/book3s_64_vio.c
@@ -47,6 +47,78 @@
 #define TCES_PER_PAGE  (PAGE_SIZE / sizeof(u64))
 #define ERROR_ADDR  ((void *)~(unsigned long)0x0)
 
+#ifdef CONFIG_IOMMU_API
+/* Adds a new huge page descriptor to the hashtable */
+static long kvmppc_iommu_hugepage_try_add(
+   struct kvmppc_spapr_tce_table *tt,
+   pte_t pte, unsigned long hva, unsigned long gpa,
+   unsigned long pg_size)
+{
+   long ret = 0;
+   struct kvmppc_iommu_hugepage *hp;
+   struct page *pg;
+   unsigned key = KVMPPC_HUGEPAGE_HASH(gpa);
+
+   spin_lock(&tt->hugepages_write_lock);
+   hash_for_each_possible_rcu(tt->hash_tab, hp, hash_node, key) {
+   if (KVMPPC_HUGEPAGE_HASH(hp->gpa) != key)
+   continue;
+   if ((gpa < hp->gpa) || (gpa >= hp->gpa + hp->size))
+   continue;
+   goto unlock_exit;
+   }
+
+   hva = hva & ~(pg_size - 1);
+   ret = get_user_pages_fast(hva, 1, true/*write*/, &pg);
+   if ((ret != 1) || !pg) {
+   ret = -EFAULT;
+   goto unlock_exit;
+   }
+   ret = 0;
+
+   hp = kzalloc(sizeof(*hp), GFP_KERNEL);
+   if (!hp) {
+   ret = -ENOMEM;
+   goto unlock_exit;
+   }
+
+   hp-page = pg;
+