On 11.05.2011, at 12:46, Paul Mackerras wrote:

> From: David Gibson <d...@au1.ibm.com>
> 
> This improves I/O performance for guests using the PAPR paravirtualization
> interface by making the H_PUT_TCE hcall faster, by implementing it in
> real mode.  H_PUT_TCE is used for updating virtual IOMMU tables, and is
> used both for virtual I/O and for real I/O in the PAPR interface.
> 
> Since this moves the IOMMU tables into the kernel, we define a new
> KVM_CREATE_SPAPR_TCE ioctl to allow qemu to create the tables.
> The ioctl returns a file descriptor which can be used to mmap the
> newly created table.
> 
> Signed-off-by: Paul Mackerras <pau...@samba.org>
> ---
> arch/powerpc/include/asm/kvm.h           |    9 +++
> arch/powerpc/include/asm/kvm_book3s_64.h |    2 +
> arch/powerpc/include/asm/kvm_host.h      |    9 +++
> arch/powerpc/include/asm/kvm_ppc.h       |    2 +
> arch/powerpc/kvm/Makefile                |    3 +-
> arch/powerpc/kvm/book3s_64_vio_hv.c      |   73 +++++++++++++++++++
> arch/powerpc/kvm/book3s_hv.c             |  116 +++++++++++++++++++++++++++++-
> arch/powerpc/kvm/book3s_hv_rmhandlers.S  |    2 +-
> arch/powerpc/kvm/powerpc.c               |   18 +++++
> include/linux/kvm.h                      |    5 ++

This one definitely needs documentation :).

> 10 files changed, 236 insertions(+), 3 deletions(-)
> create mode 100644 arch/powerpc/kvm/book3s_64_vio_hv.c
> 
> diff --git a/arch/powerpc/include/asm/kvm.h b/arch/powerpc/include/asm/kvm.h
> index 18ea696..a9e641b 100644
> --- a/arch/powerpc/include/asm/kvm.h
> +++ b/arch/powerpc/include/asm/kvm.h
> @@ -22,6 +22,9 @@
> 
> #include <linux/types.h>
> 
> +/* Select powerpc specific features in <linux/kvm.h> */
> +#define __KVM_HAVE_SPAPR_TCE
> +
> struct kvm_regs {
>       __u64 pc;
>       __u64 cr;
> @@ -88,4 +91,10 @@ struct kvm_guest_debug_arch {
> #define KVM_INTERRUPT_UNSET   -2U
> #define KVM_INTERRUPT_SET_LEVEL       -3U
> 
> +/* for KVM_CAP_SPAPR_TCE */
> +struct kvm_create_spapr_tce {
> +     __u64 liobn;
> +     __u32 window_size;
> +};
> +
> #endif /* __LINUX_KVM_POWERPC_H */
> diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h b/arch/powerpc/include/asm/kvm_book3s_64.h
> index 4cadd61..e1a096b 100644
> --- a/arch/powerpc/include/asm/kvm_book3s_64.h
> +++ b/arch/powerpc/include/asm/kvm_book3s_64.h
> @@ -25,4 +25,6 @@ static inline struct kvmppc_book3s_shadow_vcpu *to_svcpu(struct kvm_vcpu *vcpu)
>       return &get_paca()->shadow_vcpu;
> }
> 
> +#define SPAPR_TCE_SHIFT              12
> +
> #endif /* __ASM_KVM_BOOK3S_64_H__ */
> diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
> index af6703e..cda183e 100644
> --- a/arch/powerpc/include/asm/kvm_host.h
> +++ b/arch/powerpc/include/asm/kvm_host.h
> @@ -144,6 +144,14 @@ struct kvmppc_pginfo {
>       atomic_t refcnt;
> };
> 
> +struct kvmppc_spapr_tce_table {
> +     struct list_head list;
> +     struct kvm *kvm;
> +     u64 liobn;
> +     u32 window_size;
> +     struct page *pages[0];
> +};
> +
> struct kvm_arch {
>       unsigned long hpt_virt;
>       unsigned long ram_npages;
> @@ -157,6 +165,7 @@ struct kvm_arch {
>       unsigned long host_sdr1;
>       int tlbie_lock;
>       unsigned short last_vcpu[NR_CPUS];
> +     struct list_head spapr_tce_tables;
> };
> 
> struct kvmppc_pte {
> diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
> index b4ee11a..de683fa 100644
> --- a/arch/powerpc/include/asm/kvm_ppc.h
> +++ b/arch/powerpc/include/asm/kvm_ppc.h
> @@ -117,6 +117,8 @@ extern long kvmppc_prepare_vrma(struct kvm *kvm,
> extern void kvmppc_map_vrma(struct kvm *kvm,
>                           struct kvm_userspace_memory_region *mem);
> extern int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu);
> +extern long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm,
> +                             struct kvm_create_spapr_tce *args);
> extern int kvmppc_core_init_vm(struct kvm *kvm);
> extern void kvmppc_core_destroy_vm(struct kvm *kvm);
> extern int kvmppc_core_prepare_memory_region(struct kvm *kvm,
> diff --git a/arch/powerpc/kvm/Makefile b/arch/powerpc/kvm/Makefile
> index 37c1a60..8ba062f 100644
> --- a/arch/powerpc/kvm/Makefile
> +++ b/arch/powerpc/kvm/Makefile
> @@ -59,7 +59,8 @@ kvm-book3s_64_hv-objs := \
>       book3s.o \
>       book3s_hv.o \
>       book3s_hv_interrupts.o \
> -     book3s_64_mmu_hv.o
> +     book3s_64_mmu_hv.o \
> +     book3s_64_vio_hv.o
> kvm-objs-$(CONFIG_KVM_BOOK3S_64_HV) := $(kvm-book3s_64_hv-objs)
> 
> kvm-book3s_32-objs := \
> diff --git a/arch/powerpc/kvm/book3s_64_vio_hv.c b/arch/powerpc/kvm/book3s_64_vio_hv.c
> new file mode 100644
> index 0000000..ea0f8c5
> --- /dev/null
> +++ b/arch/powerpc/kvm/book3s_64_vio_hv.c
> @@ -0,0 +1,73 @@
> +/*
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License, version 2, as
> + * published by the Free Software Foundation.
> + *
> + * This program is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> + * GNU General Public License for more details.
> + *
> + * You should have received a copy of the GNU General Public License
> + * along with this program; if not, write to the Free Software
> + * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
> + *
> + * Copyright 2010 Paul Mackerras, IBM Corp. <pau...@au1.ibm.com>
> + * Copyright 2011 David Gibson, IBM Corporation <d...@au1.ibm.com>
> + */
> +
> +#include <linux/types.h>
> +#include <linux/string.h>
> +#include <linux/kvm.h>
> +#include <linux/kvm_host.h>
> +#include <linux/highmem.h>
> +#include <linux/gfp.h>
> +#include <linux/slab.h>
> +#include <linux/hugetlb.h>
> +#include <linux/list.h>
> +
> +#include <asm/tlbflush.h>
> +#include <asm/kvm_ppc.h>
> +#include <asm/kvm_book3s.h>
> +#include <asm/mmu-hash64.h>
> +#include <asm/hvcall.h>
> +#include <asm/synch.h>
> +#include <asm/ppc-opcode.h>
> +#include <asm/kvm_host.h>
> +#include <asm/udbg.h>
> +
> +#define TCES_PER_PAGE        (PAGE_SIZE / sizeof(u64))
> +

It would be great to somehow mark code that runs in real mode as such - either 
by an attribute in the function header or by a simple comment.

> +long kvmppc_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
> +                   unsigned long ioba, unsigned long tce)
> +{
> +     struct kvm *kvm = vcpu->kvm;
> +     struct kvmppc_spapr_tce_table *stt;
> +
> +     /* udbg_printf("H_PUT_TCE(): liobn=0x%lx ioba=0x%lx, tce=0x%lx\n", */
> +     /*          liobn, ioba, tce); */
> +
> +     list_for_each_entry(stt, &kvm->arch.spapr_tce_tables, list) {
> +             if (stt->liobn == liobn) {
> +                     unsigned long idx = ioba >> SPAPR_TCE_SHIFT;
> +                     struct page *page;
> +                     u64 *tbl;
> +
> +                     /* udbg_printf("H_PUT_TCE: liobn 0x%lx => stt=%p  window_size=0x%x\n", */
> +                     /*          liobn, stt, stt->window_size); */
> +                     if (ioba >= stt->window_size)
> +                             return H_PARAMETER;
> +
> +                     page = stt->pages[idx / TCES_PER_PAGE];
> +                     tbl = (u64 *)page_address(page);
> +
> +                     /* FIXME: Need to validate the TCE itself */
> +                     /* udbg_printf("tce @ %p\n", &tbl[idx % TCES_PER_PAGE]); */
> +                     tbl[idx % TCES_PER_PAGE] = tce;
> +                     return H_SUCCESS;
> +             }
> +     }
> +
> +     /* Didn't find the liobn, punt it to userspace */
> +     return H_TOO_HARD;
> +}
> diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
> index 377a35a..eed2c10 100644
> --- a/arch/powerpc/kvm/book3s_hv.c
> +++ b/arch/powerpc/kvm/book3s_hv.c
> @@ -506,6 +506,116 @@ int kvmppc_vcpu_run(struct kvm_run *run, struct kvm_vcpu *vcpu)
>       return r;
> }
> 
> +static long kvmppc_stt_npages(unsigned long window_size)
> +{
> +     return ALIGN((window_size >> SPAPR_TCE_SHIFT)
> +                  * sizeof(u64), PAGE_SIZE) / PAGE_SIZE;
> +}
> +
> +static void release_spapr_tce_table(struct kvmppc_spapr_tce_table *stt)
> +{
> +     struct kvm *kvm = stt->kvm;
> +     int i;
> +
> +     mutex_lock(&kvm->lock);
> +     list_del(&stt->list);
> +     for (i = 0; i < kvmppc_stt_npages(stt->window_size); i++)
> +             __free_page(stt->pages[i]);
> +     kfree(stt);
> +     mutex_unlock(&kvm->lock);
> +
> +     kvm_put_kvm(kvm);
> +}
> +
> +static int kvm_spapr_tce_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
> +{
> +     struct kvmppc_spapr_tce_table *stt = vma->vm_file->private_data;
> +     struct page *page;
> +
> +     if (vmf->pgoff >= kvmppc_stt_npages(stt->window_size))
> +             return VM_FAULT_SIGBUS;
> +
> +     page = stt->pages[vmf->pgoff];
> +     get_page(page);
> +     vmf->page = page;
> +     return 0;
> +}
> +
> +static const struct vm_operations_struct kvm_spapr_tce_vm_ops = {
> +     .fault = kvm_spapr_tce_fault,
> +};
> +
> +static int kvm_spapr_tce_mmap(struct file *file, struct vm_area_struct *vma)
> +{
> +     vma->vm_ops = &kvm_spapr_tce_vm_ops;
> +     return 0;
> +}
> +
> +static int kvm_spapr_tce_release(struct inode *inode, struct file *filp)
> +{
> +     struct kvmppc_spapr_tce_table *stt = filp->private_data;
> +
> +     release_spapr_tce_table(stt);
> +     return 0;
> +}
> +
> +static struct file_operations kvm_spapr_tce_fops = {
> +     .mmap           = kvm_spapr_tce_mmap,
> +     .release        = kvm_spapr_tce_release,
> +};
> +
> +long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm,
> +                                struct kvm_create_spapr_tce *args)
> +{
> +     struct kvmppc_spapr_tce_table *stt = NULL;
> +     long npages;
> +     int ret = -ENOMEM;
> +     int i;
> +
> +     /* Check this LIOBN hasn't been previously allocated */
> +     list_for_each_entry(stt, &kvm->arch.spapr_tce_tables, list) {
> +             if (stt->liobn == args->liobn)
> +                     return -EBUSY;
> +     }
> +
> +     npages = kvmppc_stt_npages(args->window_size);
> +
> +     stt = kzalloc(sizeof(*stt) + npages* sizeof(struct page *),
> +                   GFP_KERNEL);
> +     if (!stt)
> +             goto fail;
> +
> +     stt->liobn = args->liobn;
> +     stt->window_size = args->window_size;
> +     stt->kvm = kvm;
> +
> +     for (i = 0; i < npages; i++) {
> +             stt->pages[i] = alloc_page(GFP_KERNEL | __GFP_ZERO);
> +             if (!stt->pages[i])
> +                     goto fail;
> +     }
> +
> +     kvm_get_kvm(kvm);
> +
> +     mutex_lock(&kvm->lock);
> +     list_add(&stt->list, &kvm->arch.spapr_tce_tables);
> +
> +     mutex_unlock(&kvm->lock);
> +
> +     return anon_inode_getfd("kvm-spapr-tce", &kvm_spapr_tce_fops,
> +                             stt, O_RDONLY);
> +
> +fail:
> +     if (stt) {
> +             for (i = 0; i < npages; i++)
> +                     if (stt->pages[i])
> +                             __free_page(stt->pages[i]);
> +
> +             kfree(stt);
> +     }
> +     return ret;
> +}
> +
> int kvmppc_core_prepare_memory_region(struct kvm *kvm,
>                               struct kvm_userspace_memory_region *mem)
> {
> @@ -527,13 +637,17 @@ int kvmppc_core_init_vm(struct kvm *kvm)
> 
>       /* Allocate hashed page table */
>       r = kvmppc_alloc_hpt(kvm);
> +     if (r)
> +             return r;
> 
> -     return r;
> +     INIT_LIST_HEAD(&kvm->arch.spapr_tce_tables);
> +     return 0;
> }
> 
> void kvmppc_core_destroy_vm(struct kvm *kvm)
> {
>       kvmppc_free_hpt(kvm);
> +     WARN_ON(!list_empty(&kvm->arch.spapr_tce_tables));
> }
> 
> /* These are stubs for now */
> diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
> index e8a8f3c..95f6386 100644
> --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
> +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
> @@ -722,7 +722,7 @@ hcall_real_table:
>       .long   0               /* 0x14 - H_CLEAR_REF */
>       .long   .kvmppc_h_protect - hcall_real_table
>       .long   0               /* 0x1c - H_GET_TCE */
> -     .long   0               /* 0x20 - H_SET_TCE */
> +     .long   .kvmppc_h_put_tce - hcall_real_table
>       .long   0               /* 0x24 - H_SET_SPRG0 */
>       .long   .kvmppc_h_set_dabr - hcall_real_table
>       .long   0               /* 0x2c */
> diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
> index 7bfe413..10f777a 100644
> --- a/arch/powerpc/kvm/powerpc.c
> +++ b/arch/powerpc/kvm/powerpc.c
> @@ -196,6 +196,11 @@ int kvm_dev_ioctl_check_extension(long ext)
>               r = KVM_COALESCED_MMIO_PAGE_OFFSET;
>               break;
> #endif
> +#ifdef CONFIG_KVM_BOOK3S_64_HV
> +     case KVM_CAP_SPAPR_TCE:
> +             r = 1;
> +             break;
> +#endif
>       default:
>               r = 0;
>               break;
> @@ -628,6 +633,19 @@ long kvm_arch_vm_ioctl(struct file *filp,
> 
>               break;
>       }
> +#ifdef CONFIG_KVM_BOOK3S_64_HV
> +     case KVM_CREATE_SPAPR_TCE: {
> +             struct kvm_create_spapr_tce create_tce;
> +             struct kvm *kvm = filp->private_data;
> +
> +             r = -EFAULT;
> +             if (copy_from_user(&create_tce, argp, sizeof(create_tce)))
> +                     goto out;
> +             r = kvm_vm_ioctl_create_spapr_tce(kvm, &create_tce);
> +             goto out;
> +     }

I'm not sure I fully understand how this is supposed to work. If the tables are 
kept inside the kernel, how does userspace get to know where to DMA to?


Alex

_______________________________________________
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Reply via email to