from:"Wu, Feng"



> -Original Message-
> From: linux-kernel-ow...@vger.kernel.org [mailto:linux-kernel-
> ow...@vger.kernel.org] On Behalf Of Yang Zhang
> Sent: Monday, December 21, 2015 10:06 AM
> To: Wu, Feng <feng...@intel.com>; pbonz...@redhat.com;
> rkrc...@redhat.com
> Cc: kvm@vger.kernel.org; linux-ker...@vger.kernel.org
> Subject: Re: [PATCH v2 1/2] KVM: x86: Use vector-hashing to deliver lowest-
> priority interrupts
> 
> On 2015/12/21 9:50, Wu, Feng wrote:
> >
> >
> >> -Original Message-
> >> From: Yang Zhang [mailto:yang.zhang...@gmail.com]
> >> Sent: Monday, December 21, 2015 9:46 AM
> >> To: Wu, Feng <feng...@intel.com>; pbonz...@redhat.com;
> >> rkrc...@redhat.com
> >> Cc: kvm@vger.kernel.org; linux-ker...@vger.kernel.org
> >> Subject: Re: [PATCH v2 1/2] KVM: x86: Use vector-hashing to deliver lowest-
> >> priority interrupts
> >>
> >> On 2015/12/16 9:37, Feng Wu wrote:
> >>> Use vector-hashing to deliver lowest-priority interrupts, As an
> >>> example, modern Intel CPUs in server platform use this method to
> >>> handle lowest-priority interrupts.
> >>>
> >>> Signed-off-by: Feng Wu <feng...@intel.com>
> >>> ---
> >>>arch/x86/kvm/irq_comm.c | 27 ++-
> >>>arch/x86/kvm/lapic.c| 57
> >> -
> >>>arch/x86/kvm/lapic.h|  2 ++
> >>>arch/x86/kvm/x86.c  |  9 
> >>>arch/x86/kvm/x86.h  |  1 +
> >>>5 files changed, 81 insertions(+), 15 deletions(-)
> >>>
> >>>bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic
> *src,
> >>>   struct kvm_lapic_irq *irq, int *r, unsigned long 
> >>> *dest_map)
> >>>{
> >>> @@ -731,17 +747,38 @@ bool kvm_irq_delivery_to_apic_fast(struct kvm
> >> *kvm, struct kvm_lapic *src,
> >>>   dst = map->logical_map[cid];
> >>>
> >>>   if (kvm_lowest_prio_delivery(irq)) {
> >>> - int l = -1;
> >>> - for_each_set_bit(i, , 16) {
> >>> - if (!dst[i])
> >>> - continue;
> >>> - if (l < 0)
> >>> - l = i;
> >>> - else if (kvm_apic_compare_prio(dst[i]->vcpu,
> >> dst[l]->vcpu) < 0)
> >>> - l = i;
> >>> + if (!kvm_vector_hashing_enabled()) {
> >>> + int l = -1;
> >>> + for_each_set_bit(i, , 16) {
> >>> + if (!dst[i])
> >>> + continue;
> >>> + if (l < 0)
> >>> + l = i;
> >>> + else if (kvm_apic_compare_prio(dst[i]-
> >>> vcpu, dst[l]->vcpu) < 0)
> >>> + l = i;
> >>> + }
> >>> + bitmap = (l >= 0) ? 1 << l : 0;
> >>> + } else {
> >>> + int idx = 0;
> >>> + unsigned int dest_vcpus = 0;
> >>> +
> >>> + for_each_set_bit(i, , 16) {
> >>> + if (!dst[i]
> >> && !kvm_lapic_enabled(dst[i]->vcpu)) {
> >>
> >> It should be or(||) not and (&&).
> >
> > Oh, you are right! My negligence! Thanks for pointing this out, Yang!
> 
> btw, i think the kvm_lapic_enabled check is wrong here? Why need it here?

If the lapic is not enabled, I think we cannot recognize it as a candidate,
can we?
Maybe Radim can confirm this. Radim, what is your opinion?

Thanks,
Feng
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

RE: [PATCH v2 2/2] KVM: x86: Add lowest-priority support for vt-d posted-interrupts



> -Original Message-
> From: Yang Zhang [mailto:yang.zhang...@gmail.com]
> Sent: Monday, December 21, 2015 10:01 AM
> To: Wu, Feng <feng...@intel.com>; pbonz...@redhat.com;
> rkrc...@redhat.com
> Cc: kvm@vger.kernel.org; linux-ker...@vger.kernel.org
> Subject: Re: [PATCH v2 2/2] KVM: x86: Add lowest-priority support for vt-d
> posted-interrupts
> 
> On 2015/12/21 9:55, Wu, Feng wrote:
> >
> >
> >> -Original Message-
> >> From: linux-kernel-ow...@vger.kernel.org [mailto:linux-kernel-
> >> ow...@vger.kernel.org] On Behalf Of Yang Zhang
> >> Sent: Monday, December 21, 2015 9:50 AM
> >> To: Wu, Feng <feng...@intel.com>; pbonz...@redhat.com;
> >> rkrc...@redhat.com
> >> Cc: kvm@vger.kernel.org; linux-ker...@vger.kernel.org
> >> Subject: Re: [PATCH v2 2/2] KVM: x86: Add lowest-priority support for vt-d
> >> posted-interrupts
> >>
> >> On 2015/12/16 9:37, Feng Wu wrote:
> >>> Use vector-hashing to deliver lowest-priority interrupts for
> >>> VT-d posted-interrupts.
> >>>
> >>> Signed-off-by: Feng Wu <feng...@intel.com>
> >>> ---
> >>>arch/x86/kvm/lapic.c | 67
> >> 
> >>>arch/x86/kvm/lapic.h |  2 ++
> >>>arch/x86/kvm/vmx.c   | 12 --
> >>>3 files changed, 79 insertions(+), 2 deletions(-)
> >>>
> >>> diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
> >>> index e29001f..d4f2c8f 100644
> >>> --- a/arch/x86/kvm/lapic.c
> >>> +++ b/arch/x86/kvm/lapic.c
> >>> @@ -854,6 +854,73 @@ out:
> >>>}
> >>>
> >>>/*
> >>> + * This routine handles lowest-priority interrupts using vector-hashing
> >>> + * mechanism. As an example, modern Intel CPUs use this method to
> handle
> >>> + * lowest-priority interrupts.
> >>> + *
> >>> + * Here is the details about the vector-hashing mechanism:
> >>> + * 1. For lowest-priority interrupts, store all the possible destination
> >>> + *vCPUs in an array.
> >>> + * 2. Use "guest vector % max number of destination vCPUs" to find the
> right
> >>> + *destination vCPU in the array for the lowest-priority interrupt.
> >>> + */
> >>> +struct kvm_vcpu *kvm_intr_vector_hashing_dest(struct kvm *kvm,
> >>> +   struct kvm_lapic_irq *irq)
> >>> +{
> >>> + struct kvm_apic_map *map;
> >>> + struct kvm_vcpu *vcpu = NULL;
> >>> +
> >>> + if (irq->shorthand)
> >>> + return NULL;
> >>> +
> >>> + rcu_read_lock();
> >>> + map = rcu_dereference(kvm->arch.apic_map);
> >>> +
> >>> + if (!map)
> >>> + goto out;
> >>> +
> >>> + if ((irq->dest_mode != APIC_DEST_PHYSICAL) &&
> >>> + kvm_lowest_prio_delivery(irq)) {
> >>> + u16 cid;
> >>> + int i, idx = 0;
> >>> + unsigned long bitmap = 1;
> >>> + unsigned int dest_vcpus = 0;
> >>> + struct kvm_lapic **dst = NULL;
> >>> +
> >>> +
> >>> + if (!kvm_apic_logical_map_valid(map))
> >>> + goto out;
> >>> +
> >>> + apic_logical_id(map, irq->dest_id, , (u16 *));
> >>> +
> >>> + if (cid >= ARRAY_SIZE(map->logical_map))
> >>> + goto out;
> >>> +
> >>> + dst = map->logical_map[cid];
> >>> +
> >>> + for_each_set_bit(i, , 16) {
> >>> + if (!dst[i] && !kvm_lapic_enabled(dst[i]->vcpu)) {
> >>> + clear_bit(i, );
> >>> + continue;
> >>> + }
> >>> + }
> >>> +
> >>> + dest_vcpus = hweight16(bitmap);
> >>> +
> >>> + if (dest_vcpus != 0) {
> >>> + idx = kvm_vector_2_index(irq->vector, dest_vcpus,
> >>> +  , 16);
> >>> + vcpu = dst[idx-1]->vcpu;
> >>> + }
> >>> + }
> >>> +
> >>> +out:
> >>> + rcu_read_unlock();
> &

RE: [PATCH v2 1/2] KVM: x86: Use vector-hashing to deliver lowest-priority interrupts

Hi Radim/Paolo,

> -Original Message-
> From: Yang Zhang [mailto:yang.zhang...@gmail.com]
> Sent: Tuesday, December 22, 2015 3:14 PM
> To: Wu, Feng <feng...@intel.com>; pbonz...@redhat.com;
> rkrc...@redhat.com
> Cc: kvm@vger.kernel.org; linux-ker...@vger.kernel.org; Jiang Liu
> (jiang@linux.intel.com) <jiang@linux.intel.com>
> Subject: Re: [PATCH v2 1/2] KVM: x86: Use vector-hashing to deliver lowest-
> priority interrupts
> 
> On 2015/12/22 14:59, Wu, Feng wrote:
> >
> >
> >> -Original Message-
> >> From: Yang Zhang [mailto:yang.zhang...@gmail.com]
> >> Sent: Tuesday, December 22, 2015 2:49 PM
> >> To: Wu, Feng <feng...@intel.com>; pbonz...@redhat.com;
> >> rkrc...@redhat.com
> >> Cc: kvm@vger.kernel.org; linux-ker...@vger.kernel.org; Jiang Liu
> >> (jiang@linux.intel.com) <jiang@linux.intel.com>
> >> Subject: Re: [PATCH v2 1/2] KVM: x86: Use vector-hashing to deliver lowest-
> >> priority interrupts
> >>
> >>>>>>
> >>>>>> On 2015/12/16 9:37, Feng Wu wrote:
> >>>>>>> Use vector-hashing to deliver lowest-priority interrupts, As an
> >>>>>>> example, modern Intel CPUs in server platform use this method to
> >>>>>>> handle lowest-priority interrupts.
> >>>>>>>
> >>>>>>> Signed-off-by: Feng Wu <feng...@intel.com>
> >>>>>>> ---
> >>>>>>>  arch/x86/kvm/irq_comm.c | 27 ++-
> >>>>>>>  arch/x86/kvm/lapic.c| 57
> >>>>>> -
> >>>>>>>  arch/x86/kvm/lapic.h|  2 ++
> >>>>>>>  arch/x86/kvm/x86.c  |  9 
> >>>>>>>  arch/x86/kvm/x86.h  |  1 +
> >>>>>>>  5 files changed, 81 insertions(+), 15 deletions(-)
> >>>>>>>
> >>>>>>>  bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct
> kvm_lapic
> >>>> *src,
> >>>>>>>   struct kvm_lapic_irq *irq, int *r, unsigned long
> >> *dest_map)
> >>>>>>>  {
> >>>>>>> @@ -731,17 +747,38 @@ bool kvm_irq_delivery_to_apic_fast(struct
> kvm
> >>>>>> *kvm, struct kvm_lapic *src,
> >>>>>>>   dst = map->logical_map[cid];
> >>>>>>>
> >>>>>>>   if (kvm_lowest_prio_delivery(irq)) {
> >>>>>>> - int l = -1;
> >>>>>>> - for_each_set_bit(i, , 16) {
> >>>>>>> - if (!dst[i])
> >>>>>>> - continue;
> >>>>>>> - if (l < 0)
> >>>>>>> - l = i;
> >>>>>>> - else if (kvm_apic_compare_prio(dst[i]-
> >vcpu,
> >>>>>> dst[l]->vcpu) < 0)
> >>>>>>> - l = i;
> >>>>>>> + if (!kvm_vector_hashing_enabled()) {
> >>>>>>> + int l = -1;
> >>>>>>> + for_each_set_bit(i, , 16) {
> >>>>>>> + if (!dst[i])
> >>>>>>> + continue;
> >>>>>>> + if (l < 0)
> >>>>>>> + l = i;
> >>>>>>> + else if
> (kvm_apic_compare_prio(dst[i]-
> >>>>>>> vcpu, dst[l]->vcpu) < 0)
> >>>>>>> + l = i;
> >>>>>>> + }
> >>>>>>> + bitmap = (l >= 0) ? 1 << l : 0;
> >>>>>>> + } else {
> >>>>>>> + int idx = 0;
> >>>>>>> + unsigned int dest_vcpus = 0;
> >>>>>>> +
> >>>>>>> + for_each_set_bit(i, , 16) {
> >>>>>>> + if (!dst[i]
> >>>>>> && !kvm_lapic_enabled(dst[i]->vcpu)) {
> >>>>>>
> >>>>>> It should be or(||) not and (&&).
> >>>>>
> >>>>> Oh, you are right! My negligence! Thanks for pointing this out, Yang!
> >>>>
> >>>> btw, i think the kvm_lapic_enabled check is wrong here? Why need it here?
> >>>
> >>> If the lapic is not enabled, I think we cannot recognize it as a
> >>> candidate, can we?
> >>> Maybe Radim can confirm this. Radim, what is your opinion?
> >>
> >> Lapic can be disabled by hw or sw. Here we only need to check the hw
> >> state, which is already covered while injecting the interrupt into the
> >> guest. I remember we (Gleb, Marcelo and me) discussed it some time ago,
> >> but I cannot find the mail thread.
> >
> > But if the lapic is disabled by software, we still cannot inject
> > interrupts to it, can we?
> 
> Yes, we cannot inject normal interrupts. But this is already covered by
> the current logic, and adding a check here seems meaningless. Conversely,
> it may do bad things.
> 

Let's wait for Radim/Paolo's opinions about this.

Thanks,
Feng
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

RE: [PATCH v2 1/2] KVM: x86: Use vector-hashing to deliver lowest-priority interrupts



> -Original Message-
> From: Yang Zhang [mailto:yang.zhang...@gmail.com]
> Sent: Tuesday, December 22, 2015 2:49 PM
> To: Wu, Feng <feng...@intel.com>; pbonz...@redhat.com;
> rkrc...@redhat.com
> Cc: kvm@vger.kernel.org; linux-ker...@vger.kernel.org; Jiang Liu
> (jiang@linux.intel.com) <jiang@linux.intel.com>
> Subject: Re: [PATCH v2 1/2] KVM: x86: Use vector-hashing to deliver lowest-
> priority interrupts
> 
> >>>>
> >>>> On 2015/12/16 9:37, Feng Wu wrote:
> >>>>> Use vector-hashing to deliver lowest-priority interrupts, As an
> >>>>> example, modern Intel CPUs in server platform use this method to
> >>>>> handle lowest-priority interrupts.
> >>>>>
> >>>>> Signed-off-by: Feng Wu <feng...@intel.com>
> >>>>> ---
> >>>>> arch/x86/kvm/irq_comm.c | 27 ++-
> >>>>> arch/x86/kvm/lapic.c| 57
> >>>> -
> >>>>> arch/x86/kvm/lapic.h|  2 ++
> >>>>> arch/x86/kvm/x86.c  |  9 
> >>>>> arch/x86/kvm/x86.h  |  1 +
> >>>>> 5 files changed, 81 insertions(+), 15 deletions(-)
> >>>>>
> >>>>> bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic
> >> *src,
> >>>>> struct kvm_lapic_irq *irq, int *r, unsigned long
> *dest_map)
> >>>>> {
> >>>>> @@ -731,17 +747,38 @@ bool kvm_irq_delivery_to_apic_fast(struct kvm
> >>>> *kvm, struct kvm_lapic *src,
> >>>>> dst = map->logical_map[cid];
> >>>>>
> >>>>> if (kvm_lowest_prio_delivery(irq)) {
> >>>>> -   int l = -1;
> >>>>> -   for_each_set_bit(i, , 16) {
> >>>>> -   if (!dst[i])
> >>>>> -   continue;
> >>>>> -   if (l < 0)
> >>>>> -   l = i;
> >>>>> -   else if 
> >>>>> (kvm_apic_compare_prio(dst[i]->vcpu,
> >>>> dst[l]->vcpu) < 0)
> >>>>> -   l = i;
> >>>>> +   if (!kvm_vector_hashing_enabled()) {
> >>>>> +   int l = -1;
> >>>>> +   for_each_set_bit(i, , 16) {
> >>>>> +   if (!dst[i])
> >>>>> +   continue;
> >>>>> +   if (l < 0)
> >>>>> +   l = i;
> >>>>> +   else if 
> >>>>> (kvm_apic_compare_prio(dst[i]-
> >>>>> vcpu, dst[l]->vcpu) < 0)
> >>>>> +   l = i;
> >>>>> +   }
> >>>>> +   bitmap = (l >= 0) ? 1 << l : 0;
> >>>>> +   } else {
> >>>>> +   int idx = 0;
> >>>>> +   unsigned int dest_vcpus = 0;
> >>>>> +
> >>>>> +   for_each_set_bit(i, , 16) {
> >>>>> +   if (!dst[i]
> >>>> && !kvm_lapic_enabled(dst[i]->vcpu)) {
> >>>>
> >>>> It should be or(||) not and (&&).
> >>>
> >>> Oh, you are right! My negligence! Thanks for pointing this out, Yang!
> >>
> >> btw, i think the kvm_lapic_enabled check is wrong here? Why need it here?
> >
> > If the lapic is not enabled, I think we cannot recognize it as a candidate,
> > can we?
> > Maybe Radim can confirm this. Radim, what is your opinion?
> 
> Lapic can be disabled by hw or sw. Here we only need to check the hw
> state, which is already covered while injecting the interrupt into the
> guest. I remember we (Gleb, Marcelo and me) discussed it some time ago,
> but I cannot find the mail thread.

But if the lapic is disabled by software, we still cannot inject interrupts to
it, can we?

Thanks,
Feng
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

RE: [PATCH v2 2/2] KVM: x86: Add lowest-priority support for vt-d posted-interrupts

2015-12-20 Thread Wu, Feng



> -Original Message-
> From: linux-kernel-ow...@vger.kernel.org [mailto:linux-kernel-
> ow...@vger.kernel.org] On Behalf Of Yang Zhang
> Sent: Monday, December 21, 2015 9:50 AM
> To: Wu, Feng <feng...@intel.com>; pbonz...@redhat.com;
> rkrc...@redhat.com
> Cc: kvm@vger.kernel.org; linux-ker...@vger.kernel.org
> Subject: Re: [PATCH v2 2/2] KVM: x86: Add lowest-priority support for vt-d
> posted-interrupts
> 
> On 2015/12/16 9:37, Feng Wu wrote:
> > Use vector-hashing to deliver lowest-priority interrupts for
> > VT-d posted-interrupts.
> >
> > Signed-off-by: Feng Wu <feng...@intel.com>
> > ---
> >   arch/x86/kvm/lapic.c | 67
> 
> >   arch/x86/kvm/lapic.h |  2 ++
> >   arch/x86/kvm/vmx.c   | 12 --
> >   3 files changed, 79 insertions(+), 2 deletions(-)
> >
> > diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
> > index e29001f..d4f2c8f 100644
> > --- a/arch/x86/kvm/lapic.c
> > +++ b/arch/x86/kvm/lapic.c
> > @@ -854,6 +854,73 @@ out:
> >   }
> >
> >   /*
> > + * This routine handles lowest-priority interrupts using vector-hashing
> > + * mechanism. As an example, modern Intel CPUs use this method to handle
> > + * lowest-priority interrupts.
> > + *
> > + * Here is the details about the vector-hashing mechanism:
> > + * 1. For lowest-priority interrupts, store all the possible destination
> > + *vCPUs in an array.
> > + * 2. Use "guest vector % max number of destination vCPUs" to find the 
> > right
> > + *destination vCPU in the array for the lowest-priority interrupt.
> > + */
> > +struct kvm_vcpu *kvm_intr_vector_hashing_dest(struct kvm *kvm,
> > + struct kvm_lapic_irq *irq)
> > +{
> > +   struct kvm_apic_map *map;
> > +   struct kvm_vcpu *vcpu = NULL;
> > +
> > +   if (irq->shorthand)
> > +   return NULL;
> > +
> > +   rcu_read_lock();
> > +   map = rcu_dereference(kvm->arch.apic_map);
> > +
> > +   if (!map)
> > +   goto out;
> > +
> > +   if ((irq->dest_mode != APIC_DEST_PHYSICAL) &&
> > +   kvm_lowest_prio_delivery(irq)) {
> > +   u16 cid;
> > +   int i, idx = 0;
> > +   unsigned long bitmap = 1;
> > +   unsigned int dest_vcpus = 0;
> > +   struct kvm_lapic **dst = NULL;
> > +
> > +
> > +   if (!kvm_apic_logical_map_valid(map))
> > +   goto out;
> > +
> > +   apic_logical_id(map, irq->dest_id, , (u16 *));
> > +
> > +   if (cid >= ARRAY_SIZE(map->logical_map))
> > +   goto out;
> > +
> > +   dst = map->logical_map[cid];
> > +
> > +   for_each_set_bit(i, , 16) {
> > +   if (!dst[i] && !kvm_lapic_enabled(dst[i]->vcpu)) {
> > +   clear_bit(i, );
> > +   continue;
> > +   }
> > +   }
> > +
> > +   dest_vcpus = hweight16(bitmap);
> > +
> > +   if (dest_vcpus != 0) {
> > +   idx = kvm_vector_2_index(irq->vector, dest_vcpus,
> > +, 16);
> > +   vcpu = dst[idx-1]->vcpu;
> > +   }
> > +   }
> > +
> > +out:
> > +   rcu_read_unlock();
> > +   return vcpu;
> > +}
> > +EXPORT_SYMBOL_GPL(kvm_intr_vector_hashing_dest);
> > +
> > +/*
> >* Add a pending IRQ into lapic.
> >* Return 1 if successfully added and 0 if discarded.
> >*/
> > diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
> > index 6890ef0..52bffce 100644
> > --- a/arch/x86/kvm/lapic.h
> > +++ b/arch/x86/kvm/lapic.h
> > @@ -172,4 +172,6 @@ bool kvm_intr_is_single_vcpu_fast(struct kvm *kvm,
> struct kvm_lapic_irq *irq,
> > struct kvm_vcpu **dest_vcpu);
> >   int kvm_vector_2_index(u32 vector, u32 dest_vcpus,
> >const unsigned long *bitmap, u32 bitmap_size);
> > +struct kvm_vcpu *kvm_intr_vector_hashing_dest(struct kvm *kvm,
> > + struct kvm_lapic_irq *irq);
> >   #endif
> > diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> > index 5eb56ed..3f89189 100644
> > --- a/arch/x86/kvm/vmx.c
> > +++ b/arch/x86/kvm/vmx.c
> > @@ -10702,8 +10702,16 @@ static int vmx_update_pi_irte(struct

RE: [PATCH v2 1/2] KVM: x86: Use vector-hashing to deliver lowest-priority interrupts

2015-12-20 Thread Wu, Feng



> -Original Message-
> From: Yang Zhang [mailto:yang.zhang...@gmail.com]
> Sent: Monday, December 21, 2015 9:46 AM
> To: Wu, Feng <feng...@intel.com>; pbonz...@redhat.com;
> rkrc...@redhat.com
> Cc: kvm@vger.kernel.org; linux-ker...@vger.kernel.org
> Subject: Re: [PATCH v2 1/2] KVM: x86: Use vector-hashing to deliver lowest-
> priority interrupts
> 
> On 2015/12/16 9:37, Feng Wu wrote:
> > Use vector-hashing to deliver lowest-priority interrupts, As an
> > example, modern Intel CPUs in server platform use this method to
> > handle lowest-priority interrupts.
> >
> > Signed-off-by: Feng Wu <feng...@intel.com>
> > ---
> >   arch/x86/kvm/irq_comm.c | 27 ++-
> >   arch/x86/kvm/lapic.c| 57
> -
> >   arch/x86/kvm/lapic.h|  2 ++
> >   arch/x86/kvm/x86.c  |  9 
> >   arch/x86/kvm/x86.h  |  1 +
> >   5 files changed, 81 insertions(+), 15 deletions(-)
> >
> > diff --git a/arch/x86/kvm/irq_comm.c b/arch/x86/kvm/irq_comm.c
> > index 84b96d3..c8c5f61 100644
> > --- a/arch/x86/kvm/irq_comm.c
> > +++ b/arch/x86/kvm/irq_comm.c
> > @@ -32,6 +32,7 @@
> >   #include "ioapic.h"
> >
> >   #include "lapic.h"
> > +#include "x86.h"
> >
> >   static int kvm_set_pic_irq(struct kvm_kernel_irq_routing_entry *e,
> >struct kvm *kvm, int irq_source_id, int level,
> > @@ -53,8 +54,10 @@ static int kvm_set_ioapic_irq(struct
> kvm_kernel_irq_routing_entry *e,
> >   int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src,
> > struct kvm_lapic_irq *irq, unsigned long *dest_map)
> >   {
> > -   int i, r = -1;
> > +   int i, r = -1, idx = 0;
> > struct kvm_vcpu *vcpu, *lowest = NULL;
> > +   unsigned long dest_vcpu_bitmap[BITS_TO_LONGS(KVM_MAX_VCPUS)];
> > +   unsigned int dest_vcpus = 0;
> >
> > if (irq->dest_mode == 0 && irq->dest_id == 0xff &&
> > kvm_lowest_prio_delivery(irq)) {
> > @@ -65,6 +68,8 @@ int kvm_irq_delivery_to_apic(struct kvm *kvm, struct
> kvm_lapic *src,
> > if (kvm_irq_delivery_to_apic_fast(kvm, src, irq, , dest_map))
> > return r;
> >
> > +   memset(dest_vcpu_bitmap, 0, sizeof(dest_vcpu_bitmap));
> > +
> > kvm_for_each_vcpu(i, vcpu, kvm) {
> > if (!kvm_apic_present(vcpu))
> > continue;
> > @@ -78,13 +83,25 @@ int kvm_irq_delivery_to_apic(struct kvm *kvm, struct
> kvm_lapic *src,
> > r = 0;
> > r += kvm_apic_set_irq(vcpu, irq, dest_map);
> > } else if (kvm_lapic_enabled(vcpu)) {
> > -   if (!lowest)
> > -   lowest = vcpu;
> > -   else if (kvm_apic_compare_prio(vcpu, lowest) < 0)
> > -   lowest = vcpu;
> > +   if (!kvm_vector_hashing_enabled()) {
> > +   if (!lowest)
> > +   lowest = vcpu;
> > +   else if (kvm_apic_compare_prio(vcpu, lowest) <
> 0)
> > +   lowest = vcpu;
> > +   } else {
> > +   __set_bit(vcpu->vcpu_id, dest_vcpu_bitmap);
> > +   dest_vcpus++;
> > +   }
> > }
> > }
> >
> > +   if (dest_vcpus != 0) {
> > +   idx = kvm_vector_2_index(irq->vector, dest_vcpus,
> > +dest_vcpu_bitmap, KVM_MAX_VCPUS);
> > +
> > +   lowest = kvm_get_vcpu(kvm, idx - 1);
> > +   }
> > +
> > if (lowest)
> > r = kvm_apic_set_irq(lowest, irq, dest_map);
> >
> > diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
> > index ecd4ea1..e29001f 100644
> > --- a/arch/x86/kvm/lapic.c
> > +++ b/arch/x86/kvm/lapic.c
> > @@ -678,6 +678,22 @@ bool kvm_apic_match_dest(struct kvm_vcpu *vcpu,
> struct kvm_lapic *source,
> > }
> >   }
> >
> > +int kvm_vector_2_index(u32 vector, u32 dest_vcpus,
> > +  const unsigned long *bitmap, u32 bitmap_size)
> > +{
> > +   u32 mod;
> > +   int i, idx = 0;
> > +
> > +   mod = vector % dest_vcpus;
> > +
> > +   for (i = 0; i <= mod; i++) {
> > +   idx = find_next_bit(bitmap, bitmap_size, idx) + 1;
> > +   BUG_ON(idx > bitm

RE: [PATCH] KVM: x86: Add lowest-priority support for vt-d posted-interrupts

2015-12-14 Thread Wu, Feng



> -Original Message-
> From: kvm-ow...@vger.kernel.org [mailto:kvm-ow...@vger.kernel.org] On
> Behalf Of Radim Krcmár
> Sent: Friday, December 11, 2015 10:38 PM
> To: Wu, Feng <feng...@intel.com>
> Cc: pbonz...@redhat.com; kvm@vger.kernel.org; linux-ker...@vger.kernel.org
> Subject: Re: [PATCH] KVM: x86: Add lowest-priority support for vt-d posted-
> interrupts
> 
> 2015-12-10 01:52+0000, Wu, Feng:
> >> From: Radim Krčmář [mailto:rkrc...@redhat.com]
> >> (Physical xAPIC+x2APIC mode is still somewhat reasonable and xAPIC CPUs
> >>  start with LDR=0, which means that operating system doesn't need to
> >>  utilize mixed mode, as defined by KVM, when switching to x2APIC.)
> >
> > I think you mean Physical xAPIC+Physical x2APIC mode, right? For physical
> > mode, we don't use LDR in any case, do we? So in physical mode, we only
> > use the APIC ID, that is why they can be mixed, is my understanding correct?
> 
> Yes.  (Technically, physical and logical addressing is always active in
> APIC, but xAPIC must have nonzero LDR to accept logical interrupts[1].)
> If all xAPIC LDRs are zero, KVM doesn't enter a "mixed mode" even if
> some are xAPIC and some x2APIC [2].
> 
> 1: Real LAPICs probably do not accept broadcasts on APICs where LDR=0,
>KVM LAPICs do, but lowest priority broadcast is not allowed anyway,
>so PI doesn't care.
> 
> 2: KVM allows OS-writeable APIC ID, which complicates things and real
>hardware probably doesn't allow it because of that ... we'd be saner
>with RO APIC ID, but it's not that bad.  (And no major OS does it :])
> 
> >>  the system uses cluster xAPIC, OS should set DFR before LDR, which
> >>  doesn't trigger mixed mode either.)
> >
> > Just curious: if the APIC is software disabled and in xAPIC mode, and the
> > OS sets different DFR values for different APICs, then when the OS sets
> > LDR, KVM can trigger mixed flat and cluster mode, right?
> 
> Exactly.
> APICs with zeroed LDR are ignored, so KVM will use the slow-path for
> delivery (= trigger mixed mode) at the moment the first APIC with
> different DFR is configured.

Thanks a lot for your explanation!

Thanks,
Feng

> --
> To unsubscribe from this list: send the line "unsubscribe kvm" in
> the body of a message to majord...@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

RE: [PATCH] KVM: x86: Add lowest-priority support for vt-d posted-interrupts

2015-12-09 Thread Wu, Feng

Hi Radim,

> -Original Message-
> From: Radim Krčmář [mailto:rkrc...@redhat.com]
> Sent: Tuesday, November 17, 2015 3:03 AM
> To: Wu, Feng <feng...@intel.com>
> Cc: pbonz...@redhat.com; kvm@vger.kernel.org; linux-ker...@vger.kernel.org
> Subject: Re: [PATCH] KVM: x86: Add lowest-priority support for vt-d posted-
> interrupts
> 
> 2015-11-09 10:46+0800, Feng Wu:
> > Use vector-hashing to handle lowest-priority interrupts for
> > posted-interrupts. As an example, modern Intel CPUs use this
> > method to handle lowest-priority interrupts.
> 
> (I don't think it's a good idea that the algorithm differs from non-PI
>  lowest priority delivery.  I'd make them both vector-hashing, which
>  would be "fun" to explain to people expecting round robin ...)
> 
> > Signed-off-by: Feng Wu <feng...@intel.com>
> > ---
> > diff --git a/arch/x86/kvm/irq_comm.c b/arch/x86/kvm/irq_comm.c
> > +/*
> > + * This routine handles lowest-priority interrupts using vector-hashing
> > + * mechanism. As an example, modern Intel CPUs use this method to handle
> > + * lowest-priority interrupts.
> > + *
> > + * Here is the details about the vector-hashing mechanism:
> > + * 1. For lowest-priority interrupts, store all the possible destination
> > + *vCPUs in an array.
> > + * 2. Use "guest vector % max number of destination vCPUs" to find the 
> > right
> > + *destination vCPU in the array for the lowest-priority interrupt.
> > + */
> 
> (Is Skylake i7-6700 a modern Intel CPU?
>  I didn't manage to get hashing ... all interrupts always went to the
>  lowest APIC ID in the set :/
>  Is there a simple way to verify the algorithm?)
> 
> > +struct kvm_vcpu *kvm_intr_vector_hashing_dest(struct kvm *kvm,
> > + struct kvm_lapic_irq *irq)
> > +
> > +{
> > +   unsigned long dest_vcpu_bitmap[BITS_TO_LONGS(KVM_MAX_VCPUS)];
> > +   unsigned int dest_vcpus = 0;
> > +   struct kvm_vcpu *vcpu;
> > +   unsigned int i, mod, idx = 0;
> > +
> > +   vcpu = kvm_intr_vector_hashing_dest_fast(kvm, irq);
> > +   if (vcpu)
> > +   return vcpu;
> 
> I think the rest of this function shouldn't be implemented:
>  - Shorthands are only for IPIs and hence don't need to be handled,
>  - Lowest priority physical broadcast is not supported,
>  - Lowest priority cluster logical broadcast is not supported,
>  - No point in optimizing mixed xAPIC and x2APIC mode,

I read your comments again, and don't quite understand why we
don't need PI optimization for mixed xAPIC and x2APIC mode.

BTW, can we have mixed flat and cluster mode?

Thanks,
Feng

>  - The rest is handled by kvm_intr_vector_hashing_dest_fast().
>(Even lowest priority flat logical "broadcast".)
>  - We do the work twice when vcpu == NULL means that there is no
>matching destination.
> 
> Is there a valid case that can be resolved by going through all vcpus?
> 
> > +
> > +   memset(dest_vcpu_bitmap, 0, sizeof(dest_vcpu_bitmap));
> > +
> > +   kvm_for_each_vcpu(i, vcpu, kvm) {
> > +   if (!kvm_apic_present(vcpu))
> > +   continue;
> > +
> > +   if (!kvm_apic_match_dest(vcpu, NULL, irq->shorthand,
> > +   irq->dest_id, irq->dest_mode))
> > +   continue;
> > +
> > +   __set_bit(vcpu->vcpu_id, dest_vcpu_bitmap);
> > +   dest_vcpus++;
> > +   }
> > +
> > +   if (dest_vcpus == 0)
> > +   return NULL;
> > +
> > +   mod = irq->vector % dest_vcpus;
> > +
> > +   for (i = 0; i <= mod; i++) {
> > +   idx = find_next_bit(dest_vcpu_bitmap, KVM_MAX_VCPUS, idx) +
> 1;
> > +   BUG_ON(idx >= KVM_MAX_VCPUS);
> > +   }
> > +
> > +   return kvm_get_vcpu(kvm, idx - 1);
> > +}
> > +EXPORT_SYMBOL_GPL(kvm_intr_vector_hashing_dest);
> > +
> > diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
> > @@ -816,6 +816,63 @@ out:
> > +struct kvm_vcpu *kvm_intr_vector_hashing_dest_fast(struct kvm *kvm,
> > +  struct kvm_lapic_irq *irq)
> 
> We now have three very similar functions :(
> 
>   kvm_irq_delivery_to_apic_fast
>   kvm_intr_is_single_vcpu_fast
>   kvm_intr_vector_hashing_dest_fast
> 
> By utilizing the gcc optimizer, they can be merged without introducing
> many instructions to the hot path, kvm_irq_delivery_to_apic_fast.
> (I would eventually do it, so you can save time by ignoring this.)
> 
> Thanks.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

RE: [PATCH] KVM: x86: Add lowest-priority support for vt-d posted-interrupts

2015-12-09 Thread Wu, Feng



> -Original Message-
> From: Radim Krčmář [mailto:rkrc...@redhat.com]
> Sent: Wednesday, December 9, 2015 10:54 PM
> To: Wu, Feng <feng...@intel.com>
> Cc: pbonz...@redhat.com; kvm@vger.kernel.org; linux-ker...@vger.kernel.org
> Subject: Re: [PATCH] KVM: x86: Add lowest-priority support for vt-d posted-
> interrupts
> 
> 2015-12-09 08:19+0000, Wu, Feng:
> >> -Original Message-
> >> From: Radim Krčmář [mailto:rkrc...@redhat.com]
> >> Sent: Tuesday, November 17, 2015 3:03 AM
> >> To: Wu, Feng <feng...@intel.com>
> >> Cc: pbonz...@redhat.com; kvm@vger.kernel.org; linux-
> ker...@vger.kernel.org
> >> Subject: Re: [PATCH] KVM: x86: Add lowest-priority support for vt-d posted-
> >> interrupts
> >>
> >> 2015-11-09 10:46+0800, Feng Wu:
> >> > +struct kvm_vcpu *kvm_intr_vector_hashing_dest(struct kvm *kvm,
> >> > +  struct kvm_lapic_irq *irq)
> >> > +
> >> > +{
> >> > +unsigned long dest_vcpu_bitmap[BITS_TO_LONGS(KVM_MAX_VCPUS)];
> >> > +unsigned int dest_vcpus = 0;
> >> > +struct kvm_vcpu *vcpu;
> >> > +unsigned int i, mod, idx = 0;
> >> > +
> >> > +vcpu = kvm_intr_vector_hashing_dest_fast(kvm, irq);
> >> > +if (vcpu)
> >> > +return vcpu;
> >>
> >> I think the rest of this function shouldn't be implemented:
> >>  - Shorthands are only for IPIs and hence don't need to be handled,
> >>  - Lowest priority physical broadcast is not supported,
> >>  - Lowest priority cluster logical broadcast is not supported,
> >>  - No point in optimizing mixed xAPIC and x2APIC mode,
> >
> > I read your comments again, and don't quite understand why we
> > don't need PI optimization for mixed xAPIC and x2APIC mode.
> 
> There shouldn't be a non-hobbyist operating system that uses mixed mode,
> so the optimization would practically be dead code as all other cases
> are handled by kvm_intr_vector_hashing_dest_fast().

Thanks a lot for your elaboration!

> 
> I think that having extra code would bring problems in the future -- we
> need to take care of it when refactoring KVM's APIC and we should also
> write a unit-test for this otherwise dead path.  I don't think that the
> benefit for guests would ever balance those efforts.
> 
> (Physical xAPIC+x2APIC mode is still somewhat reasonable and xAPIC CPUs
>  start with LDR=0, which means that operating system doesn't need to
>  utilize mixed mode, as defined by KVM, when switching to x2APIC.)

I think you mean Physical xAPIC+Physical x2APIC mode, right? For physical
mode, we don't use LDR in any case, do we? So in physical mode, we only
use the APIC ID, that is why they can be mixed, is my understanding correct?
Thanks a lot!

> 
> > BTW, can we have mixed flat and cluster mode?
> 
> Yes, KVM recognizes that mixed mode, but luckily, there are severe
> limitations.
> 
> Notes below SDM section 10.6.2.2:
>   All processors that have their APIC software enabled (using the
>   spurious vector enable/disable bit) must have their DFRs (Destination
>   Format Registers) programmed identically.

Thanks for pointing this out, good to know it!

> 
> I hope there isn't a human that would use it in good faith.
> 
> (Only NMI/SMI/INIT/SIPI are delivered in software disabled mode and if
>  the system uses cluster xAPIC, OS should set DFR before LDR, which
>  doesn't trigger mixed mode either.)

Just curious: if the APIC is software disabled and in xAPIC mode, and the OS
sets different DFR values for different APICs, then when the OS sets LDR, KVM
can trigger mixed flat and cluster mode, right?

Thanks,
Feng

RE: Is KVM support single step execution

2015-11-26 Thread Wu, Feng



> -Original Message-
> From: kvm-ow...@vger.kernel.org [mailto:kvm-ow...@vger.kernel.org] On
> Behalf Of Paolo Bonzini
> Sent: Thursday, November 26, 2015 4:28 PM
> To: Wu, Feng <feng...@intel.com>
> Cc: kvm@vger.kernel.org; Dong, Eddie <eddie.d...@intel.com>
> Subject: Re: Is KVM support single step execution
> 
> 
> 
> On 26/11/2015 06:46, Wu, Feng wrote:
> > Hi Paolo,
> >
> > Do you know whether KVM supports single step execution? If it is,
> > could you please give me some information about it. Really appreciate
> > it!
> 
> Yes, it does.  See KVM_SET_GUEST_DEBUG documentation in
> Documentation/virtual/kvm/api.txt.

Okay, thanks for the information!

Thanks,
Feng

> 
> Paolo
> --
> To unsubscribe from this list: send the line "unsubscribe kvm" in
> the body of a message to majord...@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

RE: [PATCH] KVM: x86: Add lowest-priority support for vt-d posted-interrupts

2015-11-25 Thread Wu, Feng



> -Original Message-
> From: Radim Krčmář [mailto:rkrc...@redhat.com]
> Sent: Wednesday, November 25, 2015 11:43 PM
> To: Paolo Bonzini <pbonz...@redhat.com>
> Cc: Wu, Feng <feng...@intel.com>; kvm@vger.kernel.org; linux-
> ker...@vger.kernel.org
> Subject: Re: [PATCH] KVM: x86: Add lowest-priority support for vt-d posted-
> interrupts
> 
> 2015-11-25 15:38+0100, Paolo Bonzini:
> > On 25/11/2015 15:12, Radim Krcmár wrote:
> >> I think it's ok to pick any algorithm we like.  It's unlikely that
> >> software would recognize and take advantage of the hardware algorithm
> >> without adding a special treatment for KVM.
> >> (I'd vote for the simple pick-first-APIC lowest priority algorithm ...
> >>  I don't see much point in complicating lowest priority when it doesn't
> >>  deliver to lowest priority CPU anyway.)
> >
> > Vector hashing is an improvement for the common case where all vectors
> > are set to all CPUs.  Sure you can get an unlucky assignment, but it's
> > still better than pick-first-APIC.
> 
> Yeah, hashing has a valid use case, but a subtle weighting of drawbacks
> led me to prefer pick-first-APIC ...

Is it possible that the pick-first-APIC policy makes a certain vCPU's irq
workload too heavy?

> 
> (I'd prefer to have simple code in KVM and depend on static IRQ balancing
>  in a guest to handle the distribution.
>  The guest could get the unlucky assignment anyway, so it should be
>  prepared;  and hashing just made KVM worse in that case.  Guests might
>  also configure physical x(2)APIC, where is no lowest priority.
>  And if the guest doesn't do anything with IRQs, then it might not even
>  care about the impact that our choice has.)

Do you guys have an agreement on how to handle this? Or can we implement
the vector hashing at the current stage, and then improve it like Radim
mentioned above if it is really needed?

Thanks,
Feng

Is KVM support single step execution

2015-11-25 Thread Wu, Feng

Hi Paolo,

Do you know whether KVM supports single step execution? If it does, could you
please give me some information about it. Really appreciate it!

Thanks,
Feng
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

RE: [PATCH] KVM: x86: Add lowest-priority support for vt-d posted-interrupts

2015-11-24 Thread Wu, Feng



> -Original Message-
> From: Paolo Bonzini [mailto:pbonz...@redhat.com]
> Sent: Tuesday, November 24, 2015 10:38 PM
> To: Radim Krcmár <rkrc...@redhat.com>; Wu, Feng <feng...@intel.com>
> Cc: kvm@vger.kernel.org; linux-ker...@vger.kernel.org
> Subject: Re: [PATCH] KVM: x86: Add lowest-priority support for vt-d posted-
> interrupts
> 
> 
> 
> On 24/11/2015 15:35, Radim Krcmár wrote:
> > > Thanks for your guys' review. Yes, we can introduce a module option
> > > for it. According to Radim's comments above, we need use the
> > > same policy for PI and non-PI lowest-priority interrupts, so here is the
> > > question: for vector hashing, it is easy to apply it for both non-PI and 
> > > PI
> > > case, however, for Round-Robin, in non-PI case, the round robin counter
> > > is used and updated when the interrupt is injected to guest, but for
> > > PI case, the interrupt is injected to guest totally by hardware, software
> > > cannot control it while interrupt delivery, we can only decide the
> > > destination vCPU for the PI interrupt in the initial configuration
> > > time (guest update vMSI -> QEMU -> KVM). Do you guys have any good
> > > suggestion to do round robin for PI lowest-priority? Seems Round robin
> > > is not a good way for PI lowest-priority interrupts. Any comments
> > > are appreciated!
> >
> > It's meaningless to try dynamic algorithms with PI so if we allow both
> > lowest priority algorithms, I'd let PI handle any lowest priority only
> > with vector hashing.  (It's an ugly compromise.)
> 
> For now, I would just keep the 4.4 behavior, i.e. disable PI unless
> there is a single destination || vector hashing is enabled.  We can flip
> the switch later.

Okay, let me try to understand this clearly:
- We will have a new KVM command line parameter to indicate whether
  vector hashing is enabled.
- If it is not enabled, for PI, we can only support single destination lowest
  priority interrupts, for non-PI, we continue to use RR.
- If it is enabled, for PI and non-PI we use vector hashing for both of them.

Is this the case you have in mind? Thanks a lot!

Thanks,
Feng

> 
> Paolo

RE: [PATCH] KVM: x86: Add lowest-priority support for vt-d posted-interrupts

2015-11-24 Thread Wu, Feng



> -Original Message-
> From: Radim Krčmář [mailto:rkrc...@redhat.com]
> Sent: Tuesday, November 24, 2015 10:32 PM
> To: Wu, Feng <feng...@intel.com>
> Cc: pbonz...@redhat.com; kvm@vger.kernel.org; linux-
> ker...@vger.kernel.org
> Subject: Re: [PATCH] KVM: x86: Add lowest-priority support for vt-d posted-
> interrupts
> 
> 2015-11-24 01:26+, Wu, Feng:
> > "I don't think we do any vector hashing on our client parts.  This may be
> why the customer is not able to detect this on Skylake client silicon.
> > The vector hashing is micro-architectural and something we had done on
> server parts.
> >
> > If you look at the haswell server CPU spec (https://www-
> ssl.intel.com/content/dam/www/public/us/en/documents/datasheets/xeon-
> e5-v3-datasheet-vol-2.pdf)
> > In section 4.1.2, you will see an IntControl register (this is a register
> controlled/configured by BIOS) - see below.
> 
> Thank you!
> 
> > If you look at bits 6:4 in that register, you see the option we offer in
> hardware for what kind of redirection is applied to lowest priority 
> interrupts.
> > There are three options:
> > 1.  Fixed priority
> > 2.  Redirect last
> > 3.  Hash Vector
> >
> > If picking vector hash, then bits 10:8 specifies the APIC-ID bits used for 
> > the
> hashing."
> 
> The hash function just interprets a subset of vector's bits as a number
> and uses that as a starting offset in a search for an enabled APIC
> within the destination set?
> 
> For example:
> The x2APIC destination is 0x0055 (= first four even APICs in cluster
> 0), the vector is 0b1110, and bits 10:8 of IntControl are 000.
> 
> 000 means that bits 7:4 of vector are selected, thus the vector hash is
> 0b1110 = 14, so the round-robin effectively does 14 % 4 (because we only
> have 4 destinations) and delivers to the 3rd possible APIC (= ID 6)?

In my current implementation, I don't select a subset of the vector's bits as
the number; instead, I use the whole vector number. From a software emulation
point of view, do we really need to select a subset of the vector's bits as
the base number? What is your opinion? Thanks a lot!

Thanks,
Feng
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

RE: [PATCH] KVM: x86: Add lowest-priority support for vt-d posted-interrupts

2015-11-23 Thread Wu, Feng



> -Original Message-
> From: Paolo Bonzini [mailto:pbonz...@redhat.com]
> Sent: Tuesday, November 17, 2015 5:41 PM
> To: Radim Krčmář <rkrc...@redhat.com>; Wu, Feng <feng...@intel.com>
> Cc: kvm@vger.kernel.org; linux-ker...@vger.kernel.org
> Subject: Re: [PATCH] KVM: x86: Add lowest-priority support for vt-d posted-
> interrupts
> 
> 
> 
> On 16/11/2015 20:03, Radim Krčmář wrote:
> > 2015-11-09 10:46+0800, Feng Wu:
> >> Use vector-hashing to handle lowest-priority interrupts for
> >> posted-interrupts. As an example, modern Intel CPUs use this
> >> method to handle lowest-priority interrupts.
> >
> > (I don't think it's a good idea that the algorithm differs from non-PI
> >  lowest priority delivery.  I'd make them both vector-hashing, which
> >  would be "fun" to explain to people expecting round robin ...)
> 
> Yup, I would make it a module option.  Thanks very much Radim for
> helping with the review.

Thanks for your review, guys. Yes, we can introduce a module option
for it. According to Radim's comments above, we need to use the
same policy for PI and non-PI lowest-priority interrupts, so here is the
question: for vector hashing, it is easy to apply it to both the non-PI and PI
cases. However, for Round-Robin, in the non-PI case, the round robin counter
is used and updated when the interrupt is injected to the guest, but in the
PI case, the interrupt is injected to the guest entirely by hardware; software
cannot control it during interrupt delivery, and we can only decide the
destination vCPU for the PI interrupt at the initial configuration
time (guest update vMSI -> QEMU -> KVM). Do you guys have any good
suggestion to do round robin for PI lowest-priority? It seems Round robin
is not a good way for PI lowest-priority interrupts. Any comments
are appreciated!

Thanks,
Feng


> 
> Paolo
> 
> >> Signed-off-by: Feng Wu <feng...@intel.com>
> >> ---
> >> diff --git a/arch/x86/kvm/irq_comm.c b/arch/x86/kvm/irq_comm.c
> >> +/*
> >> + * This routine handles lowest-priority interrupts using vector-hashing
> >> + * mechanism. As an example, modern Intel CPUs use this method to
> handle
> >> + * lowest-priority interrupts.
> >> + *
> >> + * Here is the details about the vector-hashing mechanism:
> >> + * 1. For lowest-priority interrupts, store all the possible destination
> >> + *vCPUs in an array.
> >> + * 2. Use "guest vector % max number of destination vCPUs" to find the
> right
> >> + *destination vCPU in the array for the lowest-priority interrupt.
> >> + */
> >
> > (Is Skylake i7-6700 a modern Intel CPU?
> >  I didn't manage to get hashing ... all interrupts always went to the
> >  lowest APIC ID in the set :/
> >  Is there a simple way to verify the algorithm?)
> >
> >> +struct kvm_vcpu *kvm_intr_vector_hashing_dest(struct kvm *kvm,
> >> +struct kvm_lapic_irq *irq)
> >> +
> >> +{
> >> +  unsigned long
> dest_vcpu_bitmap[BITS_TO_LONGS(KVM_MAX_VCPUS)];
> >> +  unsigned int dest_vcpus = 0;
> >> +  struct kvm_vcpu *vcpu;
> >> +  unsigned int i, mod, idx = 0;
> >> +
> >> +  vcpu = kvm_intr_vector_hashing_dest_fast(kvm, irq);
> >> +  if (vcpu)
> >> +  return vcpu;
> >
> > I think the rest of this function shouldn't be implemented:
> >  - Shorthands are only for IPIs and hence don't need to be handled,
> >  - Lowest priority physical broadcast is not supported,
> >  - Lowest priority cluster logical broadcast is not supported,
> >  - No point in optimizing mixed xAPIC and x2APIC mode,
> >  - The rest is handled by kvm_intr_vector_hashing_dest_fast().
> >(Even lowest priority flat logical "broadcast".)
> >  - We do the work twice when vcpu == NULL means that there is no
> >matching destination.
> >
> > Is there a valid case that can be resolved by going through all vcpus?
> >
> >> +
> >> +  memset(dest_vcpu_bitmap, 0, sizeof(dest_vcpu_bitmap));
> >> +
> >> +  kvm_for_each_vcpu(i, vcpu, kvm) {
> >> +  if (!kvm_apic_present(vcpu))
> >> +  continue;
> >> +
> >> +  if (!kvm_apic_match_dest(vcpu, NULL, irq->shorthand,
> >> +  irq->dest_id, irq->dest_mode))
> >> +  continue;
> >> +
> >> +  __set_bit(vcpu->vcpu_id, dest_vcpu_bitmap);
> >> +  dest_vcpus++;
> >> +  }
> >> +
> >> +  if (dest_vcpus =

RE: [PATCH] KVM: x86: Add lowest-priority support for vt-d posted-interrupts

2015-11-23 Thread Wu, Feng



> -Original Message-
> From: Radim Krčmář [mailto:rkrc...@redhat.com]
> Sent: Tuesday, November 17, 2015 3:03 AM
> To: Wu, Feng <feng...@intel.com>
> Cc: pbonz...@redhat.com; kvm@vger.kernel.org; linux-
> ker...@vger.kernel.org
> Subject: Re: [PATCH] KVM: x86: Add lowest-priority support for vt-d posted-
> interrupts
> 
> 2015-11-09 10:46+0800, Feng Wu:
> > Use vector-hashing to handle lowest-priority interrupts for
> > posted-interrupts. As an example, modern Intel CPUs use this
> > method to handle lowest-priority interrupts.
> 
> (I don't think it's a good idea that the algorithm differs from non-PI
>  lowest priority delivery.  I'd make them both vector-hashing, which
>  would be "fun" to explain to people expecting round robin ...)
> 
> > Signed-off-by: Feng Wu <feng...@intel.com>
> > ---
> > diff --git a/arch/x86/kvm/irq_comm.c b/arch/x86/kvm/irq_comm.c
> > +/*
> > + * This routine handles lowest-priority interrupts using vector-hashing
> > + * mechanism. As an example, modern Intel CPUs use this method to
> handle
> > + * lowest-priority interrupts.
> > + *
> > + * Here is the details about the vector-hashing mechanism:
> > + * 1. For lowest-priority interrupts, store all the possible destination
> > + *vCPUs in an array.
> > + * 2. Use "guest vector % max number of destination vCPUs" to find the
> right
> > + *destination vCPU in the array for the lowest-priority interrupt.
> > + */
> 
> (Is Skylake i7-6700 a modern Intel CPU?
>  I didn't manage to get hashing ... all interrupts always went to the
>  lowest APIC ID in the set :/
>  Is there a simple way to verify the algorithm?)

Sorry for the late response; I was trying to get more information about vector
hashing before getting back to you. Here is the response from our
hardware architect:

"I don't think we do any vector hashing on our client parts.  This may be why 
the customer is not able to detect this on Skylake client silicon.
The vector hashing is micro-architectural and something we had done on server 
parts.

If you look at the haswell server CPU spec 
(https://www-ssl.intel.com/content/dam/www/public/us/en/documents/datasheets/xeon-e5-v3-datasheet-vol-2.pdf)
In section 4.1.2, you will see an IntControl register (this is a register 
controlled/configured by BIOS) - see below.

If you look at bits 6:4 in that register, you see the option we offer in 
hardware for what kind of redirection is applied to lowest priority interrupts.
There are three options:
1.  Fixed priority  
2.  Redirect last 
3.  Hash Vector

If picking vector hash, then bits 10:8 specifies the APIC-ID bits used for the 
hashing."

Thanks,
Feng


> 
> > +struct kvm_vcpu *kvm_intr_vector_hashing_dest(struct kvm *kvm,
> > + struct kvm_lapic_irq *irq)
> > +
> > +{
> > +   unsigned long
> dest_vcpu_bitmap[BITS_TO_LONGS(KVM_MAX_VCPUS)];
> > +   unsigned int dest_vcpus = 0;
> > +   struct kvm_vcpu *vcpu;
> > +   unsigned int i, mod, idx = 0;
> > +
> > +   vcpu = kvm_intr_vector_hashing_dest_fast(kvm, irq);
> > +   if (vcpu)
> > +   return vcpu;
> 
> I think the rest of this function shouldn't be implemented:
>  - Shorthands are only for IPIs and hence don't need to be handled,
>  - Lowest priority physical broadcast is not supported,
>  - Lowest priority cluster logical broadcast is not supported,
>  - No point in optimizing mixed xAPIC and x2APIC mode,
>  - The rest is handled by kvm_intr_vector_hashing_dest_fast().
>(Even lowest priority flat logical "broadcast".)
>  - We do the work twice when vcpu == NULL means that there is no
>matching destination.
> 
> Is there a valid case that can be resolved by going through all vcpus?
> 
> > +
> > +   memset(dest_vcpu_bitmap, 0, sizeof(dest_vcpu_bitmap));
> > +
> > +   kvm_for_each_vcpu(i, vcpu, kvm) {
> > +   if (!kvm_apic_present(vcpu))
> > +   continue;
> > +
> > +   if (!kvm_apic_match_dest(vcpu, NULL, irq->shorthand,
> > +   irq->dest_id, irq->dest_mode))
> > +   continue;
> > +
> > +   __set_bit(vcpu->vcpu_id, dest_vcpu_bitmap);
> > +   dest_vcpus++;
> > +   }
> > +
> > +   if (dest_vcpus == 0)
> > +   return NULL;
> > +
> > +   mod = irq->vector % dest_vcpus;
> > +
> > +   for (i = 0; i <= mod; i++) {
> > +   idx = find_next_bit(dest_vcpu_bitmap, KVM_MAX_VCPUS,
> idx) + 1;
> > +   BUG_ON(idx >= KVM_MAX_VCPUS);
> > +   }
> >

RE: [PATCH] KVM: x86: Add lowest-priority support for vt-d posted-interrupts

2015-11-15 Thread Wu, Feng

Hi Paolo,

Any comments about this patch? Thanks in advance!

Thanks,
Feng

> -Original Message-
> From: Wu, Feng
> Sent: Monday, November 9, 2015 10:47 AM
> To: pbonz...@redhat.com
> Cc: kvm@vger.kernel.org; linux-ker...@vger.kernel.org; Wu, Feng
> <feng...@intel.com>
> Subject: [PATCH] KVM: x86: Add lowest-priority support for vt-d posted-
> interrupts
> 
> Use vector-hashing to handle lowest-priority interrupts for
> posted-interrupts. As an example, modern Intel CPUs use this
> method to handle lowest-priority interrupts.
> 
> Signed-off-by: Feng Wu <feng...@intel.com>
> ---
>  arch/x86/include/asm/kvm_host.h |  2 ++
>  arch/x86/kvm/irq_comm.c | 52
> +
>  arch/x86/kvm/lapic.c| 57
> +
>  arch/x86/kvm/lapic.h|  2 ++
>  arch/x86/kvm/vmx.c  | 14 --
>  5 files changed, 125 insertions(+), 2 deletions(-)
> 
> diff --git a/arch/x86/include/asm/kvm_host.h
> b/arch/x86/include/asm/kvm_host.h
> index 9265196..e225106 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -1258,6 +1258,8 @@ bool kvm_vcpu_is_bsp(struct kvm_vcpu *vcpu);
> 
>  bool kvm_intr_is_single_vcpu(struct kvm *kvm, struct kvm_lapic_irq *irq,
>struct kvm_vcpu **dest_vcpu);
> +struct kvm_vcpu *kvm_intr_vector_hashing_dest(struct kvm *kvm,
> +   struct kvm_lapic_irq *irq);
> 
>  void kvm_set_msi_irq(struct kvm_kernel_irq_routing_entry *e,
>struct kvm_lapic_irq *irq);
> diff --git a/arch/x86/kvm/irq_comm.c b/arch/x86/kvm/irq_comm.c
> index 84b96d3..8156e45 100644
> --- a/arch/x86/kvm/irq_comm.c
> +++ b/arch/x86/kvm/irq_comm.c
> @@ -266,6 +266,58 @@ out:
>   return r;
>  }
> 
> +/*
> + * This routine handles lowest-priority interrupts using vector-hashing
> + * mechanism. As an example, modern Intel CPUs use this method to
> handle
> + * lowest-priority interrupts.
> + *
> + * Here is the details about the vector-hashing mechanism:
> + * 1. For lowest-priority interrupts, store all the possible destination
> + *vCPUs in an array.
> + * 2. Use "guest vector % max number of destination vCPUs" to find the
> right
> + *destination vCPU in the array for the lowest-priority interrupt.
> + */
> +struct kvm_vcpu *kvm_intr_vector_hashing_dest(struct kvm *kvm,
> +   struct kvm_lapic_irq *irq)
> +
> +{
> + unsigned long
> dest_vcpu_bitmap[BITS_TO_LONGS(KVM_MAX_VCPUS)];
> + unsigned int dest_vcpus = 0;
> + struct kvm_vcpu *vcpu;
> + unsigned int i, mod, idx = 0;
> +
> + vcpu = kvm_intr_vector_hashing_dest_fast(kvm, irq);
> + if (vcpu)
> + return vcpu;
> +
> + memset(dest_vcpu_bitmap, 0, sizeof(dest_vcpu_bitmap));
> +
> + kvm_for_each_vcpu(i, vcpu, kvm) {
> + if (!kvm_apic_present(vcpu))
> + continue;
> +
> + if (!kvm_apic_match_dest(vcpu, NULL, irq->shorthand,
> + irq->dest_id, irq->dest_mode))
> + continue;
> +
> + __set_bit(vcpu->vcpu_id, dest_vcpu_bitmap);
> + dest_vcpus++;
> + }
> +
> + if (dest_vcpus == 0)
> + return NULL;
> +
> + mod = irq->vector % dest_vcpus;
> +
> + for (i = 0; i <= mod; i++) {
> + idx = find_next_bit(dest_vcpu_bitmap, KVM_MAX_VCPUS,
> idx) + 1;
> + BUG_ON(idx >= KVM_MAX_VCPUS);
> + }
> +
> + return kvm_get_vcpu(kvm, idx - 1);
> +}
> +EXPORT_SYMBOL_GPL(kvm_intr_vector_hashing_dest);
> +
>  bool kvm_intr_is_single_vcpu(struct kvm *kvm, struct kvm_lapic_irq *irq,
>struct kvm_vcpu **dest_vcpu)
>  {
> diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
> index ecd4ea1..4937aa4 100644
> --- a/arch/x86/kvm/lapic.c
> +++ b/arch/x86/kvm/lapic.c
> @@ -816,6 +816,63 @@ out:
>   return ret;
>  }
> 
> +struct kvm_vcpu *kvm_intr_vector_hashing_dest_fast(struct kvm *kvm,
> +struct kvm_lapic_irq *irq)
> +{
> + struct kvm_apic_map *map;
> + struct kvm_vcpu *vcpu = NULL;
> +
> + if (irq->shorthand)
> + return NULL;
> +
> + rcu_read_lock();
> + map = rcu_dereference(kvm->arch.apic_map);
> +
> + if (!map)
> + goto out;
> +
> + if ((irq->dest_mode != APIC_DEST_PHYSICAL) &&
> + kvm_lowest_prio_delivery(irq)) {
&g

RE: [PATCH v9 17/18] KVM: Update Posted-Interrupts Descriptor when vCPU is blocked

2015-10-15 Thread Wu, Feng



> -Original Message-
> From: Paolo Bonzini [mailto:paolo.bonz...@gmail.com] On Behalf Of Paolo
> Bonzini
> Sent: Friday, October 16, 2015 2:13 AM
> To: David Matlack <dmatl...@google.com>; Wu, Feng <feng...@intel.com>
> Cc: alex.william...@redhat.com; Joerg Roedel <j...@8bytes.org>; Marcelo
> Tosatti <mtosa...@redhat.com>; eric.au...@linaro.org; kvm list
> <kvm@vger.kernel.org>; io...@lists.linux-foundation.org; linux-
> ker...@vger.kernel.org
> Subject: Re: [PATCH v9 17/18] KVM: Update Posted-Interrupts Descriptor when
> vCPU is blocked
> 
> 
> 
> On 15/10/2015 19:39, David Matlack wrote:
> > But after spending more time reading the source code this morning I
> > found that kvm_vcpu_check_block() eventually calls into
> > vmx_sync_pir_to_irr(), which copies PIR to IRR and clears ON. And then
> > apic_find_highest_irr() detects the pending posted interrupt.
> 
> Right.  And related to this, Feng, can you check if this is still
> necessary on kvm/queue:
> 
> @@ -6518,6 +6523,20 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
>   kvm_vcpu_reload_apic_access_page(vcpu);
>   }
> 
> + /*
> +  * KVM_REQ_EVENT is not set when posted interrupts are set by
> +  * VT-d hardware, so we have to update RVI unconditionally.
> +  */
> + if (kvm_lapic_enabled(vcpu)) {
> + /*
> +  * Update architecture specific hints for APIC
> +  * virtual interrupt delivery.
> +  */
> + if (kvm_x86_ops->hwapic_irr_update)
> + kvm_x86_ops->hwapic_irr_update(vcpu,
> + kvm_lapic_find_highest_irr(vcpu));
> + }
> +
>   if (kvm_check_request(KVM_REQ_EVENT, vcpu) || req_int_win) {
>   kvm_apic_accept_events(vcpu);
>   if (vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED) {
> @@ -6534,13 +6553,6 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
>   kvm_x86_ops->enable_irq_window(vcpu);
> 
>   if (kvm_lapic_enabled(vcpu)) {
> - /*
> -  * Update architecture specific hints for APIC
> -  * virtual interrupt delivery.
> -  */
> - if (kvm_x86_ops->hwapic_irr_update)
> - kvm_x86_ops->hwapic_irr_update(vcpu,
> - kvm_lapic_find_highest_irr(vcpu));
>   update_cr8_intercept(vcpu);
>   kvm_lapic_sync_to_vapic(vcpu);
>   }
> 

I think the above code is needed. Before the place where 'KVM_REQ_EVENT'
gets checked in vcpu_enter_guest(), VT-d hardware can issue a notification
event at any time. Consider the following scenario:

vcpu_run()
{
..  

for(;;) {
point #1
vcpu_enter_guest()
}   

point #2
}

For example, if we receive notification events issued by VT-d hardware at
point #1 and point#2, then enter vcpu_enter_guest() with 'KVM_REQ_EVENT'
not set, the interrupts cannot be delivered to guest during _this_ VM-Entry.

The point is that VT-d hardware can issue notification event at any time,
but it cannot set 'KVM_REQ_EVENT' like software does.

Maybe one thing we can do is only executing the following code when
vt-d pi is enabled,

 +  /*
 +   * KVM_REQ_EVENT is not set when posted interrupts are set by
 +   * VT-d hardware, so we have to update RVI unconditionally.
 +   */
 +  if (kvm_lapic_enabled(vcpu)) {
 +  /*
 +   * Update architecture specific hints for APIC
 +   * virtual interrupt delivery.
 +   */
 +  if (kvm_x86_ops->hwapic_irr_update)
 +  kvm_x86_ops->hwapic_irr_update(vcpu,
 +  kvm_lapic_find_highest_irr(vcpu));
 +  }
 +

And do this inside the KVM_REQ_EVENT check when VT-d PI is not enabled.

Thanks,
Feng

> 
> It may be obsolete now that we have the patch from Radim to set
> KVM_REQ_EVENT
> in vmx_sync_pir_to_irr
> (http://permalink.gmane.org/gmane.linux.kernel/2057138).
> 
> Thanks,
> 
> Paolo

RE: linux-next: manual merge of the kvm-arm tree with the kvm tree

2015-10-15 Thread Wu, Feng



> -Original Message-
> From: Stephen Rothwell [mailto:s...@canb.auug.org.au]
> Sent: Friday, October 16, 2015 11:53 AM
> To: Christoffer Dall <cd...@cs.columbia.edu>; Marc Zyngier
> <marc.zyng...@arm.com>; Marcelo Tosatti <mtosa...@redhat.com>; Gleb
> Natapov <g...@kernel.org>; kvm@vger.kernel.org
> Cc: linux-n...@vger.kernel.org; linux-ker...@vger.kernel.org; Wu, Feng
> <feng...@intel.com>; Paolo Bonzini <pbonz...@redhat.com>; Christian
> Borntraeger <borntrae...@de.ibm.com>; Cornelia Huck
> <cornelia.h...@de.ibm.com>
> Subject: linux-next: manual merge of the kvm-arm tree with the kvm tree
> 
> I fixed it up (see below) and can carry the fix as necessary (no action
> is required).

Thanks Stephen!

Thanks,
Feng
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

RE: [PATCH v9 17/18] KVM: Update Posted-Interrupts Descriptor when vCPU is blocked

2015-10-14 Thread Wu, Feng



> -Original Message-
> From: David Matlack [mailto:dmatl...@google.com]
> Sent: Thursday, October 15, 2015 7:41 AM
> To: Wu, Feng <feng...@intel.com>
> Cc: Paolo Bonzini <pbonz...@redhat.com>; alex.william...@redhat.com; Joerg
> Roedel <j...@8bytes.org>; Marcelo Tosatti <mtosa...@redhat.com>;
> eric.au...@linaro.org; kvm list <kvm@vger.kernel.org>; iommu@lists.linux-
> foundation.org; linux-ker...@vger.kernel.org
> Subject: Re: [PATCH v9 17/18] KVM: Update Posted-Interrupts Descriptor when
> vCPU is blocked
> 
> Hi Feng.
> 
> On Fri, Sep 18, 2015 at 7:29 AM, Feng Wu <feng...@intel.com> wrote:
> > This patch updates the Posted-Interrupts Descriptor when vCPU
> > is blocked.
> >
> > pre-block:
> > - Add the vCPU to the blocked per-CPU list
> > - Set 'NV' to POSTED_INTR_WAKEUP_VECTOR
> >
> > post-block:
> > - Remove the vCPU from the per-CPU list
> 
> I'm wondering what happens if a posted interrupt arrives at the IOMMU
> after pre-block and before post-block.
> 
> In pre_block, NV is set to POSTED_INTR_WAKEUP_VECTOR. IIUC, this means
> future posted interrupts will not trigger "Posted-Interrupt Processing"
> (PIR will not get copied to VIRR). Instead, the IOMMU will do ON := 1,
> PIR |= (1 << vector), and send POSTED_INTR_WAKEUP_VECTOR. PIWV calls
> wakeup_handler which does kvm_vcpu_kick. kvm_vcpu_kick does a wait-queue
> wakeup and possibly a scheduler ipi.
> 
> But the VCPU is sitting in kvm_vcpu_block. It spins and/or schedules
> (wait queue) until it has a reason to wake up. I couldn't find a code
> path from kvm_vcpu_block that lead to checking ON or PIR. How does the
> blocked VCPU "receive" the posted interrupt? (And when does Posted-
> Interrupt Processing get triggered?)

In pre_block, it also changes the 'NDST' field to the pCPU on which the vCPU
is put on the per-CPU list 'blocked_vcpu_on_cpu', so when posted-interrupts
come in, the wakeup notification event is sent to that pCPU. Then, in
the wakeup_handler, it can find the vCPU on the per-CPU list, hence
kvm_vcpu_kick can wake it up.

Thanks,
Feng

> 
> Thanks!
> 
> >
> > Signed-off-by: Feng Wu <feng...@intel.com>
> > ---
> > v9:
> > - Add description for blocked_vcpu_on_cpu_lock in
> Documentation/virtual/kvm/locking.txt
> > - Check !kvm_arch_has_assigned_device(vcpu->kvm) first, then
> >   !irq_remapping_cap(IRQ_POSTING_CAP)
> >
> > v8:
> > - Rename 'pi_pre_block' to 'pre_block'
> > - Rename 'pi_post_block' to 'post_block'
> > - Change some comments
> > - Only add the vCPU to the blocking list when the VM has assigned devices.
> >
> >  Documentation/virtual/kvm/locking.txt |  12 +++
> >  arch/x86/include/asm/kvm_host.h   |  13 +++
> >  arch/x86/kvm/vmx.c| 153
> ++
> >  arch/x86/kvm/x86.c|  53 +---
> >  include/linux/kvm_host.h  |   3 +
> >  virt/kvm/kvm_main.c   |   3 +
> >  6 files changed, 227 insertions(+), 10 deletions(-)
> >
> > diff --git a/Documentation/virtual/kvm/locking.txt
> b/Documentation/virtual/kvm/locking.txt
> > index d68af4d..19f94a6 100644
> > --- a/Documentation/virtual/kvm/locking.txt
> > +++ b/Documentation/virtual/kvm/locking.txt
> > @@ -166,3 +166,15 @@ Comment:   The srcu read lock must be held while
> accessing memslots (e.g.
> > MMIO/PIO address->device structure mapping (kvm->buses).
> > The srcu index can be stored in kvm_vcpu->srcu_idx per vcpu
> > if it is needed by multiple functions.
> > +
> > +Name:  blocked_vcpu_on_cpu_lock
> > +Type:  spinlock_t
> > +Arch:  x86
> > +Protects:  blocked_vcpu_on_cpu
> > +Comment:   This is a per-CPU lock and it is used for VT-d 
> > posted-interrupts.
> > +   When VT-d posted-interrupts is supported and the VM has 
> > assigned
> > +   devices, we put the blocked vCPU on the list 
> > blocked_vcpu_on_cpu
> > +   protected by blocked_vcpu_on_cpu_lock, when VT-d hardware
> issues
> > +   wakeup notification event since external interrupts from the
> > +   assigned devices happens, we will find the vCPU on the list 
> > to
> > +   wakeup.
> > diff --git a/arch/x86/include/asm/kvm_host.h
> b/arch/x86/include/asm/kvm_host.h
> > index 0ddd353..304fbb5 100644
> > --- a/arch/x86/include/asm/kvm_host.h
> > +++ b/arch/x86/include/asm/kvm_host.h
> > @@ -552

RE: [PATCH] genirq: Move irq_set_vcpu_affinity out of "#ifdef CONFIG_SMP"

2015-10-07 Thread Wu, Feng

Hi Thomas & Paolo,

> -Original Message-
> From: Jiang Liu [mailto:jiang@linux.intel.com]
> Sent: Saturday, October 03, 2015 5:11 PM
> To: Wu, Feng; t...@linutronix.de; pbonz...@redhat.com
> Cc: kvm@vger.kernel.org; linux-ker...@vger.kernel.org
> Subject: Re: [PATCH] genirq: Move irq_set_vcpu_affinity out of "#ifdef
> CONFIG_SMP"
> 
> On 2015/10/3 16:20, Feng Wu wrote:
> > irq_set_vcpu_affinity() is needed when CONFIG_SMP=n, so move the
> > definition out of "#ifdef CONFIG_SMP"

What is your opinion about this patch? Thanks a lot!

Thanks,
Feng

> >
> > Suggested-by: Paolo Bonzini <pbonz...@redhat.com>
> > Signed-off-by: Feng Wu <feng...@intel.com>
> 
> Reviewed-by: Jiang Liu <jiang@linux.intel.com>
> 
> > ---
> >  kernel/irq/manage.c | 62
> ++---
> >  1 file changed, 31 insertions(+), 31 deletions(-)
> >
> > diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
> > index 1c58655..90b378d 100644
> > --- a/kernel/irq/manage.c
> > +++ b/kernel/irq/manage.c
> > @@ -258,37 +258,6 @@ int irq_set_affinity_hint(unsigned int irq, const
> struct cpumask *m)
> >  }
> >  EXPORT_SYMBOL_GPL(irq_set_affinity_hint);
> >
> > -/**
> > - * irq_set_vcpu_affinity - Set vcpu affinity for the interrupt
> > - * @irq: interrupt number to set affinity
> > - * @vcpu_info: vCPU specific data
> > - *
> > - * This function uses the vCPU specific data to set the vCPU
> > - * affinity for an irq. The vCPU specific data is passed from
> > - * outside, such as KVM. One example code path is as below:
> > - * KVM -> IOMMU -> irq_set_vcpu_affinity().
> > - */
> > -int irq_set_vcpu_affinity(unsigned int irq, void *vcpu_info)
> > -{
> > -   unsigned long flags;
> > -   struct irq_desc *desc = irq_get_desc_lock(irq, &flags, 0);
> > -   struct irq_data *data;
> > -   struct irq_chip *chip;
> > -   int ret = -ENOSYS;
> > -
> > -   if (!desc)
> > -   return -EINVAL;
> > -
> > -   data = irq_desc_get_irq_data(desc);
> > -   chip = irq_data_get_irq_chip(data);
> > -   if (chip && chip->irq_set_vcpu_affinity)
> > -   ret = chip->irq_set_vcpu_affinity(data, vcpu_info);
> > -   irq_put_desc_unlock(desc, flags);
> > -
> > -   return ret;
> > -}
> > -EXPORT_SYMBOL_GPL(irq_set_vcpu_affinity);
> > -
> >  static void irq_affinity_notify(struct work_struct *work)
> >  {
> > struct irq_affinity_notify *notify =
> > @@ -424,6 +393,37 @@ setup_affinity(struct irq_desc *desc, struct
> cpumask *mask)
> >  }
> >  #endif
> >
> > +/**
> > + * irq_set_vcpu_affinity - Set vcpu affinity for the interrupt
> > + * @irq: interrupt number to set affinity
> > + * @vcpu_info: vCPU specific data
> > + *
> > + * This function uses the vCPU specific data to set the vCPU
> > + * affinity for an irq. The vCPU specific data is passed from
> > + * outside, such as KVM. One example code path is as below:
> > + * KVM -> IOMMU -> irq_set_vcpu_affinity().
> > + */
> > +int irq_set_vcpu_affinity(unsigned int irq, void *vcpu_info)
> > +{
> > +   unsigned long flags;
> > +   struct irq_desc *desc = irq_get_desc_lock(irq, &flags, 0);
> > +   struct irq_data *data;
> > +   struct irq_chip *chip;
> > +   int ret = -ENOSYS;
> > +
> > +   if (!desc)
> > +   return -EINVAL;
> > +
> > +   data = irq_desc_get_irq_data(desc);
> > +   chip = irq_data_get_irq_chip(data);
> > +   if (chip && chip->irq_set_vcpu_affinity)
> > +   ret = chip->irq_set_vcpu_affinity(data, vcpu_info);
> > +   irq_put_desc_unlock(desc, flags);
> > +
> > +   return ret;
> > +}
> > +EXPORT_SYMBOL_GPL(irq_set_vcpu_affinity);
> > +
> >  void __disable_irq(struct irq_desc *desc)
> >  {
> > if (!desc->depth++)
> >
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

RE: [PATCH v9 00/18] Add VT-d Posted-Interrupts support - including prerequisite series

2015-09-28 Thread Wu, Feng



> -Original Message-
> From: Paolo Bonzini [mailto:pbonz...@redhat.com]
> Sent: Friday, September 25, 2015 7:15 PM
> To: Wu, Feng; alex.william...@redhat.com; j...@8bytes.org;
> mtosa...@redhat.com
> Cc: eric.au...@linaro.org; kvm@vger.kernel.org;
> io...@lists.linux-foundation.org; linux-ker...@vger.kernel.org
> Subject: Re: [PATCH v9 00/18] Add VT-d Posted-Interrupts support - including
> prerequisite series
> 
> 
> 
> On 25/09/2015 03:49, Wu, Feng wrote:
> > Hi Paolo,
> >
> > Thanks for your review on this series! I'd like to confirm this series (plus
> > the patch fixing the compilation error) is okay to you and I don't need to
> > do extra things for it, right?
> 
> Yes, can you check if branch vtd-pi of
> git://git.kernel.org/pub/scm/virt/kvm/kvm.git works for you?  If so I'll
> merge it.

Thanks a lot for creating a branch for VT-d PI. However, I cannot launch guests
with this tree. I encountered the following kernel dump, and I found that the
problematic commit is "2260b1cde0b5472ab70ad0764b10095372e41913":

KVM: x86: put vcpu_create under kvm->srcu critical section

This is needed in case vcpu_create wants to access the memslots array.
Fixes this lockdep splat:

After removing this commit from the tree, my VT-d patch-set works fine.


Kernel dump:
[  221.978182] BUG: unable to handle kernel NULL pointer dereference at 
  (null)
[  221.986085] IP: [] kvm_arch_vcpu_create+0x30/0x90 [kvm]
[  221.993102] PGD 0
[  221.995148] Oops:  [#1] SMP
[  221.998440] Modules linked in: bnep rfcomm bluetooth ax88179_178a usbnet 
intel_rapl mii snd_hda_codec_hdmi iosf_mbi x86_pkg_temp_thermal nouveau 
intel_powerclamp snd_hda_intel snd_hda_codec coretemp kvm_intel snd_hda_core 
kvm snd_hwdep snd_pcm crct10dif_pclmul crc32_pclmul snd_seq_midi 
ghash_clmulni_intel mxm_wmi snd_seq_midi_event snd_rawmidi video snd_seq ttm 
aesni_intel aes_x86_64 lrw gf128mul drm_kms_helper snd_seq_device binfmt_misc 
snd_timer glue_helper ablk_helper drm cryptd fb_sys_fops snd syscopyarea 
sysfillrect sb_edac soundcore sysimgblt mei_me parport_pc edac_core ppdev mei 
shpchp lp lpc_ich mac_hid parport acpi_power_meter wmi ixgbe igb i2c_algo_bit 
hid_generic usbhid ptp ahci hid libahci pps_core mdio
[  222.063533] CPU: 4 PID: 3384 Comm: qemu-system-x86 Not tainted 4.3.0-rc1+ #6
[  222.070612] Hardware name: Intel Corp. GRANGEVILLE/GRANTLEY, BIOS 
GNVDCRB1.86B.0020.V07.1409241147 09/24/2014
[  222.080764] task: 88006e7c8000 ti: 8800714a8000 task.ti: 
8800714a8000
[  222.088283] RIP: 0010:[]  [] 
kvm_arch_vcpu_create+0x30/0x90 [kvm]
[  222.097680] RSP: 0018:8800714abde0  EFLAGS: 00010246
[  222.103153] RAX:  RBX: 88016f28c000 RCX: 
[  222.110407] RDX:  RSI:  RDI: 88016f28c000
[  222.117659] RBP: 8800714abdf8 R08: 0001 R09: 0040
[  222.124824] R10: 880077e86438 R11: 880163e06880 R12: 88016f28c000
[  222.132150] R13:  R14: ae41 R15: 
[  222.139405] FS:  7f43fd7ec700() GS:88017870() 
knlGS:
[  222.147629] CS:  0010 DS:  ES:  CR0: 80050033
[  222.153471] CR2:  CR3: 00017074b000 CR4: 003426e0
[  222.160726] DR0:  DR1:  DR2: 
[  222.167979] DR3:  DR6: fffe0ff0 DR7: 0400
[  222.175231] Stack:
[  222.177277]   88016f28c000  
8800714abea0
[  222.184870]  c0355b17 0008 8800714abe28 
810aba32
[  222.192444]  880178816e40 8800714abe40 810a4f44 
880178816e40
[  222.200017] Call Trace:
[  222.202522]  [] kvm_vm_ioctl+0x277/0x6e0 [kvm]
[  222.208633]  [] ? put_prev_task_fair+0x22/0x40
[  222.214741]  [] ? pick_next_task_idle+0x14/0x30
[  222.220942]  [] do_vfs_ioctl+0x2ba/0x490
[  222.226523]  [] ? __do_page_fault+0x1ba/0x410
[  222.232546]  [] SyS_ioctl+0x79/0x90
[  222.237684]  [] ? syscall_return_slowpath+0x55/0x150
[  222.244323]  [] entry_SYSCALL_64_fastpath+0x16/0x75
[  222.250869] Code: 55 48 89 e5 41 55 41 54 53 41 89 f5 48 89 fb e8 27 61 cb 
c0 85 c0 74 13 8b 83 f0 09 00 00 85 c0 74 09 80 3d 53 2e 04 00 00 74 40 <48> 8b 
04 25 00 00 00 00 48 8d 78 48 e8 7f c4 d6 c0 41 89 c4 48
[  222.270790] RIP  [] kvm_arch_vcpu_create+0x30/0x90 [kvm]
[  222.277813]  RSP 
[  222.281359] CR2: 
[  222.290421] ---[ end trace 957f5a39692fe6c7 ]---
root@feng-bdw-de-pi:~/workspace/tools# dmesg > ~/dmesg.log
root@feng-bdw-de-pi:~/workspace/tools# vim ~/dmesg.log
[  221.998440] Modules linked in: bnep rfcomm bluetooth ax88179_178a usbnet 
intel_rapl mii snd_hda_codec_hdmi iosf_mbi x86_pkg_temp_thermal nouveau 
intel_powerclamp snd_hda_intel snd_hda_codec coretemp kvm_intel snd_hda_core 
kvm snd_h

RE: [PATCH v9 00/18] Add VT-d Posted-Interrupts support - including prerequisite series

2015-09-28 Thread Wu, Feng



> -Original Message-
> From: Paolo Bonzini [mailto:pbonz...@redhat.com]
> Sent: Monday, September 28, 2015 6:19 PM
> To: Wu, Feng; alex.william...@redhat.com; j...@8bytes.org;
> mtosa...@redhat.com
> Cc: eric.au...@linaro.org; kvm@vger.kernel.org;
> io...@lists.linux-foundation.org; linux-ker...@vger.kernel.org
> Subject: Re: [PATCH v9 00/18] Add VT-d Posted-Interrupts support - including
> prerequisite series
> 
> 
> 
> On 28/09/2015 12:14, Wu, Feng wrote:
> > Thanks a lot for creating branch for vt-d pi. However, I cannot launch 
> > guests
> > with this tree. I encountered the following kernel dump, and I find that the
> > problematic commit is " 2260b1cde0b5472ab70ad0764b10095372e41913 "
> >
> > KVM: x86: put vcpu_create under kvm->srcu critical section
> >
> > This is needed in case vcpu_create wants to access the memslots array.
> > Fixes this lockdep splat:
> >
> > After removing this commit from the tree, my VT-d patch-set works fine.
> 
> Great, thanks.  The above commit had already been reverted.
> 
> I'm sorting out the kbuild reports, and then will merge VT-d PI.

Thanks a lot for making this happen!

Thanks,
Feng

> 
> Paolo
> 
> >
> > Kernel dump:
> > [  221.978182] BUG: unable to handle kernel NULL pointer dereference at
> (null)
> > [  221.986085] IP: [] kvm_arch_vcpu_create+0x30/0x90
> [kvm]
> > [  221.993102] PGD 0
> > [  221.995148] Oops:  [#1] SMP
> > [  221.998440] Modules linked in: bnep rfcomm bluetooth ax88179_178a
> usbnet intel_rapl mii snd_hda_codec_hdmi iosf_mbi x86_pkg_temp_thermal
> nouveau intel_powerclamp snd_hda_intel snd_hda_codec coretemp kvm_intel
> snd_hda_core kvm snd_hwdep snd_pcm crct10dif_pclmul crc32_pclmul
> snd_seq_midi ghash_clmulni_intel mxm_wmi snd_seq_midi_event snd_rawmidi
> video snd_seq ttm aesni_intel aes_x86_64 lrw gf128mul drm_kms_helper
> snd_seq_device binfmt_misc snd_timer glue_helper ablk_helper drm cryptd
> fb_sys_fops snd syscopyarea sysfillrect sb_edac soundcore sysimgblt mei_me
> parport_pc edac_core ppdev mei shpchp lp lpc_ich mac_hid parport
> acpi_power_meter wmi ixgbe igb i2c_algo_bit hid_generic usbhid ptp ahci hid
> libahci pps_core mdio
> > [  222.063533] CPU: 4 PID: 3384 Comm: qemu-system-x86 Not tainted
> 4.3.0-rc1+ #6
> > [  222.070612] Hardware name: Intel Corp. GRANGEVILLE/GRANTLEY, BIOS
> GNVDCRB1.86B.0020.V07.1409241147 09/24/2014
> > [  222.080764] task: 88006e7c8000 ti: 8800714a8000 task.ti:
> 8800714a8000
> > [  222.088283] RIP: 0010:[]  []
> kvm_arch_vcpu_create+0x30/0x90 [kvm]
> > [  222.097680] RSP: 0018:8800714abde0  EFLAGS: 00010246
> > [  222.103153] RAX:  RBX: 88016f28c000 RCX:
> 
> > [  222.110407] RDX:  RSI:  RDI:
> 88016f28c000
> > [  222.117659] RBP: 8800714abdf8 R08: 0001 R09:
> 0040
> > [  222.124824] R10: 880077e86438 R11: 880163e06880 R12:
> 88016f28c000
> > [  222.132150] R13:  R14: ae41 R15:
> 
> > [  222.139405] FS:  7f43fd7ec700() GS:88017870()
> knlGS:
> > [  222.147629] CS:  0010 DS:  ES:  CR0: 80050033
> > [  222.153471] CR2:  CR3: 00017074b000 CR4:
> 003426e0
> > [  222.160726] DR0:  DR1:  DR2:
> 
> > [  222.167979] DR3:  DR6: fffe0ff0 DR7:
> 0400
> > [  222.175231] Stack:
> > [  222.177277]   88016f28c000 
> 8800714abea0
> > [  222.184870]  c0355b17 0008 8800714abe28
> 810aba32
> > [  222.192444]  880178816e40 8800714abe40 810a4f44
> 880178816e40
> > [  222.200017] Call Trace:
> > [  222.202522]  [] kvm_vm_ioctl+0x277/0x6e0 [kvm]
> > [  222.208633]  [] ? put_prev_task_fair+0x22/0x40
> > [  222.214741]  [] ? pick_next_task_idle+0x14/0x30
> > [  222.220942]  [] do_vfs_ioctl+0x2ba/0x490
> > [  222.226523]  [] ? __do_page_fault+0x1ba/0x410
> > [  222.232546]  [] SyS_ioctl+0x79/0x90
> > [  222.237684]  [] ? syscall_return_slowpath+0x55/0x150
> > [  222.244323]  []
> entry_SYSCALL_64_fastpath+0x16/0x75
> > [  222.250869] Code: 55 48 89 e5 41 55 41 54 53 41 89 f5 48 89 fb e8 27 61
> cb c0 85 c0 74 13 8b 83 f0 09 00 00 85 c0 74 09 80 3d 53 2e 04 00 00 74 40 
> <48>
> 8b 04 25 00 00 00 00 48 8d 78 48 e8 7f c4 d6 c0 41 89 c4 48
> > [  222.270790] RIP  [] kvm_arch_vcpu_create+0x30/0x90
> [kvm]
> > [  222

RE: [PATCH v9 00/18] Add VT-d Posted-Interrupts support - including prerequisite series

2015-09-24 Thread Wu, Feng

Hi Paolo,

Thanks for your review on this series! I'd like to confirm this series (plus
the patch fixing the compilation error) is okay to you and I don't need to
do extra things for it, right?

Thanks,
Feng

> -Original Message-
> From: Wu, Feng
> Sent: Friday, September 18, 2015 10:30 PM
> To: pbonz...@redhat.com; alex.william...@redhat.com; j...@8bytes.org;
> mtosa...@redhat.com
> Cc: eric.au...@linaro.org; kvm@vger.kernel.org;
> io...@lists.linux-foundation.org; linux-ker...@vger.kernel.org; Wu, Feng
> Subject: [PATCH v9 00/18] Add VT-d Posted-Interrupts support - including
> prerequisite series
> 
> VT-d Posted-Interrupts is an enhancement to CPU side Posted-Interrupt.
> With VT-d Posted-Interrupts enabled, external interrupts from
> direct-assigned devices can be delivered to guests without VMM
> intervention when guest is running in non-root mode.
> 
> You can find the VT-d Posted-Interrupts Spec. in the following URL:
> http://www.intel.com/content/www/us/en/intelligent-systems/intel-technolog
> y/vt-directed-io-spec.html
> 
> v9:
> - Include the whole series:
> [01/18]: irq bypasser manager
> [02/18] - [06/18]: Common non-architecture part for VT-d PI and ARM side
> forwarded irq
> [07/18] - [18/18]: VT-d PI part
> 
> v8:
> refer to the changelog in each patch
> 
> v7:
> * Define two weak irq bypass callbacks:
>   - kvm_arch_irq_bypass_start()
>   - kvm_arch_irq_bypass_stop()
> * Remove the x86 dummy implementation of the above two functions.
> * Print some useful information instead of WARN_ON() when the
>   irq bypass consumer unregistration fails.
> * Fix an issue when calling pi_pre_block and pi_post_block.
> 
> v6:
> * Rebase on 4.2.0-rc6
> * Rebase on https://lkml.org/lkml/2015/8/6/526 and
> http://www.gossamer-threads.com/lists/linux/kernel/2235623
> * Make the add_consumer and del_consumer callbacks static
> * Remove pointless INIT_LIST_HEAD to 'vdev->ctx[vector].producer.node)'
> * Use dev_info instead of WARN_ON() when irq_bypass_register_producer fails
> * Remove optional dummy callbacks for irq producer
> 
> v4:
> * For lowest-priority interrupt, only support single-CPU destination
> interrupts at the current stage, more common lowest priority support
> will be added later.
> * According to Marcelo's suggestion, when vCPU is blocked, we handle
> the posted-interrupts in the HLT emulation path.
> * Some small changes (coding style, typo, add some code comments)
> 
> v3:
> * Adjust the Posted-interrupts Descriptor updating logic when vCPU is
>   preempted or blocked.
> * KVM_DEV_VFIO_DEVICE_POSTING_IRQ -->
> KVM_DEV_VFIO_DEVICE_POST_IRQ
> * __KVM_HAVE_ARCH_KVM_VFIO_POSTING -->
> __KVM_HAVE_ARCH_KVM_VFIO_POST
> * Add KVM_DEV_VFIO_DEVICE_UNPOST_IRQ attribute for VFIO irq, which
>   can be used to change back to remapping mode.
> * Fix typo
> 
> v2:
> * Use VFIO framework to enable this feature, the VFIO part of this series is
>   base on Eric's patch "[PATCH v3 0/8] KVM-VFIO IRQ forward control"
> * Rebase this patchset on
> git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git,
>   then revise some irq logic based on the new hierarchy irqdomain patches
> provided
>   by Jiang Liu <jiang@linux.intel.com>
> 
> 
> *** BLURB HERE ***
> 
> Alex Williamson (1):
>   virt: IRQ bypass manager
> 
> Eric Auger (4):
>   KVM: arm/arm64: select IRQ_BYPASS_MANAGER
>   KVM: create kvm_irqfd.h
>   KVM: introduce kvm_arch functions for IRQ bypass
>   KVM: eventfd: add irq bypass consumer management
> 
> Feng Wu (13):
>   KVM: x86: select IRQ_BYPASS_MANAGER
>   KVM: Extend struct pi_desc for VT-d Posted-Interrupts
>   KVM: Add some helper functions for Posted-Interrupts
>   KVM: Define a new interface kvm_intr_is_single_vcpu()
>   KVM: Make struct kvm_irq_routing_table accessible
>   KVM: make kvm_set_msi_irq() public
>   vfio: Register/unregister irq_bypass_producer
>   KVM: x86: Update IRTE for posted-interrupts
>   KVM: Implement IRQ bypass consumer callbacks for x86
>   KVM: Add an arch specific hooks in 'struct kvm_kernel_irqfd'
>   KVM: Update Posted-Interrupts Descriptor when vCPU is preempted
>   KVM: Update Posted-Interrupts Descriptor when vCPU is blocked
>   iommu/vt-d: Add a command line parameter for VT-d posted-interrupts
> 
>  Documentation/kernel-parameters.txt   |   1 +
>  Documentation/virtual/kvm/locking.txt |  12 ++
>  MAINTAINERS   |   7 +
>  arch/arm/kvm/Kconfig  |   2 +
>  arch/arm/kvm/Makefile |   1 +
>  arch/arm64/kvm/Kconfig|   2 +
>  arch/arm64/kvm/Makefile   |   1 +
>  arch/x86/include/asm/kvm_host.h   |  24

RE: [PATCH v9 12/18] vfio: Register/unregister irq_bypass_producer

2015-09-22 Thread Wu, Feng



> -Original Message-
> From: linux-kernel-ow...@vger.kernel.org
> [mailto:linux-kernel-ow...@vger.kernel.org] On Behalf Of Eric Auger
> Sent: Tuesday, September 22, 2015 3:46 AM
> To: Paolo Bonzini; Wu, Feng; alex.william...@redhat.com; j...@8bytes.org;
> mtosa...@redhat.com
> Cc: kvm@vger.kernel.org; io...@lists.linux-foundation.org;
> linux-ker...@vger.kernel.org
> Subject: Re: [PATCH v9 12/18] vfio: Register/unregister irq_bypass_producer
> 
> Hi,
> On 09/21/2015 03:02 PM, Paolo Bonzini wrote:
> >
> >
> > On 21/09/2015 14:53, Wu, Feng wrote:
> >>>>>> I think the point is that we cannot trigger the build of irqbypass
> >>>>>> manager inside KVM or VFIO, we need trigger the build at a high
> >>>>>> level and it should be built before VFIO and KVM. Any ideas?
> >>>>
> >>>> We can add virt/Makefile and build virt/lib/ directly, not through
> >>>> arch/x86/kvm.
> >> Thinking about this more, does that mean we need to add the virt directory
> >> in the top Makefile in Linux tree?
> >
> > Yes, it does.
> So I understand this will replace patches 2 & 3 then and will fix the
> arm64 issue then.

I just sent a patch to fix this build error. BTW, from Paolo's reply, it seems
he dropped patch 3 in this series; maybe he thinks it doesn't have much to do
with the other patches, so maybe you could include it in your series
when the forwarded irq work gets ready.

Thanks,
Feng

> 
> Thanks
> 
> Eric
> 
> >
> > Paolo
> >
> 
> --
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to majord...@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at  http://www.tux.org/lkml/
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

RE: [PATCH v9 12/18] vfio: Register/unregister irq_bypass_producer



> -Original Message-
> From: Paolo Bonzini [mailto:pbonz...@redhat.com]
> Sent: Monday, September 21, 2015 5:32 PM
> To: Wu, Feng; alex.william...@redhat.com; j...@8bytes.org;
> mtosa...@redhat.com
> Cc: eric.au...@linaro.org; kvm@vger.kernel.org;
> io...@lists.linux-foundation.org; linux-ker...@vger.kernel.org
> Subject: Re: [PATCH v9 12/18] vfio: Register/unregister irq_bypass_producer
> 
> 
> 
> On 21/09/2015 10:56, Wu, Feng wrote:
> > Hi Paolo & Alex,
> >
> > I find that there is a build error in the following two cases:
> > - KVM is configured as 'M' and VFIO as 'Y'
> > The reason is the build of irqbypass manager is triggered in
> > arch/x86/kvm/Makefile, and VFIO is built before KVM, hence
> > it cannot find the symbols in irqbypass manager.
> >
> > - Disable KVM and enable VFIO in .config
> > The reason is similar with the above one, the irqbypass manager
> > is not built since KVM is not configured.
> >
> > I think the point is that we cannot trigger the build of irqbypass
> > manager inside KVM or VFIO, we need trigger the build at a high
> > level and it should be built before VFIO and KVM. Any ideas?
> 
> We can add virt/Makefile and build virt/lib/ directly, not through
> arch/x86/kvm.

Yes, that can solve the build error. Should I send a new version?

Thanks,
Feng

> 
> Paolo
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

RE: [PATCH v9 12/18] vfio: Register/unregister irq_bypass_producer



> -Original Message-
> From: Paolo Bonzini [mailto:pbonz...@redhat.com]
> Sent: Monday, September 21, 2015 5:32 PM
> To: Wu, Feng; alex.william...@redhat.com; j...@8bytes.org;
> mtosa...@redhat.com
> Cc: eric.au...@linaro.org; kvm@vger.kernel.org;
> io...@lists.linux-foundation.org; linux-ker...@vger.kernel.org
> Subject: Re: [PATCH v9 12/18] vfio: Register/unregister irq_bypass_producer
> 
> 
> 
> On 21/09/2015 10:56, Wu, Feng wrote:
> > Hi Paolo & Alex,
> >
> > I find that there is a build error in the following two cases:
> > - KVM is configured as 'M' and VFIO as 'Y'
> > The reason is the build of irqbypass manager is triggered in
> > arch/x86/kvm/Makefile, and VFIO is built before KVM, hence
> > it cannot find the symbols in irqbypass manager.
> >
> > - Disable KVM and enable VFIO in .config
> > The reason is similar with the above one, the irqbypass manager
> > is not built since KVM is not configured.
> >
> > I think the point is that we cannot trigger the build of irqbypass
> > manager inside KVM or VFIO, we need trigger the build at a high
> > level and it should be built before VFIO and KVM. Any ideas?
> 
> We can add virt/Makefile and build virt/lib/ directly, not through
> arch/x86/kvm.

Thinking about this more, does that mean we need to add the virt directory
to the top-level Makefile in the Linux tree?

Thanks,
Feng

> 
> Paolo
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

RE: [PATCH v9 12/18] vfio: Register/unregister irq_bypass_producer


> -Original Message-
> From: Paolo Bonzini [mailto:pbonz...@redhat.com]
> Sent: Monday, September 21, 2015 8:07 PM
> To: Wu, Feng; alex.william...@redhat.com; j...@8bytes.org;
> mtosa...@redhat.com
> Cc: eric.au...@linaro.org; kvm@vger.kernel.org;
> io...@lists.linux-foundation.org; linux-ker...@vger.kernel.org
> Subject: Re: [PATCH v9 12/18] vfio: Register/unregister irq_bypass_producer
> 
> 
> 
> On 21/09/2015 13:35, Wu, Feng wrote:
> >>> > > I think the point is that we cannot trigger the build of irqbypass
> >>> > > manager inside KVM or VFIO, we need trigger the build at a high
> >>> > > level and it should be built before VFIO and KVM. Any ideas?
> >> >
> >> > We can add virt/Makefile and build virt/lib/ directly, not through
> >> > arch/x86/kvm.
> > Yes, that can solve the build error. Should I send a new version?
> 
> You can send a separate patch on top of this v9.

Sure, will do this soon!

Thanks,
Feng

> 
> Paolo
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

RE: [PATCH v9 12/18] vfio: Register/unregister irq_bypass_producer

Hi Paolo & Alex,

I find that there is a build error in the following two cases:
- KVM is configured as 'M' and VFIO as 'Y'
The reason is the build of irqbypass manager is triggered in
arch/x86/kvm/Makefile, and VFIO is built before KVM, hence
it cannot find the symbols in irqbypass manager.

- Disable KVM and enable VFIO in .config
The reason is similar to the above one: the irqbypass manager
is not built since KVM is not configured.

I think the point is that we cannot trigger the build of the irqbypass
manager inside KVM or VFIO; we need to trigger the build at a higher
level, and it should be built before VFIO and KVM. Any ideas?

Thanks,
Feng

> -Original Message-
> From: Wu, Feng
> Sent: Friday, September 18, 2015 10:30 PM
> To: pbonz...@redhat.com; alex.william...@redhat.com; j...@8bytes.org;
> mtosa...@redhat.com
> Cc: eric.au...@linaro.org; kvm@vger.kernel.org;
> io...@lists.linux-foundation.org; linux-ker...@vger.kernel.org; Wu, Feng
> Subject: [PATCH v9 12/18] vfio: Register/unregister irq_bypass_producer
> 
> This patch adds the registration/unregistration of an
> irq_bypass_producer for MSI/MSIx on vfio pci devices.
> 
> Signed-off-by: Feng Wu <feng...@intel.com>
> ---
> v8:
> - Merge "[PATCH v7 08/17] vfio: Select IRQ_BYPASS_MANAGER for vfio PCI
> devices"
>   into this patch.
> 
> v6:
> - Make the add_consumer and del_consumer callbacks static
> - Remove pointless INIT_LIST_HEAD to 'vdev->ctx[vector].producer.node)'
> - Use dev_info instead of WARN_ON() when irq_bypass_register_producer fails
> - Remove optional dummy callbacks for irq producer
> 
>  drivers/vfio/pci/Kconfig| 1 +
>  drivers/vfio/pci/vfio_pci_intrs.c   | 9 +
>  drivers/vfio/pci/vfio_pci_private.h | 2 ++
>  3 files changed, 12 insertions(+)
> 
> diff --git a/drivers/vfio/pci/Kconfig b/drivers/vfio/pci/Kconfig
> index 579d83b..02912f1 100644
> --- a/drivers/vfio/pci/Kconfig
> +++ b/drivers/vfio/pci/Kconfig
> @@ -2,6 +2,7 @@ config VFIO_PCI
>   tristate "VFIO support for PCI devices"
>   depends on VFIO && PCI && EVENTFD
>   select VFIO_VIRQFD
> + select IRQ_BYPASS_MANAGER
>   help
> Support for the PCI VFIO bus driver.  This is required to make
> use of PCI drivers using the VFIO framework.
> diff --git a/drivers/vfio/pci/vfio_pci_intrs.c 
> b/drivers/vfio/pci/vfio_pci_intrs.c
> index 1f577b4..c65299d 100644
> --- a/drivers/vfio/pci/vfio_pci_intrs.c
> +++ b/drivers/vfio/pci/vfio_pci_intrs.c
> @@ -319,6 +319,7 @@ static int vfio_msi_set_vector_signal(struct
> vfio_pci_device *vdev,
> 
>   if (vdev->ctx[vector].trigger) {
>   free_irq(irq, vdev->ctx[vector].trigger);
> + irq_bypass_unregister_producer(&vdev->ctx[vector].producer);
>   kfree(vdev->ctx[vector].name);
>   eventfd_ctx_put(vdev->ctx[vector].trigger);
>   vdev->ctx[vector].trigger = NULL;
> @@ -360,6 +361,14 @@ static int vfio_msi_set_vector_signal(struct
> vfio_pci_device *vdev,
>   return ret;
>   }
> 
> + vdev->ctx[vector].producer.token = trigger;
> + vdev->ctx[vector].producer.irq = irq;
> + ret = irq_bypass_register_producer(&vdev->ctx[vector].producer);
> + if (unlikely(ret))
> + dev_info(&pdev->dev,
> + "irq bypass producer (token %p) registeration fails: %d\n",
> + vdev->ctx[vector].producer.token, ret);
> +
>   vdev->ctx[vector].trigger = trigger;
> 
>   return 0;
> diff --git a/drivers/vfio/pci/vfio_pci_private.h
> b/drivers/vfio/pci/vfio_pci_private.h
> index ae0e1b4..0e7394f 100644
> --- a/drivers/vfio/pci/vfio_pci_private.h
> +++ b/drivers/vfio/pci/vfio_pci_private.h
> @@ -13,6 +13,7 @@
> 
>  #include 
>  #include 
> +#include 
> 
>  #ifndef VFIO_PCI_PRIVATE_H
>  #define VFIO_PCI_PRIVATE_H
> @@ -29,6 +30,7 @@ struct vfio_pci_irq_ctx {
>   struct virqfd   *mask;
>   char*name;
>   boolmasked;
> + struct irq_bypass_producer  producer;
>  };
> 
>  struct vfio_pci_device {
> --
> 2.1.0

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

RE: [PATCH v9 17/18] KVM: Update Posted-Interrupts Descriptor when vCPU is blocked

2015-09-20 Thread Wu, Feng



> -Original Message-
> From: Paolo Bonzini [mailto:paolo.bonz...@gmail.com] On Behalf Of Paolo
> Bonzini
> Sent: Saturday, September 19, 2015 12:07 AM
> To: Wu, Feng; Alex Williamson; j...@8bytes.org; Marcelo Tosatti
> Cc: io...@lists.linux-foundation.org; linux-ker...@vger.kernel.org; KVM list;
> Eric Auger
> Subject: Re: [PATCH v9 17/18] KVM: Update Posted-Interrupts Descriptor when
> vCPU is blocked
> 
> 
> 
> On 18/09/2015 16:29, Feng Wu wrote:
> > This patch updates the Posted-Interrupts Descriptor when vCPU
> > is blocked.
> >
> > pre-block:
> > - Add the vCPU to the blocked per-CPU list
> > - Set 'NV' to POSTED_INTR_WAKEUP_VECTOR
> >
> > post-block:
> > - Remove the vCPU from the per-CPU list
> >
> > Signed-off-by: Feng Wu
> <feng.wu-ral2jqcrhueavxtiumw...@public.gmane.org>
> > ---
> > v9:
> > - Add description for blocked_vcpu_on_cpu_lock in
> Documentation/virtual/kvm/locking.txt
> > - Check !kvm_arch_has_assigned_device(vcpu->kvm) first, then
> >   !irq_remapping_cap(IRQ_POSTING_CAP)
> >
> > v8:
> > - Rename 'pi_pre_block' to 'pre_block'
> > - Rename 'pi_post_block' to 'post_block'
> > - Change some comments
> > - Only add the vCPU to the blocking list when the VM has assigned devices.
> >
> >  Documentation/virtual/kvm/locking.txt |  12 +++
> >  arch/x86/include/asm/kvm_host.h   |  13 +++
> >  arch/x86/kvm/vmx.c| 153
> ++
> >  arch/x86/kvm/x86.c|  53 +---
> >  include/linux/kvm_host.h  |   3 +
> >  virt/kvm/kvm_main.c   |   3 +
> >  6 files changed, 227 insertions(+), 10 deletions(-)
> >
> > diff --git a/Documentation/virtual/kvm/locking.txt
> b/Documentation/virtual/kvm/locking.txt
> > index d68af4d..19f94a6 100644
> > --- a/Documentation/virtual/kvm/locking.txt
> > +++ b/Documentation/virtual/kvm/locking.txt
> > @@ -166,3 +166,15 @@ Comment:   The srcu read lock must be held while
> accessing memslots (e.g.
> > MMIO/PIO address->device structure mapping (kvm->buses).
> > The srcu index can be stored in kvm_vcpu->srcu_idx per vcpu
> > if it is needed by multiple functions.
> > +
> > +Name:  blocked_vcpu_on_cpu_lock
> > +Type:  spinlock_t
> > +Arch:  x86
> > +Protects:  blocked_vcpu_on_cpu
> > +Comment:   This is a per-CPU lock and it is used for VT-d 
> > posted-interrupts.
> > +   When VT-d posted-interrupts is supported and the VM has assigned
> > +   devices, we put the blocked vCPU on the list blocked_vcpu_on_cpu
> > +   protected by blocked_vcpu_on_cpu_lock, when VT-d hardware issues
> > +   wakeup notification event since external interrupts from the
> > +   assigned devices happens, we will find the vCPU on the list to
> > +   wakeup.
> > diff --git a/arch/x86/include/asm/kvm_host.h
> b/arch/x86/include/asm/kvm_host.h
> > index 0ddd353..304fbb5 100644
> > --- a/arch/x86/include/asm/kvm_host.h
> > +++ b/arch/x86/include/asm/kvm_host.h
> > @@ -552,6 +552,8 @@ struct kvm_vcpu_arch {
> >  */
> > bool write_fault_to_shadow_pgtable;
> >
> > +   bool halted;
> > +
> > /* set at EPT violation at this point */
> > unsigned long exit_qualification;
> >
> > @@ -864,6 +866,17 @@ struct kvm_x86_ops {
> > /* pmu operations of sub-arch */
> > const struct kvm_pmu_ops *pmu_ops;
> >
> > +   /*
> > +* Architecture specific hooks for vCPU blocking due to
> > +* HLT instruction.
> > +* Returns for .pre_block():
> > +*- 0 means continue to block the vCPU.
> > +*- 1 means we cannot block the vCPU since some event
> > +*happens during this period, such as, 'ON' bit in
> > +*posted-interrupts descriptor is set.
> > +*/
> > +   int (*pre_block)(struct kvm_vcpu *vcpu);
> > +   void (*post_block)(struct kvm_vcpu *vcpu);
> > int (*update_pi_irte)(struct kvm *kvm, unsigned int host_irq,
> >   uint32_t guest_irq, bool set);
> >  };
> > diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> > index 902a67d..9968896 100644
> > --- a/arch/x86/kvm/vmx.c
> > +++ b/arch/x86/kvm/vmx.c
> > @@ -879,6 +879,13 @@ static DEFINE_PER_CPU(struct vmcs *,
> current_vmcs);
> >  static DEFINE_PER_CPU(struct list_head, loaded_vmcss_on_cpu);
> >  static DEFINE_PER_CPU(struct des

RE: [PATCH v9 17/18] KVM: Update Posted-Interrupts Descriptor when vCPU is blocked

2015-09-20 Thread Wu, Feng



> -Original Message-
> From: Paolo Bonzini [mailto:paolo.bonz...@gmail.com] On Behalf Of Paolo
> Bonzini
> Sent: Monday, September 21, 2015 1:33 PM
> To: Wu, Feng; Alex Williamson; j...@8bytes.org; Marcelo Tosatti
> Cc: io...@lists.linux-foundation.org; linux-ker...@vger.kernel.org; KVM list;
> Eric Auger
> Subject: Re: [PATCH v9 17/18] KVM: Update Posted-Interrupts Descriptor when
> vCPU is blocked
> 
> 
> 
> On 21/09/2015 04:16, Wu, Feng wrote:
> > I tested the above patch you suggested, it works fine. Thank you! So
> > do I need to resend a new version or you can handle it in your tree?
> 
> I will handle it.

Thanks a lot for your review on this series!

Thanks,
Feng

> 
> Paolo
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

RE: [PATCH v9 17/18] KVM: Update Posted-Interrupts Descriptor when vCPU is blocked

2015-09-19 Thread Wu, Feng



> -Original Message-
> From: Paolo Bonzini [mailto:paolo.bonz...@gmail.com] On Behalf Of Paolo
> Bonzini
> Sent: Saturday, September 19, 2015 12:07 AM
> To: Wu, Feng; Alex Williamson; j...@8bytes.org; Marcelo Tosatti
> Cc: io...@lists.linux-foundation.org; linux-ker...@vger.kernel.org; KVM list;
> Eric Auger
> Subject: Re: [PATCH v9 17/18] KVM: Update Posted-Interrupts Descriptor when
> vCPU is blocked
> 
> 
> 
> On 18/09/2015 16:29, Feng Wu wrote:
> > This patch updates the Posted-Interrupts Descriptor when vCPU
> > is blocked.
> >
> > pre-block:
> > - Add the vCPU to the blocked per-CPU list
> > - Set 'NV' to POSTED_INTR_WAKEUP_VECTOR
> >
> > post-block:
> > - Remove the vCPU from the per-CPU list
> >
> > Signed-off-by: Feng Wu
> <feng.wu-ral2jqcrhueavxtiumw...@public.gmane.org>
> > ---
> > v9:
> > - Add description for blocked_vcpu_on_cpu_lock in
> Documentation/virtual/kvm/locking.txt
> > - Check !kvm_arch_has_assigned_device(vcpu->kvm) first, then
> >   !irq_remapping_cap(IRQ_POSTING_CAP)
> >
> > v8:
> > - Rename 'pi_pre_block' to 'pre_block'
> > - Rename 'pi_post_block' to 'post_block'
> > - Change some comments
> > - Only add the vCPU to the blocking list when the VM has assigned devices.
> >
> >  Documentation/virtual/kvm/locking.txt |  12 +++
> >  arch/x86/include/asm/kvm_host.h   |  13 +++
> >  arch/x86/kvm/vmx.c| 153
> ++
> >  arch/x86/kvm/x86.c|  53 +---
> >  include/linux/kvm_host.h  |   3 +
> >  virt/kvm/kvm_main.c   |   3 +
> >  6 files changed, 227 insertions(+), 10 deletions(-)
> >
> > diff --git a/Documentation/virtual/kvm/locking.txt
> b/Documentation/virtual/kvm/locking.txt
> > index d68af4d..19f94a6 100644
> > --- a/Documentation/virtual/kvm/locking.txt
> > +++ b/Documentation/virtual/kvm/locking.txt
> > @@ -166,3 +166,15 @@ Comment:   The srcu read lock must be held while
> accessing memslots (e.g.
> > MMIO/PIO address->device structure mapping (kvm->buses).
> > The srcu index can be stored in kvm_vcpu->srcu_idx per vcpu
> > if it is needed by multiple functions.
> > +
> > +Name:  blocked_vcpu_on_cpu_lock
> > +Type:  spinlock_t
> > +Arch:  x86
> > +Protects:  blocked_vcpu_on_cpu
> > +Comment:   This is a per-CPU lock and it is used for VT-d 
> > posted-interrupts.
> > +   When VT-d posted-interrupts is supported and the VM has assigned
> > +   devices, we put the blocked vCPU on the list blocked_vcpu_on_cpu
> > +   protected by blocked_vcpu_on_cpu_lock, when VT-d hardware issues
> > +   wakeup notification event since external interrupts from the
> > +   assigned devices happens, we will find the vCPU on the list to
> > +   wakeup.
> > diff --git a/arch/x86/include/asm/kvm_host.h
> b/arch/x86/include/asm/kvm_host.h
> > index 0ddd353..304fbb5 100644
> > --- a/arch/x86/include/asm/kvm_host.h
> > +++ b/arch/x86/include/asm/kvm_host.h
> > @@ -552,6 +552,8 @@ struct kvm_vcpu_arch {
> >  */
> > bool write_fault_to_shadow_pgtable;
> >
> > +   bool halted;
> > +
> > /* set at EPT violation at this point */
> > unsigned long exit_qualification;
> >
> > @@ -864,6 +866,17 @@ struct kvm_x86_ops {
> > /* pmu operations of sub-arch */
> > const struct kvm_pmu_ops *pmu_ops;
> >
> > +   /*
> > +* Architecture specific hooks for vCPU blocking due to
> > +* HLT instruction.
> > +* Returns for .pre_block():
> > +*- 0 means continue to block the vCPU.
> > +*- 1 means we cannot block the vCPU since some event
> > +*happens during this period, such as, 'ON' bit in
> > +*posted-interrupts descriptor is set.
> > +*/
> > +   int (*pre_block)(struct kvm_vcpu *vcpu);
> > +   void (*post_block)(struct kvm_vcpu *vcpu);
> > int (*update_pi_irte)(struct kvm *kvm, unsigned int host_irq,
> >   uint32_t guest_irq, bool set);
> >  };
> > diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> > index 902a67d..9968896 100644
> > --- a/arch/x86/kvm/vmx.c
> > +++ b/arch/x86/kvm/vmx.c
> > @@ -879,6 +879,13 @@ static DEFINE_PER_CPU(struct vmcs *,
> current_vmcs);
> >  static DEFINE_PER_CPU(struct list_head, loaded_vmcss_on_cpu);
> >  static DEFINE_PER_CPU(struct des

RE: [PATCH v9 00/18] Add VT-d Posted-Interrupts support - including prerequisite series



> -Original Message-
> From: Paolo Bonzini [mailto:pbonz...@redhat.com]
> Sent: Friday, September 18, 2015 10:59 PM
> To: Wu, Feng; alex.william...@redhat.com; j...@8bytes.org;
> mtosa...@redhat.com
> Cc: eric.au...@linaro.org; kvm@vger.kernel.org;
> io...@lists.linux-foundation.org; linux-ker...@vger.kernel.org
> Subject: Re: [PATCH v9 00/18] Add VT-d Posted-Interrupts support - including
> prerequisite series
> 
> 
> 
> On 18/09/2015 16:29, Feng Wu wrote:
> > VT-d Posted-Interrupts is an enhancement to CPU side Posted-Interrupt.
> > With VT-d Posted-Interrupts enabled, external interrupts from
> > direct-assigned devices can be delivered to guests without VMM
> > intervention when guest is running in non-root mode.
> >
> > You can find the VT-d Posted-Interrtups Spec. in the following URL:
> >
> http://www.intel.com/content/www/us/en/intelligent-systems/intel-technolog
> y/vt-directed-io-spec.html
> 
> Thanks.  I will squash patches 2 and 14 together, and drop patch 3.
> 
> Signed-off-bys are missing in patch 1 and 4.  The patches exist
> elsewhere in the mailing list archives, so not a big deal.  Or just
> reply to them with the S-o-b line.
> 

Thanks for your quick response, Paolo! I didn't change the code
in patches 1 and 4. Do I need to add my s-o-b? If needed, I can reply
to the patches.

Thanks,
Feng

> Alex, can you ack the series and review patch 12?
> 
> Joerg, can you ack patch 18?
> 
> Paolo
> 
> > v9:
> > - Include the whole series:
> > [01/18]: irq bypasser manager
> > [02/18] - [06/18]: Common non-architecture part for VT-d PI and ARM side
> forwarded irq
> > [07/18] - [18/18]: VT-d PI part
> >
> > v8:
> > refer to the changelog in each patch
> >
> > v7:
> > * Define two weak irq bypass callbacks:
> >   - kvm_arch_irq_bypass_start()
> >   - kvm_arch_irq_bypass_stop()
> > * Remove the x86 dummy implementation of the above two functions.
> > * Print some useful information instead of WARN_ON() when the
> >   irq bypass consumer unregistration fails.
> > * Fix an issue when calling pi_pre_block and pi_post_block.
> >
> > v6:
> > * Rebase on 4.2.0-rc6
> > * Rebase on https://lkml.org/lkml/2015/8/6/526 and
> http://www.gossamer-threads.com/lists/linux/kernel/2235623
> > * Make the add_consumer and del_consumer callbacks static
> > * Remove pointless INIT_LIST_HEAD to 'vdev->ctx[vector].producer.node)'
> > * Use dev_info instead of WARN_ON() when irq_bypass_register_producer
> fails
> > * Remove optional dummy callbacks for irq producer
> >
> > v4:
> > * For lowest-priority interrupt, only support single-CPU destination
> > interrupts at the current stage, more common lowest priority support
> > will be added later.
> > * Accoring to Marcelo's suggestion, when vCPU is blocked, we handle
> > the posted-interrupts in the HLT emulation path.
> > * Some small changes (coding style, typo, add some code comments)
> >
> > v3:
> > * Adjust the Posted-interrupts Descriptor updating logic when vCPU is
> >   preempted or blocked.
> > * KVM_DEV_VFIO_DEVICE_POSTING_IRQ -->
> KVM_DEV_VFIO_DEVICE_POST_IRQ
> > * __KVM_HAVE_ARCH_KVM_VFIO_POSTING -->
> __KVM_HAVE_ARCH_KVM_VFIO_POST
> > * Add KVM_DEV_VFIO_DEVICE_UNPOST_IRQ attribute for VFIO irq, which
> >   can be used to change back to remapping mode.
> > * Fix typo
> >
> > v2:
> > * Use VFIO framework to enable this feature, the VFIO part of this series is
> >   base on Eric's patch "[PATCH v3 0/8] KVM-VFIO IRQ forward control"
> > * Rebase this patchset on
> git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git,
> >   then revise some irq logic based on the new hierarchy irqdomain patches
> provided
> >   by Jiang Liu <jiang@linux.intel.com>
> >
> >
> > *** BLURB HERE ***
> >
> > Alex Williamson (1):
> >   virt: IRQ bypass manager
> >
> > Eric Auger (4):
> >   KVM: arm/arm64: select IRQ_BYPASS_MANAGER
> >   KVM: create kvm_irqfd.h
> >   KVM: introduce kvm_arch functions for IRQ bypass
> >   KVM: eventfd: add irq bypass consumer management
> >
> > Feng Wu (13):
> >   KVM: x86: select IRQ_BYPASS_MANAGER
> >   KVM: Extend struct pi_desc for VT-d Posted-Interrupts
> >   KVM: Add some helper functions for Posted-Interrupts
> >   KVM: Define a new interface kvm_intr_is_single_vcpu()
> >   KVM: Make struct kvm_irq_routing_table accessible
> >   KVM: make kvm_set_msi_irq() public
> >   vfio: Register/unregister irq_bypass_producer
> >   KVM: x86: Update IR

RE: [PATCH v9 01/18] virt: IRQ bypass manager

Signed-off-by: Feng Wu <feng...@intel.com>

> -Original Message-
> From: iommu-boun...@lists.linux-foundation.org
> [mailto:iommu-boun...@lists.linux-foundation.org] On Behalf Of Feng Wu
> Sent: Friday, September 18, 2015 10:30 PM
> To: pbonz...@redhat.com; alex.william...@redhat.com; j...@8bytes.org;
> mtosa...@redhat.com
> Cc: io...@lists.linux-foundation.org; linux-ker...@vger.kernel.org;
> kvm@vger.kernel.org; eric.au...@linaro.org
> Subject: [PATCH v9 01/18] virt: IRQ bypass manager
> 
> From: Alex Williamson <alex.william...@redhat.com>
> 
> When a physical I/O device is assigned to a virtual machine through
> facilities like VFIO and KVM, the interrupt for the device generally
> bounces through the host system before being injected into the VM.
> However, hardware technologies exist that often allow the host to be
> bypassed for some of these scenarios.  Intel Posted Interrupts allow
> the specified physical edge interrupts to be directly injected into a
> guest when delivered to a physical processor while the vCPU is
> running.  ARM IRQ Forwarding allows forwarded physical interrupts to
> be directly deactivated by the guest.
> 
> The IRQ bypass manager here is meant to provide the shim to connect
> interrupt producers, generally the host physical device driver, with
> interrupt consumers, generally the hypervisor, in order to configure
> these bypass mechanism.  To do this, we base the connection on a
> shared, opaque token.  For KVM-VFIO this is expected to be an
> eventfd_ctx since this is the connection we already use to connect an
> eventfd to an irqfd on the in-kernel path.  When a producer and
> consumer with matching tokens is found, callbacks via both registered
> participants allow the bypass facilities to be automatically enabled.
> 
> Signed-off-by: Alex Williamson <alex.william...@redhat.com>
> Reviewed-by: Eric Auger <eric.au...@linaro.org>
> Tested-by: Eric Auger <eric.au...@linaro.org>
> Tested-by: Feng Wu <feng...@intel.com>
> ---
> v4: All producer callbacks are optional, as with Intel PI, it's
> possible for the producer to be blissfully unaware of the bypass.
> 
>  MAINTAINERS   |   7 ++
>  include/linux/irqbypass.h |  90 
>  virt/lib/Kconfig  |   2 +
>  virt/lib/Makefile |   1 +
>  virt/lib/irqbypass.c  | 257
> ++
>  5 files changed, 357 insertions(+)
>  create mode 100644 include/linux/irqbypass.h
>  create mode 100644 virt/lib/Kconfig
>  create mode 100644 virt/lib/Makefile
>  create mode 100644 virt/lib/irqbypass.c
> 
> diff --git a/MAINTAINERS b/MAINTAINERS
> index a9ae6c1..10c8b2f 100644
> --- a/MAINTAINERS
> +++ b/MAINTAINERS
> @@ -10963,6 +10963,13 @@ L:   net...@vger.kernel.org
>  S:   Maintained
>  F:   drivers/net/ethernet/via/via-velocity.*
> 
> +VIRT LIB
> +M:   Alex Williamson <alex.william...@redhat.com>
> +M:   Paolo Bonzini <pbonz...@redhat.com>
> +L:   kvm@vger.kernel.org
> +S:   Supported
> +F:   virt/lib/
> +
>  VIVID VIRTUAL VIDEO DRIVER
>  M:   Hans Verkuil <hverk...@xs4all.nl>
>  L:   linux-me...@vger.kernel.org
> diff --git a/include/linux/irqbypass.h b/include/linux/irqbypass.h
> new file mode 100644
> index 000..1551b5b
> --- /dev/null
> +++ b/include/linux/irqbypass.h
> @@ -0,0 +1,90 @@
> +/*
> + * IRQ offload/bypass manager
> + *
> + * Copyright (C) 2015 Red Hat, Inc.
> + * Copyright (c) 2015 Linaro Ltd.
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License version 2 as
> + * published by the Free Software Foundation.
> + */
> +#ifndef IRQBYPASS_H
> +#define IRQBYPASS_H
> +
> +#include 
> +
> +struct irq_bypass_consumer;
> +
> +/*
> + * Theory of operation
> + *
> + * The IRQ bypass manager is a simple set of lists and callbacks that allows
> + * IRQ producers (ex. physical interrupt sources) to be matched to IRQ
> + * consumers (ex. virtualization hardware that allows IRQ bypass or offload)
> + * via a shared token (ex. eventfd_ctx).  Producers and consumers register
> + * independently.  When a token match is found, the optional @stop callback
> + * will be called for each participant.  The pair will then be connected via
> + * the @add_* callbacks, and finally the optional @start callback will allow
> + * any final coordination.  When either participant is unregistered, the
> + * process is repeated using the @del_* callbacks in place of the @add_*
> + * callbacks.  Match tokens must be unique per producer/consumer, 1:N
> pairings
> + * are not supported.
> + */
> +
> +/**

RE: [PATCH v9 04/18] KVM: create kvm_irqfd.h

Signed-off-by: Feng Wu <feng...@intel.com>

> -Original Message-
> From: iommu-boun...@lists.linux-foundation.org
> [mailto:iommu-boun...@lists.linux-foundation.org] On Behalf Of Feng Wu
> Sent: Friday, September 18, 2015 10:30 PM
> To: pbonz...@redhat.com; alex.william...@redhat.com; j...@8bytes.org;
> mtosa...@redhat.com
> Cc: io...@lists.linux-foundation.org; linux-ker...@vger.kernel.org;
> kvm@vger.kernel.org; eric.au...@linaro.org
> Subject: [PATCH v9 04/18] KVM: create kvm_irqfd.h
> 
> From: Eric Auger <eric.au...@linaro.org>
> 
> Move _irqfd_resampler and _irqfd struct declarations in a new
> public header: kvm_irqfd.h. They are respectively renamed into
> kvm_kernel_irqfd_resampler and kvm_kernel_irqfd. Those datatypes
> will be used by architecture specific code, in the context of
> IRQ bypass manager integration.
> 
> Signed-off-by: Eric Auger <eric.au...@linaro.org>
> ---
>  include/linux/kvm_irqfd.h | 69 ++
>  virt/kvm/eventfd.c| 95 
> ---
>  2 files changed, 92 insertions(+), 72 deletions(-)
>  create mode 100644 include/linux/kvm_irqfd.h
> 
> diff --git a/include/linux/kvm_irqfd.h b/include/linux/kvm_irqfd.h
> new file mode 100644
> index 000..f926b39
> --- /dev/null
> +++ b/include/linux/kvm_irqfd.h
> @@ -0,0 +1,69 @@
> +/*
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License as published by
> + * the Free Software Foundation; either version 2 of the License.
> + *
> + * This program is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> + * GNU General Public License for more details.
> + *
> + * irqfd: Allows an fd to be used to inject an interrupt to the guest
> + * Credit goes to Avi Kivity for the original idea.
> + */
> +
> +#ifndef __LINUX_KVM_IRQFD_H
> +#define __LINUX_KVM_IRQFD_H
> +
> +#include 
> +#include 
> +
> +/*
> + * Resampling irqfds are a special variety of irqfds used to emulate
> + * level triggered interrupts.  The interrupt is asserted on eventfd
> + * trigger.  On acknowledgment through the irq ack notifier, the
> + * interrupt is de-asserted and userspace is notified through the
> + * resamplefd.  All resamplers on the same gsi are de-asserted
> + * together, so we don't need to track the state of each individual
> + * user.  We can also therefore share the same irq source ID.
> + */
> +struct kvm_kernel_irqfd_resampler {
> + struct kvm *kvm;
> + /*
> +  * List of resampling struct _irqfd objects sharing this gsi.
> +  * RCU list modified under kvm->irqfds.resampler_lock
> +  */
> + struct list_head list;
> + struct kvm_irq_ack_notifier notifier;
> + /*
> +  * Entry in list of kvm->irqfd.resampler_list.  Use for sharing
> +  * resamplers among irqfds on the same gsi.
> +  * Accessed and modified under kvm->irqfds.resampler_lock
> +  */
> + struct list_head link;
> +};
> +
> +struct kvm_kernel_irqfd {
> + /* Used for MSI fast-path */
> + struct kvm *kvm;
> + wait_queue_t wait;
> + /* Update side is protected by irqfds.lock */
> + struct kvm_kernel_irq_routing_entry irq_entry;
> + seqcount_t irq_entry_sc;
> + /* Used for level IRQ fast-path */
> + int gsi;
> + struct work_struct inject;
> + /* The resampler used by this irqfd (resampler-only) */
> + struct kvm_kernel_irqfd_resampler *resampler;
> + /* Eventfd notified on resample (resampler-only) */
> + struct eventfd_ctx *resamplefd;
> + /* Entry in list of irqfds for a resampler (resampler-only) */
> + struct list_head resampler_link;
> + /* Used for setup/shutdown */
> + struct eventfd_ctx *eventfd;
> + struct list_head list;
> + poll_table pt;
> + struct work_struct shutdown;
> +};
> +
> +#endif /* __LINUX_KVM_IRQFD_H */
> diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
> index 9ff4193..647ffb8 100644
> --- a/virt/kvm/eventfd.c
> +++ b/virt/kvm/eventfd.c
> @@ -23,6 +23,7 @@
> 
>  #include 
>  #include 
> +#include 
>  #include 
>  #include 
>  #include 
> @@ -39,68 +40,14 @@
>  #include 
> 
>  #ifdef CONFIG_HAVE_KVM_IRQFD
> -/*
> - * 
> - * irqfd: Allows an fd to be used to inject an interrupt to the guest
> - *
> - * Credit goes to Avi Kivity for the original idea.
> - * -

RE: [PATCH v9 00/18] Add VT-d Posted-Interrupts support - including prerequisite series



> -Original Message-
> From: Paolo Bonzini [mailto:pbonz...@redhat.com]
> Sent: Friday, September 18, 2015 11:21 PM
> To: Wu, Feng; alex.william...@redhat.com; j...@8bytes.org;
> mtosa...@redhat.com
> Cc: eric.au...@linaro.org; kvm@vger.kernel.org;
> io...@lists.linux-foundation.org; linux-ker...@vger.kernel.org
> Subject: Re: [PATCH v9 00/18] Add VT-d Posted-Interrupts support - including
> prerequisite series
> 
> 
> 
> On 18/09/2015 17:08, Wu, Feng wrote:
> >
> >
> >> -Original Message-
> >> From: Paolo Bonzini [mailto:pbonz...@redhat.com]
> >> Sent: Friday, September 18, 2015 10:59 PM
> >> To: Wu, Feng; alex.william...@redhat.com; j...@8bytes.org;
> >> mtosa...@redhat.com
> >> Cc: eric.au...@linaro.org; kvm@vger.kernel.org;
> >> io...@lists.linux-foundation.org; linux-ker...@vger.kernel.org
> >> Subject: Re: [PATCH v9 00/18] Add VT-d Posted-Interrupts support - 
> >> including
> >> prerequisite series
> >>
> >>
> >>
> >> On 18/09/2015 16:29, Feng Wu wrote:
> >>> VT-d Posted-Interrupts is an enhancement to CPU side Posted-Interrupt.
> >>> With VT-d Posted-Interrupts enabled, external interrupts from
> >>> direct-assigned devices can be delivered to guests without VMM
> >>> intervention when guest is running in non-root mode.
> >>>
> >>> You can find the VT-d Posted-Interrtups Spec. in the following URL:
> >>>
> >>
> http://www.intel.com/content/www/us/en/intelligent-systems/intel-technolog
> >> y/vt-directed-io-spec.html
> >>
> >> Thanks.  I will squash patches 2 and 14 together, and drop patch 3.
> >>
> >> Signed-off-bys are missing in patch 1 and 4.  The patches exist
> >> elsewhere in the mailing list archives, so not a big deal.  Or just
> >> reply to them with the S-o-b line.
> >>
> >
> > Thanks for your quick response, Paolo! I didn't change the code
> > in patch 1 and 4, do I need to add s-o-b, if needed, I can reply
> > the patches.
> 
> Yes, the s-o-b just means that the code passed through your hands.

Done.
> 
> Note that I replied to patch 17, but no need to resend that one
> either---just mailing list discussion is enough.

Do you mean you replied to patch 17 just now? I can't find your replies
on the mailing list.

Thanks,
Feng

> 
> Paolo
> 
> > Thanks,
> > Feng
> >
> >> Alex, can you ack the series and review patch 12?
> >>
> >> Joerg, can you ack patch 18?
> >>
> >> Paolo
> >>
> >>> v9:
> >>> - Include the whole series:
> >>> [01/18]: irq bypasser manager
> >>> [02/18] - [06/18]: Common non-architecture part for VT-d PI and ARM side
> >> forwarded irq
> >>> [07/18] - [18/18]: VT-d PI part
> >>>
> >>> v8:
> >>> refer to the changelog in each patch
> >>>
> >>> v7:
> >>> * Define two weak irq bypass callbacks:
> >>>   - kvm_arch_irq_bypass_start()
> >>>   - kvm_arch_irq_bypass_stop()
> >>> * Remove the x86 dummy implementation of the above two functions.
> >>> * Print some useful information instead of WARN_ON() when the
> >>>   irq bypass consumer unregistration fails.
> >>> * Fix an issue when calling pi_pre_block and pi_post_block.
> >>>
> >>> v6:
> >>> * Rebase on 4.2.0-rc6
> >>> * Rebase on https://lkml.org/lkml/2015/8/6/526 and
> >> http://www.gossamer-threads.com/lists/linux/kernel/2235623
> >>> * Make the add_consumer and del_consumer callbacks static
> >>> * Remove pointless INIT_LIST_HEAD to 'vdev->ctx[vector].producer.node)'
> >>> * Use dev_info instead of WARN_ON() when irq_bypass_register_producer
> >> fails
> >>> * Remove optional dummy callbacks for irq producer
> >>>
> >>> v4:
> >>> * For lowest-priority interrupt, only support single-CPU destination
> >>> interrupts at the current stage, more common lowest priority support
> >>> will be added later.
> >>> * Accoring to Marcelo's suggestion, when vCPU is blocked, we handle
> >>> the posted-interrupts in the HLT emulation path.
> >>> * Some small changes (coding style, typo, add some code comments)
> >>>
> >>> v3:
> >>> * Adjust the Posted-interrupts Descriptor updating logic when vCPU is
> >>>   preempted or blocked.
> >>> * KVM_DEV_VFIO_DEVICE_POSTING_IRQ --

RE: [PATCH v8 03/13] KVM: Define a new interface kvm_intr_is_single_vcpu()

2015-09-17 Thread Wu, Feng



> -Original Message-
> From: Paolo Bonzini [mailto:pbonz...@redhat.com]
> Sent: Thursday, September 17, 2015 5:42 PM
> To: Wu, Feng; alex.william...@redhat.com; j...@8bytes.org;
> mtosa...@redhat.com
> Cc: eric.au...@linaro.org; kvm@vger.kernel.org;
> io...@lists.linux-foundation.org; linux-ker...@vger.kernel.org
> Subject: Re: [PATCH v8 03/13] KVM: Define a new interface
> kvm_intr_is_single_vcpu()
> 
> 
> 
> On 17/09/2015 05:17, Wu, Feng wrote:
> >>> > > + if (irq->dest_mode == APIC_DEST_PHYSICAL) {
> >>> > > + if (irq->dest_id == 0xFF)
> >>> > > + goto out;
> >>> > > +
> >>> > > + if (irq->dest_id >= ARRAY_SIZE(map->phys_map)) {
> >> >
> >> > Warning here is wrong, the guest can trigger it.
> > Could you please share more information about how the guest
> > triggers these conditions (including the following two), Thanks
> > a lot!
> 
> irq->dest_id is a 16-bit value, so it can be > 255.

Yes, irq->dest_id is defined as u32, but looking at the current KVM
code, it seems dest_id is used as a u8 variable; even in x2apic mode
the dest_id will not go beyond 255 (except for the broadcast destination
in x2apic mode). Correct me if I am wrong. Thanks a lot!

> 
> > +   if (!kvm_apic_logical_map_valid(map)) {
> > +   WARN_ON_ONCE(1);
> 
> Here, the guest can trigger it by setting a few APICs in flat mode and
> others in cluster mode, for example.

Oh, right, the logical map works only when the destination mode of all
the vCPUs is the same.

> 
> > +   if (cid >= ARRAY_SIZE(map->logical_map)) {
> > +   WARN_ON_ONCE(1);
> 
> In x2apic mode irq->dest_id could have bits 12..15 set.

cid is taken from bits 16..31 of the ldr (in apic_logical_id()), and
in x2apic mode, ldr is constructed in kvm_apic_set_x2apic_id() as
below:

u32 ldr = ((id >> 4) << 16) | (1 << (id & 0xf));

So in fact, cid is (id >> 4); I cannot think of why cid could go beyond 15.
Am I missing something here? Thanks!

Thanks,
Feng

> 
> Paolo
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

RE: [PATCH v8 03/13] KVM: Define a new interface kvm_intr_is_single_vcpu()

2015-09-17 Thread Wu, Feng



> -Original Message-
> From: Paolo Bonzini [mailto:pbonz...@redhat.com]
> Sent: Friday, September 18, 2015 12:00 AM
> To: Radim Krčmář
> Cc: Wu, Feng; alex.william...@redhat.com; j...@8bytes.org;
> mtosa...@redhat.com; eric.au...@linaro.org; kvm@vger.kernel.org;
> io...@lists.linux-foundation.org; linux-ker...@vger.kernel.org
> Subject: Re: [PATCH v8 03/13] KVM: Define a new interface
> kvm_intr_is_single_vcpu()
> 
> 
> 
> On 17/09/2015 17:58, Radim Krčmář wrote:
> > For interrupts from MSI and IOxAPIC:
> > - Flat logical interrupts are delivered as if we had natural
> >   (CPU0<->bit0, CPU1<->bit1, ...) flat logical xAPIC for first 8 VCPUs.
> > - Cluster logical doesn't work much, it's interpreted like flat logical.
> >   I didn't care about xAPIC cluster because Linux, the sole user of our
> >   paravirtualized x2APIC, doesn't configure it.
> >
> > I'll paste kvm_apic_mda() source for better explanation:
> >
> >   static u32 kvm_apic_mda(unsigned int dest_id, struct kvm_lapic *source,
> > struct kvm_lapic
> *target)
> >   {
> > bool ipi = source != NULL;
> > bool x2apic_mda = apic_x2apic_mode(ipi ? source : target);
> >
> > if (!ipi && dest_id == APIC_BROADCAST && x2apic_mda)
> > return X2APIC_BROADCAST;
> >
> > return x2apic_mda ? dest_id : SET_APIC_DEST_FIELD(dest_id);
> >   }
> >
> > MSI/IOxAPIC interrupt means that source is NULL and if the target is in
> > x2APIC mode, the original 'dest_id' is returned as mda => a flat logical
> > xAPIC to 0x0f will get interpreted as (cluster) logical x2APIC 0xf in
> > kvm_apic_match_logical_addr().
> > xAPIC address are only 8 bit long so they always get delivered to x2APIC
> > cluster 0, where first 16 bits work like xAPIC flat logical mode.
> 
> Ok, I was wondering whether this was the correct interpretation.  Thanks!

Paolo, I don't think Radim's reply addresses your concern, right? Since mda
is only 8 bits, comparing mda >> 16 is wrong; that is your concern, right?

Thanks,
Feng

> 
> Paolo
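--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html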

RE: [PATCH v8 03/13] KVM: Define a new interface kvm_intr_is_single_vcpu()

2015-09-17 Thread Wu, Feng



> -Original Message-
> From: Paolo Bonzini [mailto:pbonz...@redhat.com]
> Sent: Thursday, September 17, 2015 10:25 PM
> To: Wu, Feng; alex.william...@redhat.com; j...@8bytes.org;
> mtosa...@redhat.com
> Cc: eric.au...@linaro.org; kvm@vger.kernel.org;
> io...@lists.linux-foundation.org; linux-ker...@vger.kernel.org; Radim
> Krčmář
> Subject: Re: [PATCH v8 03/13] KVM: Define a new interface
> kvm_intr_is_single_vcpu()
> 
> >> On 17/09/2015 05:17, Wu, Feng wrote:
> >>>>>>> + if (irq->dest_mode == APIC_DEST_PHYSICAL) {
> >>>>>>> + if (irq->dest_id == 0xFF)
> >>>>>>> + goto out;
> >>>>>>> +
> >>>>>>> + if (irq->dest_id >= ARRAY_SIZE(map->phys_map)) {
> >>>>>
> >>>>> Warning here is wrong, the guest can trigger it.
> >>> Could you please share more information about how the guest
> >>> triggers these conditions (including the following two), Thanks
> >>> a lot!
> >>
> >> irq->dest_id is a 16-bit value, so it can be > 255.
> >
> > Yes, irq->dest_id is defined as u32, but by looking the current KVM
> > code, seems desst_id is used as an u8 variable, even in x2apic mode
> > the dest_id will not beyond 255 (except for broadcast dest in in x2apic
> > mode). Correct me if I am wrong. Thanks a lot!
> 
> Actually you're right, the MSI destination is only 8 bits.  I was
> confused because of
> 
> #defineMSI_ADDR_DEST_ID_MASK  0x000
> 
> in arch/x86/include/asm/msidef.h.  But there may be a bug, see below...
> 
> >>> + if (cid >= ARRAY_SIZE(map->logical_map)) {
> >>> + WARN_ON_ONCE(1);
> >>
> >> In x2apic mode irq->dest_id could have bits 12..15 set.
> >
> > cid is gotten from bit 16 ..31 of the ldr (in apic_logical_id()), and
> > in x2apic mode, ldr is constructed in kvm_apic_set_x2apic_id() as
> > below:
> >
> > u32 ldr = ((id >> 4) << 16) | (1 << (id & 0xf));
> >
> > So in fact, cid is (id >> 4), I cannot think of why cid can beyond 15.
> 
> I think kvm_apic_match_logical_addr for MSI and IOAPIC interrupts is
> buggy in x2apic mode.
> 
> It does:
> 
> if (apic_x2apic_mode(apic))
> return ((logical_id >> 16) == (mda >> 16))
>&& (logical_id & mda & 0x) != 0;
> 
> But mda is only 8-bits for MSI and IOAPIC interrupts.
> 
> Radim, should kvm_apic_mda also handle the !ipi && x2apic_mda && dest_id
> != APIC_BROADCAST case?  It never triggers with Linux because it uses
> only the physical mode (that's not super-easy to see;
> ioapic_set_affinity looks for the RTEs in irq_data->chip_data and that
> is allocated with kzalloc).
> 
> > Do I miss something here? Thanks!
> 
> No, you were right.
> 
> But still I think the WARNs are unnecessary; it is conceivable that some
> future chipset adds support for more than 8-bits in the dest_id.

No problem, I agree with it. Here I just wanted to clarify some questions;
thanks for the elaboration!

Thanks,
Feng

> 
> Paolo
> 
> > Thanks,
> > Feng
> >
> >>
> >> Paolo
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

RE: [PATCH v8 09/13] KVM: Add an arch specific hooks in 'struct kvm_kernel_irqfd'

2015-09-16 Thread Wu, Feng



> -Original Message-
> From: Paolo Bonzini [mailto:pbonz...@redhat.com]
> Sent: Wednesday, September 16, 2015 5:27 PM
> To: Wu, Feng; alex.william...@redhat.com; j...@8bytes.org;
> mtosa...@redhat.com
> Cc: eric.au...@linaro.org; kvm@vger.kernel.org;
> io...@lists.linux-foundation.org; linux-ker...@vger.kernel.org
> Subject: Re: [PATCH v8 09/13] KVM: Add an arch specific hooks in 'struct
> kvm_kernel_irqfd'
> 
> 
> 
> On 16/09/2015 10:50, Feng Wu wrote:
> > +int kvm_arch_update_irqfd_routing(struct kvm *kvm, unsigned int host_irq,
> > +  uint32_t guest_irq, bool set)
> > +{
> > +   return !kvm_x86_ops->update_pi_irte ? -EINVAL :
> > +   kvm_x86_ops->update_pi_irte(kvm, host_irq, guest_irq, set);
> > +}
> > +
> 
> Just use "if" here.  No need to resend if this is the only comment.

I am sorry, I don't quite understand. Do you mean I don't need to include
this patch in v9? If so, what about other patches with your Reviewed-by?

Thanks,
Feng

> 
> >
> >  }
> > +int  __attribute__((weak)) kvm_arch_update_irqfd_routing(
> > +   struct kvm *kvm, unsigned
> 
> Empty line after "}".
> 
> Paolo
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

RE: [PATCH v8 03/13] KVM: Define a new interface kvm_intr_is_single_vcpu()

2015-09-16 Thread Wu, Feng



> -Original Message-
> From: Paolo Bonzini [mailto:pbonz...@redhat.com]
> Sent: Wednesday, September 16, 2015 5:23 PM
> To: Wu, Feng; alex.william...@redhat.com; j...@8bytes.org;
> mtosa...@redhat.com
> Cc: eric.au...@linaro.org; kvm@vger.kernel.org;
> io...@lists.linux-foundation.org; linux-ker...@vger.kernel.org
> Subject: Re: [PATCH v8 03/13] KVM: Define a new interface
> kvm_intr_is_single_vcpu()
> 
> 
> 
> On 16/09/2015 10:49, Feng Wu wrote:
> > This patch defines a new interface kvm_intr_is_single_vcpu(),
> > which can returns whether the interrupt is for single-CPU or not.
> >
> > It is used by VT-d PI, since now we only support single-CPU
> > interrupts, For lowest-priority interrupts, if user configures
> > it via /proc/irq or uses irqbalance to make it single-CPU, we
> > can use PI to deliver the interrupts to it. Full functionality
> > of lowest-priority support will be added later.
> >
> > Signed-off-by: Feng Wu <feng...@intel.com>
> > ---
> > v8:
> > - Some optimizations in kvm_intr_is_single_vcpu().
> > - Expose kvm_intr_is_single_vcpu() so we can use it in vmx code.
> > - Add kvm_intr_is_single_vcpu_fast() as the fast path to find
> >   the target vCPU for the single-destination interrupt
> >
> >  arch/x86/include/asm/kvm_host.h |  3 ++
> >  arch/x86/kvm/irq_comm.c | 94
> +
> >  arch/x86/kvm/lapic.c|  5 +--
> >  arch/x86/kvm/lapic.h|  2 +
> >  4 files changed, 101 insertions(+), 3 deletions(-)
> >
> > diff --git a/arch/x86/include/asm/kvm_host.h
> b/arch/x86/include/asm/kvm_host.h
> > index 49ec903..af11bca 100644
> > --- a/arch/x86/include/asm/kvm_host.h
> > +++ b/arch/x86/include/asm/kvm_host.h
> > @@ -1204,4 +1204,7 @@ int __x86_set_memory_region(struct kvm *kvm,
> >  int x86_set_memory_region(struct kvm *kvm,
> >   const struct kvm_userspace_memory_region *mem);
> >
> > +bool kvm_intr_is_single_vcpu(struct kvm *kvm, struct kvm_lapic_irq *irq,
> > +struct kvm_vcpu **dest_vcpu);
> > +
> >  #endif /* _ASM_X86_KVM_HOST_H */
> > diff --git a/arch/x86/kvm/irq_comm.c b/arch/x86/kvm/irq_comm.c
> > index 9efff9e..97ba1d6 100644
> > --- a/arch/x86/kvm/irq_comm.c
> > +++ b/arch/x86/kvm/irq_comm.c
> > @@ -297,6 +297,100 @@ out:
> > return r;
> >  }
> >
> > +static bool kvm_intr_is_single_vcpu_fast(struct kvm *kvm,
> > +struct kvm_lapic_irq *irq,
> > +struct kvm_vcpu **dest_vcpu)
> 
> Please put this in lapic.c, similar to kvm_irq_delivery_to_apic_fast, so
> that you do not have to export other functions.
> 
> > +{
> > +   struct kvm_apic_map *map;
> > +   bool ret = false;
> > +   struct kvm_lapic *dst = NULL;
> > +
> > +   if (irq->shorthand)
> > +   return false;
> > +
> > +   rcu_read_lock();
> > +   map = rcu_dereference(kvm->arch.apic_map);
> > +
> > +   if (!map)
> > +   goto out;
> > +
> > +   if (irq->dest_mode == APIC_DEST_PHYSICAL) {
> > +   if (irq->dest_id == 0xFF)
> > +   goto out;
> > +
> > +   if (irq->dest_id >= ARRAY_SIZE(map->phys_map)) {
> 
> Warning here is wrong, the guest can trigger it.

Could you please share more information about how the guest can
trigger these conditions (including the following two)? Thanks
a lot!

Thanks,
Feng

> 
> > +   WARN_ON_ONCE(1);
> > +   goto out;
> > +   }
> > +
> > +   dst = map->phys_map[irq->dest_id];
> > +   if (dst && kvm_apic_present(dst->vcpu))
> > +   *dest_vcpu = dst->vcpu;
> > +   else
> > +   goto out;
> > +   } else {
> > +   u16 cid;
> > +   unsigned long bitmap = 1;
> > +   int i, r = 0;
> > +
> > +   if (!kvm_apic_logical_map_valid(map)) {
> > +   WARN_ON_ONCE(1);
> 
> Same here.
> 
> > +   goto out;
> > +   }
> > +
> > +   apic_logical_id(map, irq->dest_id, &cid, (u16 *)&bitmap);
> > +
> > +   if (cid >= ARRAY_SIZE(map->logical_map)) {
> > +   WARN_ON_ONCE(1);
> 
> Same here.
> 
> Otherwise looks good.
> 
> Paolo
> 
> > +   goto out;
> > +   }
> > +
> > +   for_each_set_bit(

RE: [PATCH v7 15/17] KVM: Update Posted-Interrupts Descriptor when vCPU is blocked

2015-09-14 Thread Wu, Feng

First of all, Paolo, thanks a lot for your review of this series; it really
means a lot! :)

> -Original Message-
> From: linux-kernel-ow...@vger.kernel.org
> [mailto:linux-kernel-ow...@vger.kernel.org] On Behalf Of Paolo Bonzini
> Sent: Friday, September 11, 2015 7:21 PM
> To: Wu, Feng; alex.william...@redhat.com; j...@8bytes.org;
> mtosa...@redhat.com
> Cc: eric.au...@linaro.org; kvm@vger.kernel.org;
> io...@lists.linux-foundation.org; linux-ker...@vger.kernel.org
> Subject: Re: [PATCH v7 15/17] KVM: Update Posted-Interrupts Descriptor when
> vCPU is blocked
> 
> 
> 
> On 25/08/2015 10:50, Feng Wu wrote:
> > This patch updates the Posted-Interrupts Descriptor when vCPU
> > is blocked.
> >
> > pre-block:
> > - Add the vCPU to the blocked per-CPU list
> > - Set 'NV' to POSTED_INTR_WAKEUP_VECTOR
> >
> > post-block:
> > - Remove the vCPU from the per-CPU list
> >
> > Signed-off-by: Feng Wu <feng...@intel.com>
> > ---
> >  arch/x86/include/asm/kvm_host.h |   5 ++
> >  arch/x86/kvm/vmx.c  | 151
> 
> >  arch/x86/kvm/x86.c  |  55 ---
> >  include/linux/kvm_host.h|   3 +
> >  virt/kvm/kvm_main.c |   3 +
> >  5 files changed, 207 insertions(+), 10 deletions(-)
> >
> > diff --git a/arch/x86/include/asm/kvm_host.h
> b/arch/x86/include/asm/kvm_host.h
> > index 22269b4..32af275 100644
> > --- a/arch/x86/include/asm/kvm_host.h
> > +++ b/arch/x86/include/asm/kvm_host.h
> > @@ -554,6 +554,8 @@ struct kvm_vcpu_arch {
> >  */
> > bool write_fault_to_shadow_pgtable;
> >
> > +   bool halted;
> > +
> > /* set at EPT violation at this point */
> > unsigned long exit_qualification;
> >
> > @@ -868,6 +870,9 @@ struct kvm_x86_ops {
> >
> > void (*pi_clear_sn)(struct kvm_vcpu *vcpu);
> > void (*pi_set_sn)(struct kvm_vcpu *vcpu);
> > +
> > +   int (*pi_pre_block)(struct kvm_vcpu *vcpu);
> > +   void (*pi_post_block)(struct kvm_vcpu *vcpu);
> 
> Just pre_block/post_block please.  Also, please document the return
> value of pre_block.
> 
> > diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> > index ef93fdc..fc7f222 100644
> > --- a/arch/x86/kvm/x86.c
> > +++ b/arch/x86/kvm/x86.c
> > @@ -5869,7 +5869,13 @@ int kvm_vcpu_halt(struct kvm_vcpu *vcpu)
> >  {
> > ++vcpu->stat.halt_exits;
> > if (irqchip_in_kernel(vcpu->kvm)) {
> > -   vcpu->arch.mp_state = KVM_MP_STATE_HALTED;
> > +   /* Handle posted-interrupt when vCPU is to be halted */
> > +   if (!kvm_x86_ops->pi_pre_block ||
> > +   (kvm_x86_ops->pi_pre_block &&
> 
> No need to test kvm_x86_ops->pi_pre_block again.
> 
> > +   kvm_x86_ops->pi_pre_block(vcpu) == 0)) {
> > +   vcpu->arch.halted = true;
> > +   vcpu->arch.mp_state = KVM_MP_STATE_HALTED;
> > +   }
> > return 1;
> > } else {
> > vcpu->run->exit_reason = KVM_EXIT_HLT;
> > @@ -6518,6 +6524,21 @@ static int vcpu_enter_guest(struct kvm_vcpu
> *vcpu)
> > kvm_vcpu_reload_apic_access_page(vcpu);
> > }
> >
> > +   /*
> > +* Since posted-interrupts can be set by VT-d HW now, in this
> > +* case, KVM_REQ_EVENT is not set. We move the following
> > +* operations out of the if statement.
> > +*/
> 
> Just "KVM_REQ_EVENT is not set when posted interrupts are set by VT-d
> hardware, so we have to update RVI unconditionally", please.
> 
> Could we skip this (in a future patch) if PI.ON=0?

Do you mean only executing the following code when PI.ON == 1?
Maybe we cannot do that, since 'ON' can be cleared by the hypervisor
in lots of places.

> 
> > +   if (kvm_lapic_enabled(vcpu)) {
> > +   /*
> > +* Update architecture specific hints for APIC
> > +* virtual interrupt delivery.
> > +*/
> > +   if (kvm_x86_ops->hwapic_irr_update)
> > +   kvm_x86_ops->hwapic_irr_update(vcpu,
> > +   kvm_lapic_find_highest_irr(vcpu));
> > +   }
> > +
> > if (kvm_check_request(KVM_REQ_EVENT, vcpu) || req_int_win) {
> > kvm_apic_accept_events(vcpu);
> > if (vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED) {
> > @@ -6534,13 +6555,6 @@ static int vcpu_enter_guest(struct kvm_vcpu
> *vcpu)
> > kv

RE: [PATCH v7 10/17] KVM: x86: Update IRTE for posted-interrupts

2015-08-25 Thread Wu, Feng



 -Original Message-
 From: Alex Williamson [mailto:alex.william...@redhat.com]
 Sent: Wednesday, August 26, 2015 3:58 AM
 To: Wu, Feng
 Cc: pbonz...@redhat.com; j...@8bytes.org; mtosa...@redhat.com;
 eric.au...@linaro.org; kvm@vger.kernel.org;
 io...@lists.linux-foundation.org; linux-ker...@vger.kernel.org
 Subject: Re: [PATCH v7 10/17] KVM: x86: Update IRTE for posted-interrupts
 
 On Tue, 2015-08-25 at 16:50 +0800, Feng Wu wrote:
  This patch adds the routine to update IRTE for posted-interrupts
  when guest changes the interrupt configuration.
 
  Signed-off-by: Feng Wu feng...@intel.com
  ---
   arch/x86/kvm/x86.c | 73
 ++
   1 file changed, 73 insertions(+)
  +   kvm_set_msi_irq(e, &irq);
  +   if (!kvm_intr_is_single_vcpu(kvm, &irq, &vcpu))
  +   continue;
  +
  +   vcpu_info.pi_desc_addr = kvm_x86_ops->get_pi_desc_addr(vcpu);
  +   vcpu_info.vector = irq.vector;
  +
  +   if (set)
  +   ret = irq_set_vcpu_affinity(host_irq, &vcpu_info);
  +   else {
  +   /* suppress notification event before unposting */
  +   kvm_x86_ops->pi_set_sn(vcpu);
  +   ret = irq_set_vcpu_affinity(host_irq, NULL);
  +   kvm_x86_ops->pi_clear_sn(vcpu);
  +   }
 
 Can we add trace events so that we have a way to tell when PI is being
 enabled/disabled other than performance heuristics?  Thanks,

Sure, I will add it.

Thanks,
Feng

 
 Alex

RE: [PATCH 8/9] KVM: x86: Add EOI exit bitmap inference

2015-08-07 Thread Wu, Feng

 -Original Message-
 From: Paolo Bonzini [mailto:paolo.bonz...@gmail.com] On Behalf Of Paolo
 Bonzini
 Sent: Friday, August 07, 2015 6:17 PM
 To: Wu, Feng; linux-ker...@vger.kernel.org; kvm@vger.kernel.org
 Cc: Steve Rutherford; rkrc...@redhat.com
 Subject: Re: [PATCH 8/9] KVM: x86: Add EOI exit bitmap inference

 On 07/08/2015 09:46, Wu, Feng wrote:
  If I understand it correctly, here you reserve the low part of the routing
  table, and insert entries with KVM_IRQ_ROUTING_MSI type in them,
  then you use this as a hint to KVM to set the EOI bit map. I have two
  concerns:

  - Currently, GSI 2 is used for MSI routing, I want to make sure after this
  patch, whether GSI 2 can still be used for _real_ MSI routing, if it can,
  does everything work correctly?

 The patch has no effect if you use the in-kernel IOAPIC.  If you use a
 userspace IOAPIC you won't be able to use GSI 2 for MSI routing because
 it falls in the reserved range.

Good to know this, it addresses my concern, thanks!

  - Now, KVM_IRQ_ROUTING_MSI and KVM_IRQ_ROUTING_IRQCHIP
  type entries cannot share the same map[gsi] (pls refer to the following
  code), so where should be the IOAPIC entries exist in the map[] array?

 With split irqchip, only KVM_IRQ_ROUTING_MSI is used.  Does this answer
 your question?

Ah, I got it: since the IOAPIC is in userspace, there is no need to add the
IOAPIC routing information to the routing table. Thanks for the clarification!

Thanks,
Feng

 Paolo

  static int setup_routing_entry(struct kvm_irq_routing_table *rt,
 struct kvm_kernel_irq_routing_entry
 *e,
 const struct kvm_irq_routing_entry
 *ue)
  {

  ..

  /*
   * Do not allow GSI to be mapped to the same irqchip more than
 once.
   * Allow only one to one mapping between GSI and MSI.
   */
  hlist_for_each_entry(ei, &rt->map[ue->gsi], link)
  if (ei->type == KVM_IRQ_ROUTING_MSI ||
  ue->type == KVM_IRQ_ROUTING_MSI ||
  ue->u.irqchip.irqchip == ei->irqchip.irqchip)
  return r;

  ..
  }
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

RE: [PATCH 8/9] KVM: x86: Add EOI exit bitmap inference

2015-08-07 Thread Wu, Feng



 -Original Message-
 From: kvm-ow...@vger.kernel.org [mailto:kvm-ow...@vger.kernel.org] On
 Behalf Of Paolo Bonzini
 Sent: Wednesday, August 05, 2015 11:24 PM
 To: linux-ker...@vger.kernel.org; kvm@vger.kernel.org
 Cc: Steve Rutherford; rkrc...@redhat.com
 Subject: [PATCH 8/9] KVM: x86: Add EOI exit bitmap inference
 
 From: Steve Rutherford srutherf...@google.com
 
 In order to support a userspace IOAPIC interacting with an in kernel
 APIC, the EOI exit bitmaps need to be configurable.
 
 If the IOAPIC is in userspace (i.e. the irqchip has been split), the
 EOI exit bitmaps will be set whenever the GSI Routes are configured.
 In particular, for the low MSI routes are reservable for userspace
 IOAPICs. For these MSI routes, the EOI Exit bit corresponding to the
 destination vector of the route will be set for the destination VCPU.
 
 The intention is for the userspace IOAPICs to use the reservable MSI
 routes to inject interrupts into the guest.
 
 This is a slight abuse of the notion of an MSI Route, given that MSIs
 classically bypass the IOAPIC. It might be worthwhile to add an
 additional route type to improve clarity.
 
 Compile tested for Intel x86.
 
 Signed-off-by: Steve Rutherford srutherf...@google.com
 Signed-off-by: Paolo Bonzini pbonz...@redhat.com
 ---
  Documentation/virtual/kvm/api.txt |  9 ++---
  arch/x86/include/asm/kvm_host.h   |  1 +
  arch/x86/kvm/ioapic.h |  2 ++
  arch/x86/kvm/irq_comm.c   | 42
 +++
  arch/x86/kvm/lapic.c  |  3 +--
  arch/x86/kvm/x86.c|  9 -
  include/linux/kvm_host.h  | 17 
  virt/kvm/irqchip.c| 12 ++-
  8 files changed, 79 insertions(+), 16 deletions(-)
 
 diff --git a/Documentation/virtual/kvm/api.txt
 b/Documentation/virtual/kvm/api.txt
 index bda6cb747b23..dcd748e2d46d 100644
 --- a/Documentation/virtual/kvm/api.txt
 +++ b/Documentation/virtual/kvm/api.txt
 @@ -3635,7 +3635,7 @@ KVM handlers should exit to userspace with rc =
 -EREMOTE.
  7.5 KVM_CAP_SPLIT_IRQCHIP
 
  Architectures: x86
 -Parameters: None
 +Parameters: args[0] - number of routes reserved for userspace IOAPICs
  Returns: 0 on success, -1 on error
 
  Create a local apic for each processor in the kernel. This can be used
 @@ -3643,8 +3643,11 @@ instead of KVM_CREATE_IRQCHIP if the userspace
 VMM wishes to emulate the
  IOAPIC and PIC (and also the PIT, even though this has to be enabled
  separately).
 
 -This supersedes KVM_CREATE_IRQCHIP, creating only local APICs, but no in
 kernel
 -IOAPIC or PIC. This also enables in kernel routing of interrupt requests.
 +This capability also enables in kernel routing of interrupt requests;
 +when KVM_CAP_SPLIT_IRQCHIP only routes of KVM_IRQ_ROUTING_MSI type
 are
 +used in the IRQ routing table.  The first args[0] MSI routes are reserved
 +for the IOAPIC pins.  Whenever the LAPIC receives an EOI for these routes,
 +a KVM_EXIT_IOAPIC_EOI vmexit will be reported to userspace.
 
  Fails if VCPU has already been created, or if the irqchip is already in the
  kernel (i.e. KVM_CREATE_IRQCHIP has already been called).
 diff --git a/arch/x86/include/asm/kvm_host.h
 b/arch/x86/include/asm/kvm_host.h
 index 4294722dfd1d..4bc714f7b164 100644
 --- a/arch/x86/include/asm/kvm_host.h
 +++ b/arch/x86/include/asm/kvm_host.h
 @@ -687,6 +687,7 @@ struct kvm_arch {
   u64 disabled_quirks;
 
   bool irqchip_split;
 + u8 nr_reserved_ioapic_pins;
  };
 
  struct kvm_vm_stat {
 diff --git a/arch/x86/kvm/ioapic.h b/arch/x86/kvm/ioapic.h
 index a8842c0dee73..084617d37c74 100644
 --- a/arch/x86/kvm/ioapic.h
 +++ b/arch/x86/kvm/ioapic.h
 @@ -9,6 +9,7 @@ struct kvm;
  struct kvm_vcpu;
 
  #define IOAPIC_NUM_PINS  KVM_IOAPIC_NUM_PINS
 +#define MAX_NR_RESERVED_IOAPIC_PINS KVM_MAX_IRQ_ROUTES
  #define IOAPIC_VERSION_ID 0x11   /* IOAPIC version */
  #define IOAPIC_EDGE_TRIG  0
  #define IOAPIC_LEVEL_TRIG 1
 @@ -121,5 +122,6 @@ int kvm_irq_delivery_to_apic(struct kvm *kvm, struct
 kvm_lapic *src,
  int kvm_get_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state);
  int kvm_set_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state);
  void kvm_ioapic_scan_entry(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap);
 +void kvm_scan_ioapic_routes(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap);
 
  #endif
 diff --git a/arch/x86/kvm/irq_comm.c b/arch/x86/kvm/irq_comm.c
 index 67f6b62a6814..177460998bb0 100644
 --- a/arch/x86/kvm/irq_comm.c
 +++ b/arch/x86/kvm/irq_comm.c
 @@ -335,3 +335,45 @@ int kvm_setup_empty_irq_routing(struct kvm *kvm)
  {
   return kvm_set_irq_routing(kvm, empty_routing, 0, 0);
  }
 +
 +void kvm_arch_irq_routing_update(struct kvm *kvm)
 +{
 + if (ioapic_in_kernel(kvm) || !irqchip_in_kernel(kvm))
 + return;
 + kvm_make_scan_ioapic_request(kvm);
 +}
 +
 +void kvm_scan_ioapic_routes(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap)
 +{
 + struct kvm *kvm = vcpu-kvm;
 + struct

RE: [PATCH 8/9] KVM: x86: Add EOI exit bitmap inference

2015-08-06 Thread Wu, Feng



 -Original Message-
 From: kvm-ow...@vger.kernel.org [mailto:kvm-ow...@vger.kernel.org] On
 Behalf Of Paolo Bonzini
 Sent: Wednesday, August 05, 2015 11:24 PM
 To: linux-ker...@vger.kernel.org; kvm@vger.kernel.org
 Cc: Steve Rutherford; rkrc...@redhat.com
 Subject: [PATCH 8/9] KVM: x86: Add EOI exit bitmap inference
 
 From: Steve Rutherford srutherf...@google.com
 
 In order to support a userspace IOAPIC interacting with an in kernel
 APIC, the EOI exit bitmaps need to be configurable.
 
 If the IOAPIC is in userspace (i.e. the irqchip has been split), the
 EOI exit bitmaps will be set whenever the GSI Routes are configured.
 In particular, for the low MSI routes are reservable for userspace
 IOAPICs. For these MSI routes, the EOI Exit bit corresponding to the
 destination vector of the route will be set for the destination VCPU.
 
 The intention is for the userspace IOAPICs to use the reservable MSI
 routes to inject interrupts into the guest.
 
 This is a slight abuse of the notion of an MSI Route, given that MSIs
 classically bypass the IOAPIC. It might be worthwhile to add an
 additional route type to improve clarity.
 
 Compile tested for Intel x86.
 
 Signed-off-by: Steve Rutherford srutherf...@google.com
 Signed-off-by: Paolo Bonzini pbonz...@redhat.com
 ---
  Documentation/virtual/kvm/api.txt |  9 ++---
  arch/x86/include/asm/kvm_host.h   |  1 +
  arch/x86/kvm/ioapic.h |  2 ++
  arch/x86/kvm/irq_comm.c   | 42
 +++
  arch/x86/kvm/lapic.c  |  3 +--
  arch/x86/kvm/x86.c|  9 -
  include/linux/kvm_host.h  | 17 
  virt/kvm/irqchip.c| 12 ++-
  8 files changed, 79 insertions(+), 16 deletions(-)
 
 diff --git a/Documentation/virtual/kvm/api.txt
 b/Documentation/virtual/kvm/api.txt
 index bda6cb747b23..dcd748e2d46d 100644
 --- a/Documentation/virtual/kvm/api.txt
 +++ b/Documentation/virtual/kvm/api.txt
 @@ -3635,7 +3635,7 @@ KVM handlers should exit to userspace with rc =
 -EREMOTE.
  7.5 KVM_CAP_SPLIT_IRQCHIP
 
  Architectures: x86
 -Parameters: None
 +Parameters: args[0] - number of routes reserved for userspace IOAPICs
  Returns: 0 on success, -1 on error
 
  Create a local apic for each processor in the kernel. This can be used
 @@ -3643,8 +3643,11 @@ instead of KVM_CREATE_IRQCHIP if the userspace
 VMM wishes to emulate the
  IOAPIC and PIC (and also the PIT, even though this has to be enabled
  separately).
 
 -This supersedes KVM_CREATE_IRQCHIP, creating only local APICs, but no in
 kernel
 -IOAPIC or PIC. This also enables in kernel routing of interrupt requests.
 +This capability also enables in kernel routing of interrupt requests;
 +when KVM_CAP_SPLIT_IRQCHIP only routes of KVM_IRQ_ROUTING_MSI type
 are
 +used in the IRQ routing table.  The first args[0] MSI routes are reserved
 +for the IOAPIC pins.  Whenever the LAPIC receives an EOI for these routes,
 +a KVM_EXIT_IOAPIC_EOI vmexit will be reported to userspace.
 
  Fails if VCPU has already been created, or if the irqchip is already in the
  kernel (i.e. KVM_CREATE_IRQCHIP has already been called).
 diff --git a/arch/x86/include/asm/kvm_host.h
 b/arch/x86/include/asm/kvm_host.h
 index 4294722dfd1d..4bc714f7b164 100644
 --- a/arch/x86/include/asm/kvm_host.h
 +++ b/arch/x86/include/asm/kvm_host.h
 @@ -687,6 +687,7 @@ struct kvm_arch {
   u64 disabled_quirks;
 
   bool irqchip_split;
 + u8 nr_reserved_ioapic_pins;
  };
 
  struct kvm_vm_stat {
 diff --git a/arch/x86/kvm/ioapic.h b/arch/x86/kvm/ioapic.h
 index a8842c0dee73..084617d37c74 100644
 --- a/arch/x86/kvm/ioapic.h
 +++ b/arch/x86/kvm/ioapic.h
 @@ -9,6 +9,7 @@ struct kvm;
  struct kvm_vcpu;
 
  #define IOAPIC_NUM_PINS  KVM_IOAPIC_NUM_PINS
 +#define MAX_NR_RESERVED_IOAPIC_PINS KVM_MAX_IRQ_ROUTES
  #define IOAPIC_VERSION_ID 0x11   /* IOAPIC version */
  #define IOAPIC_EDGE_TRIG  0
  #define IOAPIC_LEVEL_TRIG 1
 @@ -121,5 +122,6 @@ int kvm_irq_delivery_to_apic(struct kvm *kvm, struct
 kvm_lapic *src,
  int kvm_get_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state);
  int kvm_set_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state);
  void kvm_ioapic_scan_entry(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap);
 +void kvm_scan_ioapic_routes(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap);
 
  #endif
 diff --git a/arch/x86/kvm/irq_comm.c b/arch/x86/kvm/irq_comm.c
 index 67f6b62a6814..177460998bb0 100644
 --- a/arch/x86/kvm/irq_comm.c
 +++ b/arch/x86/kvm/irq_comm.c
 @@ -335,3 +335,45 @@ int kvm_setup_empty_irq_routing(struct kvm *kvm)
  {
   return kvm_set_irq_routing(kvm, empty_routing, 0, 0);
  }
 +
 +void kvm_arch_irq_routing_update(struct kvm *kvm)
 +{
 + if (ioapic_in_kernel(kvm) || !irqchip_in_kernel(kvm))
 + return;
 + kvm_make_scan_ioapic_request(kvm);
 +}
 +
 +void kvm_scan_ioapic_routes(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap)
 +{
 + struct kvm *kvm = vcpu-kvm;
 + struct

RE: [PATCH 5/9] KVM: x86: unify handling of interrupt window

2015-08-06 Thread Wu, Feng



 -Original Message-
 From: linux-kernel-ow...@vger.kernel.org
 [mailto:linux-kernel-ow...@vger.kernel.org] On Behalf Of Paolo Bonzini
 Sent: Wednesday, August 05, 2015 11:24 PM
 To: linux-ker...@vger.kernel.org; kvm@vger.kernel.org
 Cc: Steve Rutherford; rkrc...@redhat.com
 Subject: [PATCH 5/9] KVM: x86: unify handling of interrupt window
 
 The interrupt window is currently checked twice, once in vmx.c/svm.c and
 once in dm_request_for_irq_injection.  The only difference is the extra
 check for kvm_arch_interrupt_allowed in dm_request_for_irq_injection,
 and the different return value (EINTR/KVM_EXIT_INTR for vmx.c/svm.c vs.
 0/KVM_EXIT_IRQ_WINDOW_OPEN for dm_request_for_irq_injection).
 
 However, dm_request_for_irq_injection is basically dead code!  Revive it
 by removing the checks in vmx.c and svm.c's vmexit handlers, and
 fixing the returned values for the dm_request_for_irq_injection case.
 
 Signed-off-by: Paolo Bonzini pbonz...@redhat.com
 ---
  arch/x86/kvm/svm.c | 13 -
  arch/x86/kvm/vmx.c | 11 ---
  arch/x86/kvm/x86.c |  4 ++--
  3 files changed, 2 insertions(+), 26 deletions(-)
 
 diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
 index 222439fd73d4..189e46479dd5 100644
 --- a/arch/x86/kvm/svm.c
 +++ b/arch/x86/kvm/svm.c
 @@ -3396,24 +3396,11 @@ static int msr_interception(struct vcpu_svm
 *svm)
 
  static int interrupt_window_interception(struct vcpu_svm *svm)
  {
 - struct kvm_run *kvm_run = svm-vcpu.run;
 -
   kvm_make_request(KVM_REQ_EVENT, svm-vcpu);
   svm_clear_vintr(svm);
   svm-vmcb-control.int_ctl = ~V_IRQ_MASK;
   mark_dirty(svm-vmcb, VMCB_INTR);
   ++svm-vcpu.stat.irq_window_exits;
 - /*
 -  * If the user space waits to inject interrupts, exit as soon as
 -  * possible
 -  */
 - if (!lapic_in_kernel(svm-vcpu) 
 - kvm_run-request_interrupt_window 
 - !kvm_cpu_has_interrupt(svm-vcpu)) {
 - kvm_run-exit_reason = KVM_EXIT_IRQ_WINDOW_OPEN;
 - return 0;
 - }
 -
   return 1;
  }
 
 diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
 index ef15dc72284b..4cf25b90dbe0 100644
 --- a/arch/x86/kvm/vmx.c
 +++ b/arch/x86/kvm/vmx.c
 @@ -5524,17 +5524,6 @@ static int handle_interrupt_window(struct
 kvm_vcpu *vcpu)
   kvm_make_request(KVM_REQ_EVENT, vcpu);
 
   ++vcpu-stat.irq_window_exits;
 -
 - /*
 -  * If the user space waits to inject interrupts, exit as soon as
 -  * possible
 -  */
 - if (!lapic_in_kernel(vcpu) 
 - vcpu-run-request_interrupt_window 
 - !kvm_cpu_has_interrupt(vcpu)) {
 - vcpu-run-exit_reason = KVM_EXIT_IRQ_WINDOW_OPEN;
 - return 0;
 - }
   return 1;
  }

Is it possible to adjust the code here and remove the later checking in x86.c?
In that case, we can avoid calling dm_request_for_irq_injection(vcpu) for
VM exit. Basically dm_request_for_irq_injection() did the following
checks:

- The same as in handle_interrupt_window() below:

if (!irqchip_in_kernel(vcpu->kvm) &&
    vcpu->run->request_interrupt_window &&
    !kvm_cpu_has_interrupt(vcpu))

- kvm_arch_interrupt_allowed(vcpu), in which most of the conditions
are guaranteed by interrupt window exits; the only one I am not sure
how to handle is 'to_vmx(vcpu)->nested.nested_run_pending'.

Thanks,
Feng

 
 diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
 index daa6d4fe97fe..62362fed4169 100644
 --- a/arch/x86/kvm/x86.c
 +++ b/arch/x86/kvm/x86.c
 @@ -6475,8 +6475,8 @@ static int vcpu_run(struct kvm_vcpu *vcpu)
   kvm_inject_pending_timer_irqs(vcpu);
 
   if (dm_request_for_irq_injection(vcpu)) {
 - r = -EINTR;
 - vcpu-run-exit_reason = KVM_EXIT_INTR;
 + r = 0;
 + vcpu-run-exit_reason = KVM_EXIT_IRQ_WINDOW_OPEN;
   ++vcpu-stat.request_irq_exits;
   break;
   }
 --
 1.8.3.1
 
 
 --
 To unsubscribe from this list: send the line unsubscribe linux-kernel in
 the body of a message to majord...@vger.kernel.org
 More majordomo info at  http://vger.kernel.org/majordomo-info.html
 Please read the FAQ at  http://www.tux.org/lkml/
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

RE: [PATCH v3] virt: IRQ bypass manager

2015-08-05 Thread Wu, Feng

 -Original Message-
 From: Alex Williamson [mailto:alex.william...@redhat.com]
 Sent: Thursday, August 06, 2015 6:08 AM
 To: linux-ker...@vger.kernel.org; kvm@vger.kernel.org
 Cc: eric.au...@st.com; eric.au...@linaro.org; j...@8bytes.org;
 avi.kiv...@gmail.com; pbonz...@redhat.com; Wu, Feng
 Subject: [PATCH v3] virt: IRQ bypass manager

 When a physical I/O device is assigned to a virtual machine through
 facilities like VFIO and KVM, the interrupt for the device generally
 bounces through the host system before being injected into the VM.
 However, hardware technologies exist that often allow the host to be
 bypassed for some of these scenarios.  Intel Posted Interrupts allow
 the specified physical edge interrupts to be directly injected into a
 guest when delivered to a physical processor while the vCPU is
 running.  ARM IRQ Forwarding allows forwarded physical interrupts to
 be directly deactivated by the guest.

 The IRQ bypass manager here is meant to provide the shim to connect
 interrupt producers, generally the host physical device driver, with
 interrupt consumers, generally the hypervisor, in order to configure
 these bypass mechanism.  To do this, we base the connection on a
 shared, opaque token.  For KVM-VFIO this is expected to be an
 eventfd_ctx since this is the connection we already use to connect an
 eventfd to an irqfd on the in-kernel path.  When a producer and
 consumer with matching tokens is found, callbacks via both registered
 participants allow the bypass facilities to be automatically enabled.

 Signed-off-by: Alex Williamson alex.william...@redhat.com
 ---

 v3: Fix list_for_each_entry issue noted by Eric

 Do we want to revisit whether add and del are required callbacks?
 Maybe they should only be required for the consumer?  If Feng already
 doesn't require them for Intel PI, let's not impose stub callback
 requirements.

Since both irq_bypass_register_producer() and irq_bypass_register_consumer()
call __connect(), which then calls add_consumer() and add_producer(), only
providing the add/del callbacks for 'struct irq_bypass_consumer' is fine with me.
Thanks!

BTW, could you please have a look at the common part patches v4 sent by Eric:
[PATCH v4 0/5] KVM: irqfd consumer based on IRQ bypass manager

If it looks fine to you guys, we can continue with the arch-specific work.
Thank you!

Thanks,
Feng

  MAINTAINERS   |7 +
  include/linux/irqbypass.h |   90 
  virt/lib/Kconfig  |2
  virt/lib/Makefile |1
  virt/lib/irqbypass.c  |  256
 +
  5 files changed, 356 insertions(+)
  create mode 100644 include/linux/irqbypass.h
  create mode 100644 virt/lib/Kconfig
  create mode 100644 virt/lib/Makefile
  create mode 100644 virt/lib/irqbypass.c

 diff --git a/MAINTAINERS b/MAINTAINERS
 index a9ae6c1..10c8b2f 100644
 --- a/MAINTAINERS
 +++ b/MAINTAINERS
 @@ -10963,6 +10963,13 @@ L:   net...@vger.kernel.org
  S:   Maintained
  F:   drivers/net/ethernet/via/via-velocity.*

 +VIRT LIB
 +M:   Alex Williamson alex.william...@redhat.com
 +M:   Paolo Bonzini pbonz...@redhat.com
 +L:   kvm@vger.kernel.org
 +S:   Supported
 +F:   virt/lib/
 +
  VIVID VIRTUAL VIDEO DRIVER
  M:   Hans Verkuil hverk...@xs4all.nl
  L:   linux-me...@vger.kernel.org
 diff --git a/include/linux/irqbypass.h b/include/linux/irqbypass.h
 new file mode 100644
 index 000..fde7b64
 --- /dev/null
 +++ b/include/linux/irqbypass.h
 @@ -0,0 +1,90 @@
 +/*
 + * IRQ offload/bypass manager
 + *
 + * Copyright (C) 2015 Red Hat, Inc.
 + * Copyright (c) 2015 Linaro Ltd.
 + *
 + * This program is free software; you can redistribute it and/or modify
 + * it under the terms of the GNU General Public License version 2 as
 + * published by the Free Software Foundation.
 + */
 +#ifndef IRQBYPASS_H
 +#define IRQBYPASS_H
 +
 +#include linux/list.h
 +
 +struct irq_bypass_consumer;
 +
 +/*
 + * Theory of operation
 + *
 + * The IRQ bypass manager is a simple set of lists and callbacks that allows
 + * IRQ producers (ex. physical interrupt sources) to be matched to IRQ
 + * consumers (ex. virtualization hardware that allows IRQ bypass or offload)
 + * via a shared token (ex. eventfd_ctx).  Producers and consumers register
 + * independently.  When a token match is found, the optional @stop callback
 + * will be called for each participant.  The pair will then be connected via
 + * the @add_* callbacks, and finally the optional @start callback will allow
 + * any final coordination.  When either participant is unregistered, the
 + * process is repeated using the @del_* callbacks in place of the @add_*
 + * callbacks.  Match tokens must be unique per producer/consumer, 1:N
 pairings
 + * are not supported.
 + */
 +
 +/**
 + * struct irq_bypass_producer - IRQ bypass producer definition
 + * @node: IRQ bypass manager private list management
 + * @token: opaque token to match between producer and consumer
 + * @irq: Linux IRQ

RE: [v5 15/19] KVM: eventfd: add irq bypass consumer management

2015-07-27 Thread Wu, Feng

 -Original Message-
 From: Paolo Bonzini [mailto:pbonz...@redhat.com]
 Sent: Monday, July 13, 2015 9:47 PM
 To: Eric Auger; Wu, Feng; kvm@vger.kernel.org; linux-ker...@vger.kernel.org
 Cc: alex.william...@redhat.com; j...@8bytes.org
 Subject: Re: [v5 15/19] KVM: eventfd: add irq bypass consumer management

  13/07/2015 15:16, Eric Auger wrote:

   +irqfd-consumer.token = (void *)irqfd-eventfd;
   +kvm_arch_irq_consumer_init(irqfd-consumer);
  what if the architecture does not implement kvm_arch_irq_consumer_init?

  Also you are using here this single function kvm_arch_irq_consumer_init
  to do some irq bypass manager settings + attaching your
  irqfd-arch_update cb which does not really relate to IRQ bypass
  manager. I think I preferred the approach where start/top/add/del were
  exposed separately ([RFC v2 5/6] KVM: introduce kvm_arch functions for
  IRQ bypass).

  Why not adding another kvm_arch_irq_routing_update then, not necessarily
  linked to irq bypass manager.

 Yes, I also preferred the dummy kvm_arch_* functions to this approach
 with an init function.  You'd have to add dummy init functions anyway
 for non-ARM, non-x86 architectures.

I think dummy kvm_arch_* is okay for me. However, my point is that currently
'add_producer' and 'del_producer' are mandatory, while the other callbacks are
optional. Patches "[RFC v2 5/6] KVM: introduce kvm_arch functions for IRQ bypass"
and "[RFC v2 6/6] KVM: eventfd: add irq bypass consumer management" provide
all the callbacks, which means we need to implement dummy arch-specific
functions whether or not they are necessary. In that case, it seems pointless
to make some of the callbacks optional. Anyway, if you guys are fine with the
dummy approach, I am good! :)

Thanks,
Feng

 Paolo
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

RE: [RFC v2 0/6] IRQ bypass manager and irqfd consumer

2015-07-23 Thread Wu, Feng



 -Original Message-
 From: linux-kernel-ow...@vger.kernel.org
 [mailto:linux-kernel-ow...@vger.kernel.org] On Behalf Of Eric Auger
 Sent: Monday, July 06, 2015 8:11 PM
 To: eric.au...@st.com; eric.au...@linaro.org;
 linux-arm-ker...@lists.infradead.org; kvm...@lists.cs.columbia.edu;
 kvm@vger.kernel.org; christoffer.d...@linaro.org; marc.zyng...@arm.com;
 alex.william...@redhat.com; pbonz...@redhat.com; avi.kiv...@gmail.com;
 mtosa...@redhat.com; Wu, Feng; j...@8bytes.org;
 b.rey...@virtualopensystems.com
 Cc: linux-ker...@vger.kernel.org; patc...@linaro.org
 Subject: [RFC v2 0/6] IRQ bypass manager and irqfd consumer
 
 This series introduces and extends the IRQ bypass manager written
 by Alex and transforms irqfd into an IRQ bypass manager consumer.
 The producer part, in my case the VFIO platform driver will be introduced
 separately. That code should be usable by both ARM IRQ forwarding
 and Intel Posted Interrupts.
 
 The IRQ bypass manager enables to setup a negotiated link between an
 IRQ producer and an IRQ consumer (typically the VFIO driver and KVM irqfd).
 
 The series currently includes Alex's code which was sent by email.
 Its structure obviously will adapt to Alex's will.
 
 Also the irq bypass manager gets compiled/linked on arm/arm64 along
 with KVM and VFIO platform driver.
 
 can be found at:
 https://git.linaro.org/people/eric.auger/linux.git/shortlog/refs/heads/v4.2-rc1-
 bypass-fwd-v2
 
 Best Regards
 
 Eric
 
 History:
 v1 - v2:
 - isolate the bypass manager and irqfd consumer in this series
 - take into account Paolo's comments and use container_of strategy and
   remove additional fields introduced in v1.
 - create kvm_irqfd.h
 - add unregistration in irqfd_shutdown

Hi Eric,

[4/6], [5/6], and [6/6] of this series are common to forwarded irq and posted
interrupts, did you have a chance to get a new version of them based on
Alex's latest irqbypass manager patch:

https://lkml.org/lkml/2015/7/16/810

Thanks a lot!

Thanks,
Feng

 
 v1: originally part of [RFC 00/17] ARM IRQ forward control based on IRQ
 bypass manager (https://lkml.org/lkml/2015/7/2/268)
 
 
 Eric Auger (6):
   KVM: arm/arm64: select IRQ_BYPASS_MANAGER
   VFIO: platform: select IRQ_BYPASS_MANAGER
   irq: bypass: Extend skeleton for ARM forwarding control
   KVM: create kvm_irqfd.h
   KVM: introduce kvm_arch functions for IRQ bypass
   KVM: eventfd: add irq bypass consumer management
 
  arch/arm/kvm/Kconfig  |   1 +
  arch/arm64/kvm/Kconfig|   1 +
  drivers/vfio/platform/Kconfig |   1 +
  include/linux/irqbypass.h |  19 ++--
  include/linux/kvm_host.h  |  37 ++
  include/linux/kvm_irqfd.h |  70 +++
  kernel/irq/bypass.c   |  44 +++--
  virt/kvm/Kconfig  |   3 ++
  virt/kvm/eventfd.c| 109 
 +-
  9 files changed, 203 insertions(+), 82 deletions(-)
  create mode 100644 include/linux/kvm_irqfd.h
 
 --
 1.9.1
 
 --
 To unsubscribe from this list: send the line unsubscribe linux-kernel in
 the body of a message to majord...@vger.kernel.org
 More majordomo info at  http://vger.kernel.org/majordomo-info.html
 Please read the FAQ at  http://www.tux.org/lkml/
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

RE: [v5 00/19] Add VT-d Posted-Interrupts support

2015-07-13 Thread Wu, Feng



 -Original Message-
 From: Eric Auger [mailto:eric.au...@linaro.org]
 Sent: Monday, July 13, 2015 9:19 PM
 To: Wu, Feng; kvm@vger.kernel.org; linux-ker...@vger.kernel.org
 Cc: pbonz...@redhat.com; alex.william...@redhat.com; j...@8bytes.org
 Subject: Re: [v5 00/19] Add VT-d Posted-Interrupts support
 
 Hi Feng,
 On 07/13/2015 11:47 AM, Feng Wu wrote:
  VT-d Posted-Interrupts is an enhancement to CPU side Posted-Interrupt.
  With VT-d Posted-Interrupts enabled, external interrupts from
  direct-assigned devices can be delivered to guests without VMM
  intervention when guest is running in non-root mode.
 
  You can find the VT-d Posted-Interrupts Spec. in the following URL:
 
 http://www.intel.com/content/www/us/en/intelligent-systems/intel-technolog
 y/vt-directed-io-spec.html
 
  This series was part of
 http://thread.gmane.org/gmane.linux.kernel.iommu/7708. To make things
 clear, send out IOMMU part here.
 
  This patch-set is based on the latest x86/apic branch of tip tree.
 
  Divide the whole series which contain multiple components into three parts:
  - Prerequisite changes to irq subsystem (already merged)
  - IOMMU part (already merged)
  - KVM and VFIO parts (this series)
 
  v5:
  - Based on Alex and Eric's irq bypass manager:
  https://lkml.org/lkml/2015/7/10/663
  - Reuse some common patch from Eric
 
 A comment about the overall structure. Previously you preferred to have 2
 separate series, one usable by both of us and one with my forwarding
 stuff. Why did you change your mind?

I didn't change my mind. Since Alex sent out the latest irq bypass manager
patch, in which some callbacks are renamed and some are made
optional, I feel some changes may be needed to your patch below:
[RFC v2 0/6] IRQ bypass manager and irqfd consumer

So I integrated it here; sorry for the inconvenience. Could you please send
out a new version of this patch-set, so that I can follow it? Thanks a lot!

Thanks,
Feng

 
 Best Regards
 
 Eric
 
  Eric Auger (3):
KVM: create kvm_irqfd.h
KVM: eventfd: add irq bypass information in irqfd
KVM: eventfd: add irq bypass consumer management
 
  Feng Wu (16):
KVM: Extend struct pi_desc for VT-d Posted-Interrupts
KVM: Add some helper functions for Posted-Interrupts
KVM: Define a new interface kvm_intr_is_single_vcpu()
KVM: Get Posted-Interrupts descriptor address from struct kvm_vcpu
KVM: Add interfaces to control PI outside vmx
KVM: Make struct kvm_irq_routing_table accessible
KVM: make kvm_set_msi_irq() public
vfio: Select IRQ_BYPASS_MANAGER for vfio PCI devices
vfio: Register/unregister irq_bypass_producer
KVM, x86: Select IRQ_BYPASS_MANAGER for KVM_INTEL
KVM: x86: Update IRTE for posted-interrupts
KVM: x86: Add arch specific routines for irqbypass manager
KVM: Add an arch specific hooks in 'struct kvm_kernel_irqfd'
KVM: Update Posted-Interrupts Descriptor when vCPU is preempted
KVM: Update Posted-Interrupts Descriptor when vCPU is blocked
KVM: Warn if 'SN' is set during posting interrupts by software
 
   arch/x86/include/asm/kvm_host.h |  15 ++
   arch/x86/kvm/Kconfig|   1 +
   arch/x86/kvm/irq_comm.c |  28 +++-
   arch/x86/kvm/vmx.c  | 278
 +++-
   arch/x86/kvm/x86.c  | 160 +++--
   drivers/vfio/pci/Kconfig|   1 +
   drivers/vfio/pci/vfio_pci_intrs.c   |  19 +++
   drivers/vfio/pci/vfio_pci_private.h |   2 +
   include/linux/kvm_host.h|  23 +++
   include/linux/kvm_irqfd.h   |  74 ++
   virt/kvm/eventfd.c  | 115 ++-
   virt/kvm/irqchip.c  |  11 --
   virt/kvm/kvm_main.c |   3 +
   13 files changed, 632 insertions(+), 98 deletions(-)
   create mode 100644 include/linux/kvm_irqfd.h
 

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

RE: [RFC v1 5/5] Call irqbypass update routine when updating irqfd

2015-07-10 Thread Wu, Feng

 -Original Message-
 From: Wu, Feng
 Sent: Friday, July 10, 2015 12:13 PM
 To: Alex Williamson
 Cc: kvm@vger.kernel.org; pbonz...@redhat.com; j...@8bytes.org;
 avi.kiv...@gmail.com; eric.au...@linaro.org; Wu, Feng
 Subject: RE: [RFC v1 5/5] Call irqbypass update routine when updating irqfd

  -Original Message-
  From: kvm-ow...@vger.kernel.org [mailto:kvm-ow...@vger.kernel.org] On
  Behalf Of Alex Williamson
  Sent: Friday, July 10, 2015 11:26 AM
  To: Wu, Feng
  Cc: kvm@vger.kernel.org; pbonz...@redhat.com; j...@8bytes.org;
  avi.kiv...@gmail.com; eric.au...@linaro.org
  Subject: Re: [RFC v1 5/5] Call irqbypass update routine when updating irqfd

  On Fri, 2015-07-10 at 11:00 +0800, Feng Wu wrote:
   Call update routine when updating irqfd, this can update the
   IRTE for Intel posted-interrupts.

   Signed-off-by: Feng Wu feng...@intel.com
   ---
virt/kvm/eventfd.c | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)

   diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
   index a32cf6c..1226835 100644
   --- a/virt/kvm/eventfd.c
   +++ b/virt/kvm/eventfd.c
   @@ -570,8 +570,10 @@ void kvm_irq_routing_update(struct kvm *kvm)

 spin_lock_irq(kvm-irqfds.lock);

   - list_for_each_entry(irqfd, kvm-irqfds.items, list)
   + list_for_each_entry(irqfd, kvm-irqfds.items, list) {
 irqfd_update(kvm, irqfd);
   + irqfd-consumer.update(irqfd-consumer);
   + }

 spin_unlock_irq(kvm-irqfds.lock);
}

  I don't understand why the irq bypass manager needs to know about this
  update callback.  We could just as easily make it be a function pointer
  on the irqfd structure or maybe just open code it.  It's defined by the
  consumer and called by the consumer, the irq bypass manager shouldn't
  know about it.  Thanks,

 Yes, you are right. All we need is the producer information which has been
 passed in the register routine. And we can easily make this update logic
 inside the consumer. Thanks for your comments!

 Thanks,
 Feng

BTW, Paolo & Alex, in the VFIO framework, how can we know whether a vCPU or a
guest has devices assigned to it?

Thanks,
Feng

  Alex

  --
  To unsubscribe from this list: send the line unsubscribe kvm in
  the body of a message to majord...@vger.kernel.org
  More majordomo info at  http://vger.kernel.org/majordomo-info.html
N�r��yb�X��ǧv�^�)޺{.n�+h����ܨ}���Ơz�j:+v���zZ+��+zf���h���~i���z��w���?��)ߢf

RE: [RFC v1 5/5] Call irqbypass update routine when updating irqfd

2015-07-09 Thread Wu, Feng

 -Original Message-
 From: kvm-ow...@vger.kernel.org [mailto:kvm-ow...@vger.kernel.org] On
 Behalf Of Alex Williamson
 Sent: Friday, July 10, 2015 11:26 AM
 To: Wu, Feng
 Cc: kvm@vger.kernel.org; pbonz...@redhat.com; j...@8bytes.org;
 avi.kiv...@gmail.com; eric.au...@linaro.org
 Subject: Re: [RFC v1 5/5] Call irqbypass update routine when updating irqfd

 On Fri, 2015-07-10 at 11:00 +0800, Feng Wu wrote:
  Call update routine when updating irqfd, this can update the
  IRTE for Intel posted-interrupts.

  Signed-off-by: Feng Wu feng...@intel.com
  ---
   virt/kvm/eventfd.c | 4 +++-
   1 file changed, 3 insertions(+), 1 deletion(-)

  diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
  index a32cf6c..1226835 100644
  --- a/virt/kvm/eventfd.c
  +++ b/virt/kvm/eventfd.c
  @@ -570,8 +570,10 @@ void kvm_irq_routing_update(struct kvm *kvm)

  spin_lock_irq(kvm-irqfds.lock);

  -   list_for_each_entry(irqfd, kvm-irqfds.items, list)
  +   list_for_each_entry(irqfd, kvm-irqfds.items, list) {
  irqfd_update(kvm, irqfd);
  +   irqfd-consumer.update(irqfd-consumer);
  +   }

  spin_unlock_irq(kvm-irqfds.lock);
   }

 I don't understand why the irq bypass manager needs to know about this
 update callback.  We could just as easily make it be a function pointer
 on the irqfd structure or maybe just open code it.  It's defined by the
 consumer and called by the consumer, the irq bypass manager shouldn't
 know about it.  Thanks,

Yes, you are right. All we need is the producer information which has been
passed in the register routine. And we can easily make this update logic
inside the consumer. Thanks for your comments!

Thanks,
Feng

 Alex

 --
 To unsubscribe from this list: send the line unsubscribe kvm in
 the body of a message to majord...@vger.kernel.org
 More majordomo info at  http://vger.kernel.org/majordomo-info.html

RE: [RFC PATCH] irq: IRQ bypass manager

2015-07-08 Thread Wu, Feng

 -Original Message-
 From: Alex Williamson [mailto:alex.william...@redhat.com]
 Sent: Wednesday, July 08, 2015 5:40 AM
 To: linux-ker...@vger.kernel.org; kvm@vger.kernel.org
 Cc: eric.au...@st.com; eric.au...@linaro.org; j...@8bytes.org;
 avi.kiv...@gmail.com; pbonz...@redhat.com; Wu, Feng
 Subject: [RFC PATCH] irq: IRQ bypass manager

 When a physical I/O device is assigned to a virtual machine through
 facilities like VFIO and KVM, the interrupt for the device generally
 bounces through the host system before being injected into the VM.
 However, hardware technologies exist that often allow the host to be
 bypassed for some of these scenarios.  Intel Posted Interrupts allow
 the specified physical edge interrupts to be directly injected into a
 guest when delivered to a physical processor while the vCPU is
 running.  ARM IRQ Forwarding allows the hypervisor to handle level
 triggered device interrupts as edge interrupts, by giving the guest
 control of de-asserting and unmasking the interrupt line.

 The IRQ bypass manager here is meant to provide the shim to connect
 interrupt producers, generally the host physical device driver, with
 interrupt consumers, generally the hypervisor, in order to configure
 these bypass mechanisms.  To do this, we base the connection on a
 shared, opaque token.  For KVM-VFIO this is expected to be an
 eventfd_ctx since this is the connection we already use to connect an
 eventfd to an irqfd on the in-kernel path.  When a producer and
 consumer with matching tokens is found, callbacks via both registered
 participants allow the bypass facilities to be automatically enabled.

My PI patches work well on top of this one and the one Eric sent
out earlier. Alex, what should we do as the next step to speed up the
upstreaming process?

Thanks,
Feng

 Signed-off-by: Alex Williamson alex.william...@redhat.com
 Cc: Eric Auger eric.au...@linaro.org
 ---

 This is the current draft of the IRQ bypass manager, I've made the
 following changes:

  - Incorporated Eric's extensions (I would welcome Sign-offs from all
involved in the development, especially Eric - I've gone ahead and
added Linaro copyright for the contributions so far)
  - Module support with module reference tracking
  - might_sleep() as suggested by Paolo
  - kerneldoc as suggested by Paolo
  - Renamed file s/bypass/irqbypass/ because a module named bypass
is strange

 Issues:
  - The update() callback is defined but not used
  - We can't have *all* the callbacks be optional.  I assume add/del
are required
  - Naming consistency, stop is to start as suspend is to resume, not
stop/resume
  - Callback descriptions including why we need separate stop/start
hooks when it seems like the callee could reasonably assume such
around the add/del callbacks
  - Need functional prototypes for both PI and forwarding

  include/linux/irqbypass.h |   75 
  kernel/irq/Kconfig|3 +
  kernel/irq/Makefile   |1
  kernel/irq/irqbypass.c|  206
 +
  4 files changed, 285 insertions(+)
  create mode 100644 include/linux/irqbypass.h
  create mode 100644 kernel/irq/irqbypass.c

 diff --git a/include/linux/irqbypass.h b/include/linux/irqbypass.h
 new file mode 100644
 index 000..cc7ce45
 --- /dev/null
 +++ b/include/linux/irqbypass.h
 @@ -0,0 +1,75 @@
 +/*
 + * IRQ offload/bypass manager
 + *
 + * Copyright (C) 2015 Red Hat, Inc.
 + * Copyright (c) 2015 Linaro Ltd.
 + *
 + * This program is free software; you can redistribute it and/or modify
 + * it under the terms of the GNU General Public License version 2 as
 + * published by the Free Software Foundation.
 + */
 +#ifndef IRQBYPASS_H
 +#define IRQBYPASS_H
 +
 +#include linux/list.h
 +
 +struct irq_bypass_consumer;
 +
 +/**
 + * struct irq_bypass_producer - IRQ bypass producer definition
 + * @node: IRQ bypass manager private list management
 + * @token: opaque token to match between producer and consumer
 + * @irq: Linux IRQ number for the producer device
 + * @stop:
 + * @resume:
 + * @add_consumer:
 + * @del_consumer:
 + *
 + * The IRQ bypass producer structure represents an interrupt source for
 + * participation in possible host bypass, for instance an interrupt vector
 + * for a physical device assigned to a VM.
 + */
 +struct irq_bypass_producer {
 + struct list_head node;
 + void *token;
 + int irq; /* linux irq */
 + void (*stop)(struct irq_bypass_producer *);
 + void (*resume)(struct irq_bypass_producer *);
 + void (*add_consumer)(struct irq_bypass_producer *,
 +  struct irq_bypass_consumer *);
 + void (*del_consumer)(struct irq_bypass_producer *,
 +  struct irq_bypass_consumer *);
 +};
 +
 +/**
 + * struct irq_bypass_consumer - IRQ bypass consumer definition
 + * @node: IRQ bypass manager private list management
 + * @token: opaque token to match between producer and consumer

RE: [RFC v2 6/6] KVM: eventfd: add irq bypass consumer management



 -Original Message-
 From: Eric Auger [mailto:eric.au...@linaro.org]
 Sent: Tuesday, July 07, 2015 5:06 PM
 To: Wu, Feng; eric.au...@st.com; linux-arm-ker...@lists.infradead.org;
 kvm...@lists.cs.columbia.edu; kvm@vger.kernel.org;
 christoffer.d...@linaro.org; marc.zyng...@arm.com;
 alex.william...@redhat.com; pbonz...@redhat.com; avi.kiv...@gmail.com;
 mtosa...@redhat.com; j...@8bytes.org; b.rey...@virtualopensystems.com
 Cc: linux-ker...@vger.kernel.org; patc...@linaro.org
 Subject: Re: [RFC v2 6/6] KVM: eventfd: add irq bypass consumer management
 
 Hi Feng,
 On 07/07/2015 10:47 AM, Wu, Feng wrote:
 
 
  -Original Message-
  From: Eric Auger [mailto:eric.au...@linaro.org]
  Sent: Monday, July 06, 2015 8:12 PM
  To: eric.au...@st.com; eric.au...@linaro.org;
  linux-arm-ker...@lists.infradead.org; kvm...@lists.cs.columbia.edu;
  kvm@vger.kernel.org; christoffer.d...@linaro.org; marc.zyng...@arm.com;
  alex.william...@redhat.com; pbonz...@redhat.com; avi.kiv...@gmail.com;
  mtosa...@redhat.com; Wu, Feng; j...@8bytes.org;
  b.rey...@virtualopensystems.com
  Cc: linux-ker...@vger.kernel.org; patc...@linaro.org
  Subject: [RFC v2 6/6] KVM: eventfd: add irq bypass consumer management
 
  This patch adds the registration/unregistration of an
  irq_bypass_consumer on irqfd assignment/deassignment.
 
  Signed-off-by: Eric Auger eric.au...@linaro.org
 
  ---
 
  v1 - v2:
  - populate of kvm and gsi removed
  - unregister the consumer on irqfd_shutdown
  ---
   include/linux/kvm_irqfd.h |  1 +
   virt/kvm/eventfd.c| 14 +++---
   2 files changed, 12 insertions(+), 3 deletions(-)
 
  diff --git a/include/linux/kvm_irqfd.h b/include/linux/kvm_irqfd.h
  index f926b39..3c0bd07 100644
  --- a/include/linux/kvm_irqfd.h
  +++ b/include/linux/kvm_irqfd.h
  @@ -64,6 +64,7 @@ struct kvm_kernel_irqfd {
 struct list_head list;
 poll_table pt;
 struct work_struct shutdown;
  +  struct irq_bypass_consumer consumer;
   };
 
   #endif /* __LINUX_KVM_IRQFD_H */
  diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
  index b79dc61..9452754 100644
  --- a/virt/kvm/eventfd.c
  +++ b/virt/kvm/eventfd.c
  @@ -35,6 +35,7 @@
   #include linux/srcu.h
   #include linux/slab.h
   #include linux/seqlock.h
  +#include linux/irqbypass.h
   #include trace/events/kvm.h
 
   #include kvm/iodev.h
  @@ -140,6 +141,7 @@ irqfd_shutdown(struct work_struct *work)
 /*
  * It is now safe to release the object's resources
  */
  +  irq_bypass_unregister_consumer(irqfd-consumer);
 eventfd_ctx_put(irqfd-eventfd);
 kfree(irqfd);
   }
  @@ -380,7 +382,14 @@ kvm_irqfd_assign(struct kvm *kvm, struct
 kvm_irqfd
  *args)
  */
 fdput(f);
 
  -  /* irq_bypass_register_consumer(); */
  +  irqfd-consumer.token = (void *)irqfd-eventfd;
  +  irqfd-consumer.add_producer = kvm_arch_irq_bypass_add_producer;
  +  irqfd-consumer.del_producer = kvm_arch_irq_bypass_del_producer;
  +  irqfd-consumer.stop = kvm_arch_irq_bypass_stop;
  +  irqfd-consumer.resume = kvm_arch_irq_bypass_resume;
  +  irqfd-consumer.update = kvm_arch_irq_bypass_update;
  +  ret = irq_bypass_register_consumer(irqfd-consumer);
  +  WARN_ON(ret);
 
 return 0;
 
  @@ -481,8 +490,6 @@ kvm_irqfd_deassign(struct kvm *kvm, struct
 kvm_irqfd
  *args)
 struct kvm_kernel_irqfd *irqfd, *tmp;
 struct eventfd_ctx *eventfd;
 
  -  /* irq_bypass_unregister_consumer() */
  -
 eventfd = eventfd_ctx_fdget(args-fd);
 if (IS_ERR(eventfd))
 return PTR_ERR(eventfd);
  @@ -501,6 +508,7 @@ kvm_irqfd_deassign(struct kvm *kvm, struct
 kvm_irqfd
  *args)
 irqfd-irq_entry.type = 0;
 write_seqcount_end(irqfd-irq_entry_sc);
 irqfd_deactivate(irqfd);
  +  irq_bypass_unregister_consumer(irqfd-consumer);
 
  The consumer is unregistered in irqfd_shutdown(); why do we need to do it here?
 Yes you're right, this is not needed anymore. Did things too hastily :-(
 
 Otherwise does that isolated series fit your needs for posted interrupts
 and match your expectations with regards to split/reusability?

Basically, they are fine. I am trying to rebase my patch on top of it. If I
encounter any issues, I will discuss it here ASAP.

Thanks,
Feng

 
 Eric
 
 
  Thanks,
  Feng
 
 }
 }
 
  --
  1.9.1
 

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

RE: [RFC v2 6/6] KVM: eventfd: add irq bypass consumer management



 -Original Message-
 From: Eric Auger [mailto:eric.au...@linaro.org]
 Sent: Monday, July 06, 2015 8:12 PM
 To: eric.au...@st.com; eric.au...@linaro.org;
 linux-arm-ker...@lists.infradead.org; kvm...@lists.cs.columbia.edu;
 kvm@vger.kernel.org; christoffer.d...@linaro.org; marc.zyng...@arm.com;
 alex.william...@redhat.com; pbonz...@redhat.com; avi.kiv...@gmail.com;
 mtosa...@redhat.com; Wu, Feng; j...@8bytes.org;
 b.rey...@virtualopensystems.com
 Cc: linux-ker...@vger.kernel.org; patc...@linaro.org
 Subject: [RFC v2 6/6] KVM: eventfd: add irq bypass consumer management
 
 This patch adds the registration/unregistration of an
 irq_bypass_consumer on irqfd assignment/deassignment.
 
 Signed-off-by: Eric Auger eric.au...@linaro.org
 
 ---
 
 v1 - v2:
 - populate of kvm and gsi removed
 - unregister the consumer on irqfd_shutdown
 ---
  include/linux/kvm_irqfd.h |  1 +
  virt/kvm/eventfd.c| 14 +++---
  2 files changed, 12 insertions(+), 3 deletions(-)
 
 diff --git a/include/linux/kvm_irqfd.h b/include/linux/kvm_irqfd.h
 index f926b39..3c0bd07 100644
 --- a/include/linux/kvm_irqfd.h
 +++ b/include/linux/kvm_irqfd.h
 @@ -64,6 +64,7 @@ struct kvm_kernel_irqfd {
   struct list_head list;
   poll_table pt;
   struct work_struct shutdown;
 + struct irq_bypass_consumer consumer;
  };
 
  #endif /* __LINUX_KVM_IRQFD_H */
 diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
 index b79dc61..9452754 100644
 --- a/virt/kvm/eventfd.c
 +++ b/virt/kvm/eventfd.c
 @@ -35,6 +35,7 @@
  #include linux/srcu.h
  #include linux/slab.h
  #include linux/seqlock.h
 +#include linux/irqbypass.h
  #include trace/events/kvm.h
 
  #include kvm/iodev.h
 @@ -140,6 +141,7 @@ irqfd_shutdown(struct work_struct *work)
   /*
* It is now safe to release the object's resources
*/
 + irq_bypass_unregister_consumer(irqfd-consumer);
   eventfd_ctx_put(irqfd-eventfd);
   kfree(irqfd);
  }
 @@ -380,7 +382,14 @@ kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd
 *args)
*/
   fdput(f);
 
 - /* irq_bypass_register_consumer(); */
 + irqfd-consumer.token = (void *)irqfd-eventfd;
 + irqfd-consumer.add_producer = kvm_arch_irq_bypass_add_producer;
 + irqfd-consumer.del_producer = kvm_arch_irq_bypass_del_producer;
 + irqfd-consumer.stop = kvm_arch_irq_bypass_stop;
 + irqfd-consumer.resume = kvm_arch_irq_bypass_resume;
 + irqfd-consumer.update = kvm_arch_irq_bypass_update;
 + ret = irq_bypass_register_consumer(irqfd-consumer);
 + WARN_ON(ret);
 
   return 0;
 
 @@ -481,8 +490,6 @@ kvm_irqfd_deassign(struct kvm *kvm, struct kvm_irqfd
 *args)
   struct kvm_kernel_irqfd *irqfd, *tmp;
   struct eventfd_ctx *eventfd;
 
 - /* irq_bypass_unregister_consumer() */
 -
   eventfd = eventfd_ctx_fdget(args-fd);
   if (IS_ERR(eventfd))
   return PTR_ERR(eventfd);
 @@ -501,6 +508,7 @@ kvm_irqfd_deassign(struct kvm *kvm, struct kvm_irqfd
 *args)
   irqfd-irq_entry.type = 0;
   write_seqcount_end(irqfd-irq_entry_sc);
   irqfd_deactivate(irqfd);
 + irq_bypass_unregister_consumer(irqfd-consumer);

The consumer is unregistered in irqfd_shutdown(); why do we need to do it here?

Thanks,
Feng

   }
   }
 
 --
 1.9.1

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

RE: [RFC v2 3/6] irq: bypass: Extend skeleton for ARM forwarding control



 -Original Message-
 From: Paolo Bonzini [mailto:pbonz...@redhat.com]
 Sent: Tuesday, July 07, 2015 7:01 PM
 To: Wu, Feng; Eric Auger; eric.au...@st.com;
 linux-arm-ker...@lists.infradead.org; kvm...@lists.cs.columbia.edu;
 kvm@vger.kernel.org; christoffer.d...@linaro.org; marc.zyng...@arm.com;
 alex.william...@redhat.com; avi.kiv...@gmail.com; mtosa...@redhat.com;
 j...@8bytes.org; b.rey...@virtualopensystems.com
 Cc: linux-ker...@vger.kernel.org; patc...@linaro.org
 Subject: Re: [RFC v2 3/6] irq: bypass: Extend skeleton for ARM forwarding
 control
 
 
 
 On 07/07/2015 12:58, Wu, Feng wrote:
 
 
  -Original Message-
  From: Eric Auger [mailto:eric.au...@linaro.org]
  Sent: Monday, July 06, 2015 8:11 PM
  To: eric.au...@st.com; eric.au...@linaro.org;
  linux-arm-ker...@lists.infradead.org; kvm...@lists.cs.columbia.edu;
  kvm@vger.kernel.org; christoffer.d...@linaro.org; marc.zyng...@arm.com;
  alex.william...@redhat.com; pbonz...@redhat.com; avi.kiv...@gmail.com;
  mtosa...@redhat.com; Wu, Feng; j...@8bytes.org;
  b.rey...@virtualopensystems.com
  Cc: linux-ker...@vger.kernel.org; patc...@linaro.org
  Subject: [RFC v2 3/6] irq: bypass: Extend skeleton for ARM forwarding
 control
 
  - [add,del]_[consumer,producer] updated to take both the consumer and
producer handles. This is requested to combine info from both,
typically to link the source irq owned by the producer with the gsi
owned by the consumer (forwarded IRQ setup).
  - new methods are added:
x stop/resume: Those are needed for forwarding since the state change
  requires to intermingle actions at consumer, producer.
x consumer update for posted interrupts
  - On handshake, we now call connect, disconnect which features the more
complex sequence.
  - add irq on producer side
 
  Signed-off-by: Eric Auger eric.au...@linaro.org
 
  ---
 
  v1 - v2:
  - remove vfio_device, kvm, gsi, opaque fields included in v1 except common
  - all of those can be retrieved with container_of in callbacks
  ---
   include/linux/irqbypass.h | 19 ---
   kernel/irq/bypass.c   | 44
  
   2 files changed, 56 insertions(+), 7 deletions(-)
 
  diff --git a/include/linux/irqbypass.h b/include/linux/irqbypass.h
  index 718508e..8f62235 100644
  --- a/include/linux/irqbypass.h
  +++ b/include/linux/irqbypass.h
  @@ -3,17 +3,30 @@
 
   #include linux/list.h
 
  +struct irq_bypass_consumer;
  +
   struct irq_bypass_producer {
 struct list_head node;
 void *token;
  -  /* TBD */
  +  int irq; /* linux irq */
  +  void (*stop)(struct irq_bypass_producer *);
  +  void (*resume)(struct irq_bypass_producer *);
  +  void (*add_consumer)(struct irq_bypass_producer *,
  +   struct irq_bypass_consumer *);
  +  void (*del_consumer)(struct irq_bypass_producer *,
  +   struct irq_bypass_consumer *);
   };
 
   struct irq_bypass_consumer {
 struct list_head node;
 void *token;
 
  Can we add a pointer to ' struct irq_bypass_producer ', and
  assign it when connecting, de-assign it when disconnecting.
  since in some case, I need to update IRTE from the consumer
  side, where I cannot get the related producer info (I need irq info)
  without iterating it again.
 
 You can use container_of to add it to your own struct, e.g.
 
   struct irq_bypass_consumer cons;
   struct irq_bypass_producer *prod;

Do you mean this:

struct kvm_kernel_irqfd {

..

struct irq_bypass_consumer cons;
struct irq_bypass_producer *prod;
};

Thanks,
Feng

 
 Paolo
 
  Thanks,
  Feng
 
  -  void (*add_producer)(struct irq_bypass_producer *);
  -  void (*del_producer)(struct irq_bypass_producer *);
  +  void (*stop)(struct irq_bypass_consumer *);
  +  void (*resume)(struct irq_bypass_consumer *);
  +  void (*add_producer)(struct irq_bypass_consumer *,
  +   struct irq_bypass_producer *);
  +  void (*del_producer)(struct irq_bypass_consumer *,
  +   struct irq_bypass_producer *);
  +  void (*update)(struct irq_bypass_consumer *);
   };
 
   int irq_bypass_register_producer(struct irq_bypass_producer *);
  diff --git a/kernel/irq/bypass.c b/kernel/irq/bypass.c
  index 5d0f92b..efadbe5 100644
  --- a/kernel/irq/bypass.c
  +++ b/kernel/irq/bypass.c
  @@ -19,6 +19,42 @@ static LIST_HEAD(producers);
   static LIST_HEAD(consumers);
   static DEFINE_MUTEX(lock);
 
  +/* lock must be hold when calling connect */
  +static void connect(struct irq_bypass_producer *prod,
  +  struct irq_bypass_consumer *cons)
  +{
  +  if (prod-stop)
  +  prod-stop(prod);
  +  if (cons-stop)
  +  cons-stop(cons);
  +  if (prod-add_consumer)
  +  prod-add_consumer(prod, cons);
  +  if (cons-add_producer)
  +  cons-add_producer(cons, prod);
  +  if (cons-resume)
  +  cons-resume(cons);
  +  if (prod-resume)
  +  prod-resume(prod

RE: [RFC v2 3/6] irq: bypass: Extend skeleton for ARM forwarding control



 -Original Message-
 From: Paolo Bonzini [mailto:pbonz...@redhat.com]
 Sent: Tuesday, July 07, 2015 7:14 PM
 To: Wu, Feng; Eric Auger; eric.au...@st.com;
 linux-arm-ker...@lists.infradead.org; kvm...@lists.cs.columbia.edu;
 kvm@vger.kernel.org; christoffer.d...@linaro.org; marc.zyng...@arm.com;
 alex.william...@redhat.com; avi.kiv...@gmail.com; mtosa...@redhat.com;
 j...@8bytes.org; b.rey...@virtualopensystems.com
 Cc: linux-ker...@vger.kernel.org; patc...@linaro.org
 Subject: Re: [RFC v2 3/6] irq: bypass: Extend skeleton for ARM forwarding
 control
 
 
 
 On 07/07/2015 13:13, Wu, Feng wrote:
   You can use container_of to add it to your own struct, e.g.
  
struct irq_bypass_consumer cons;
struct irq_bypass_producer *prod;
  Do you mean this:
 
  struct kvm_kernel_irqfd {
 
  ..
 
  struct irq_bypass_consumer cons;
  struct irq_bypass_producer *prod;
  };
 
 Yes.

Then I still need to assign and de-assign prod in
irq_bypass_register_consumer/irq_bypass_unregister_consumer,
right? Would you please share why this is better? My original thought is as
below:

diff --git a/include/linux/irqbypass.h b/include/linux/irqbypass.h
index 8f62235..11930c1 100644
--- a/include/linux/irqbypass.h
+++ b/include/linux/irqbypass.h
@@ -20,6 +20,7 @@ struct irq_bypass_producer {
 struct irq_bypass_consumer {
struct list_head node;
void *token;
+   struct irq_bypass_producer *producer;
void (*stop)(struct irq_bypass_consumer *);
void (*resume)(struct irq_bypass_consumer *);
void (*add_producer)(struct irq_bypass_consumer *,
diff --git a/kernel/irq/bypass.c b/kernel/irq/bypass.c
index efadbe5..be2da25 100644
--- a/kernel/irq/bypass.c
+++ b/kernel/irq/bypass.c
@@ -122,6 +122,7 @@ int irq_bypass_register_consumer(struct irq_bypass_consumer 
*consumer)

list_for_each_entry(producer, producers, node) {
if (producer-token == consumer-token) {
+   consumer-producer = producer;
connect(producer, consumer);
break;
}
@@ -140,6 +141,7 @@ void irq_bypass_unregister_consumer(struct 
irq_bypass_consumer *consumer)

list_for_each_entry(producer, producers, node) {
if (producer-token == consumer-token) {
+   consumer-producer = NULL;
disconnect(producer, consumer);
break;
}

Thanks,
Feng

 
 Paolo
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

RE: [RFC v2 3/6] irq: bypass: Extend skeleton for ARM forwarding control

 -Original Message-
 From: Wu, Feng
 Sent: Tuesday, July 07, 2015 7:24 PM
 To: Paolo Bonzini; Eric Auger; eric.au...@st.com;
 linux-arm-ker...@lists.infradead.org; kvm...@lists.cs.columbia.edu;
 kvm@vger.kernel.org; christoffer.d...@linaro.org; marc.zyng...@arm.com;
 alex.william...@redhat.com; avi.kiv...@gmail.com; mtosa...@redhat.com;
 j...@8bytes.org; b.rey...@virtualopensystems.com
 Cc: linux-ker...@vger.kernel.org; patc...@linaro.org; Wu, Feng
 Subject: RE: [RFC v2 3/6] irq: bypass: Extend skeleton for ARM forwarding
 control

  -Original Message-
  From: Paolo Bonzini [mailto:pbonz...@redhat.com]
  Sent: Tuesday, July 07, 2015 7:22 PM
  To: Wu, Feng; Eric Auger; eric.au...@st.com;
  linux-arm-ker...@lists.infradead.org; kvm...@lists.cs.columbia.edu;
  kvm@vger.kernel.org; christoffer.d...@linaro.org; marc.zyng...@arm.com;
  alex.william...@redhat.com; avi.kiv...@gmail.com; mtosa...@redhat.com;
  j...@8bytes.org; b.rey...@virtualopensystems.com
  Cc: linux-ker...@vger.kernel.org; patc...@linaro.org
  Subject: Re: [RFC v2 3/6] irq: bypass: Extend skeleton for ARM forwarding
  control

  On 07/07/2015 13:18, Wu, Feng wrote:
   Then I still need assign prod and de-assign prod in
   irq_bypass_register_consumer/irq_bypass_unregister_consumer, Right?
   Would you please share why this is better.

  The need to store the consumer-producer link seems to be unique to
  posted interrupts.  It is difficult to say without seeing the PI code,
  but I prefer to keep the bypass manager as small as possible.

 Fine. I will follow your suggestion!

If we use the following changes, how can we assign 'prod'? We need to use
container_of to get struct kvm_kernel_irqfd and then refer to 'prod', but
we cannot do this in irq_bypass_register_consumer(), right? It is a
common API. But we can only get the associated producer info inside the
bypass manager, right?

Thanks,
Feng

struct kvm_kernel_irqfd {

..

struct irq_bypass_consumer cons;
struct irq_bypass_producer *prod;
};

 Thanks,
 Feng

  Paolo
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

RE: [RFC v2 3/6] irq: bypass: Extend skeleton for ARM forwarding control



 -Original Message-
 From: Eric Auger [mailto:eric.au...@linaro.org]
 Sent: Monday, July 06, 2015 8:11 PM
 To: eric.au...@st.com; eric.au...@linaro.org;
 linux-arm-ker...@lists.infradead.org; kvm...@lists.cs.columbia.edu;
 kvm@vger.kernel.org; christoffer.d...@linaro.org; marc.zyng...@arm.com;
 alex.william...@redhat.com; pbonz...@redhat.com; avi.kiv...@gmail.com;
 mtosa...@redhat.com; Wu, Feng; j...@8bytes.org;
 b.rey...@virtualopensystems.com
 Cc: linux-ker...@vger.kernel.org; patc...@linaro.org
 Subject: [RFC v2 3/6] irq: bypass: Extend skeleton for ARM forwarding control
 
 - [add,del]_[consumer,producer] updated to take both the consumer and
   producer handles. This is requested to combine info from both,
   typically to link the source irq owned by the producer with the gsi
   owned by the consumer (forwarded IRQ setup).
 - new methods are added:
   x stop/resume: Those are needed for forwarding since the state change
 requires intermingling actions at the consumer and the producer.
   x consumer update for posted interrupts
 - On handshake, we now call connect, disconnect which features the more
   complex sequence.
 - add irq on producer side
 
 Signed-off-by: Eric Auger eric.au...@linaro.org
 
 ---
 
 v1 - v2:
 - remove vfio_device, kvm, gsi, opaque fields included in v1 except common
 - all those in can be retrieved with container_of in callbacks
 ---
  include/linux/irqbypass.h | 19 ---
  kernel/irq/bypass.c   | 44
 
  2 files changed, 56 insertions(+), 7 deletions(-)
 
 diff --git a/include/linux/irqbypass.h b/include/linux/irqbypass.h
 index 718508e..8f62235 100644
 --- a/include/linux/irqbypass.h
 +++ b/include/linux/irqbypass.h
 @@ -3,17 +3,30 @@
 
  #include linux/list.h
 
 +struct irq_bypass_consumer;
 +
  struct irq_bypass_producer {
   struct list_head node;
   void *token;
 - /* TBD */
 + int irq; /* linux irq */
 + void (*stop)(struct irq_bypass_producer *);
 + void (*resume)(struct irq_bypass_producer *);
 + void (*add_consumer)(struct irq_bypass_producer *,
 +  struct irq_bypass_consumer *);
 + void (*del_consumer)(struct irq_bypass_producer *,
 +  struct irq_bypass_consumer *);
  };
 
  struct irq_bypass_consumer {
   struct list_head node;
   void *token;

Can we add a pointer to 'struct irq_bypass_producer' here, assign it
when connecting, and clear it when disconnecting? In some cases I need
to update the IRTE from the consumer side, where I cannot get the
related producer info (I need the irq info) without iterating over the
producers again.

Thanks,
Feng

 - void (*add_producer)(struct irq_bypass_producer *);
 - void (*del_producer)(struct irq_bypass_producer *);
 + void (*stop)(struct irq_bypass_consumer *);
 + void (*resume)(struct irq_bypass_consumer *);
 + void (*add_producer)(struct irq_bypass_consumer *,
 +  struct irq_bypass_producer *);
 + void (*del_producer)(struct irq_bypass_consumer *,
 +  struct irq_bypass_producer *);
 + void (*update)(struct irq_bypass_consumer *);
  };
 
  int irq_bypass_register_producer(struct irq_bypass_producer *);
 diff --git a/kernel/irq/bypass.c b/kernel/irq/bypass.c
 index 5d0f92b..efadbe5 100644
 --- a/kernel/irq/bypass.c
 +++ b/kernel/irq/bypass.c
 @@ -19,6 +19,42 @@ static LIST_HEAD(producers);
  static LIST_HEAD(consumers);
  static DEFINE_MUTEX(lock);
 
 +/* lock must be hold when calling connect */
 +static void connect(struct irq_bypass_producer *prod,
 + struct irq_bypass_consumer *cons)
 +{
 + if (prod-stop)
 + prod-stop(prod);
 + if (cons-stop)
 + cons-stop(cons);
 + if (prod-add_consumer)
 + prod-add_consumer(prod, cons);
 + if (cons-add_producer)
 + cons-add_producer(cons, prod);
 + if (cons-resume)
 + cons-resume(cons);
 + if (prod-resume)
 + prod-resume(prod);
 +}
 +
 +/* lock must be hold when calling disconnect */
 +static void disconnect(struct irq_bypass_producer *prod,
 +struct irq_bypass_consumer *cons)
 +{
 + if (prod-stop)
 + prod-stop(prod);
 + if (cons-stop)
 + cons-stop(cons);
 + if (cons-del_producer)
 + cons-del_producer(cons, prod);
 + if (prod-del_consumer)
 + prod-del_consumer(prod, cons);
 + if (cons-resume)
 + cons-resume(cons);
 + if (prod-resume)
 + prod-resume(prod);
 +}
 +
  int irq_bypass_register_producer(struct irq_bypass_producer *producer)
  {
   struct irq_bypass_producer *tmp;
 @@ -38,7 +74,7 @@ int irq_bypass_register_producer(struct
 irq_bypass_producer *producer)
 
   list_for_each_entry(consumer, consumers, node) {
   if (consumer-token == producer-token) {
 - consumer-add_producer(producer

RE: [RFC v2 3/6] irq: bypass: Extend skeleton for ARM forwarding control

 -Original Message-
 From: Paolo Bonzini [mailto:pbonz...@redhat.com]
 Sent: Tuesday, July 07, 2015 7:22 PM
 To: Wu, Feng; Eric Auger; eric.au...@st.com;
 linux-arm-ker...@lists.infradead.org; kvm...@lists.cs.columbia.edu;
 kvm@vger.kernel.org; christoffer.d...@linaro.org; marc.zyng...@arm.com;
 alex.william...@redhat.com; avi.kiv...@gmail.com; mtosa...@redhat.com;
 j...@8bytes.org; b.rey...@virtualopensystems.com
 Cc: linux-ker...@vger.kernel.org; patc...@linaro.org
 Subject: Re: [RFC v2 3/6] irq: bypass: Extend skeleton for ARM forwarding
 control

 On 07/07/2015 13:18, Wu, Feng wrote:
  Then I still need assign prod and de-assign prod in
  irq_bypass_register_consumer/irq_bypass_unregister_consumer, Right?
  Would you please share why this is better.

 The need to store the consumer-producer link seems to be unique to
 posted interrupts.  It is difficult to say without seeing the PI code,
 but I prefer to keep the bypass manager as small as possible.

Fine. I will follow your suggestion!

Thanks,
Feng

 Paolo
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

RE: [RFC PATCH] irq: IRQ bypass manager



 -Original Message-
 From: Alex Williamson [mailto:alex.william...@redhat.com]
 Sent: Wednesday, July 08, 2015 5:40 AM
 To: linux-ker...@vger.kernel.org; kvm@vger.kernel.org
 Cc: eric.au...@st.com; eric.au...@linaro.org; j...@8bytes.org;
 avi.kiv...@gmail.com; pbonz...@redhat.com; Wu, Feng
 Subject: [RFC PATCH] irq: IRQ bypass manager
 
 When a physical I/O device is assigned to a virtual machine through
 facilities like VFIO and KVM, the interrupt for the device generally
 bounces through the host system before being injected into the VM.
 However, hardware technologies exist that often allow the host to be
 bypassed for some of these scenarios.  Intel Posted Interrupts allow
 the specified physical edge interrupts to be directly injected into a
 guest when delivered to a physical processor while the vCPU is
 running.  ARM IRQ Forwarding allows the hypervisor to handle level
 triggered device interrupts as edge interrupts, by giving the guest
 control of de-asserting and unmasking the interrupt line.
 
 The IRQ bypass manager here is meant to provide the shim to connect
 interrupt producers, generally the host physical device driver, with
 interrupt consumers, generally the hypervisor, in order to configure
 these bypass mechanisms.  To do this, we base the connection on a
 shared, opaque token.  For KVM-VFIO this is expected to be an
 eventfd_ctx since this is the connection we already use to connect an
 eventfd to an irqfd on the in-kernel path.  When a producer and
 consumer with matching tokens are found, callbacks via both registered
 participants allow the bypass facilities to be automatically enabled.
 
 Signed-off-by: Alex Williamson alex.william...@redhat.com
 Cc: Eric Auger eric.au...@linaro.org
 ---
 
 This is the current draft of the IRQ bypass manager, I've made the
 following changes:
 
  - Incorporated Eric's extensions (I would welcome Sign-offs from all
involved in the development, especially Eric - I've gone ahead and
added Linaro copyright for the contributions so far)
  - Module support with module reference tracking
  - might_sleep() as suggested by Paolo
  - kerneldoc as suggested by Paolo
  - Renamed file s/bypass/irqbypass/ because a module named bypass
is strange
 
 Issues:
  - The update() callback is defined but not used

Yeah, the update() callback was added by me; I need it to update the IRTE
in irqfd_update().

  - We can't have *all* the callbacks be optional.  I assume add/del
are required
  - Naming consistency, stop is to start as suspend is to resume, not
stop/resume
  - Callback descriptions including why we need separate stop/start
hooks when it seems like the callee could reasonably assume such
around the add/del callbacks
  - Need functional prototypes for both PI and forwarding
 
  include/linux/irqbypass.h |   75 
  kernel/irq/Kconfig|3 +
  kernel/irq/Makefile   |1
  kernel/irq/irqbypass.c|  206
 +
  4 files changed, 285 insertions(+)
  create mode 100644 include/linux/irqbypass.h
  create mode 100644 kernel/irq/irqbypass.c
 
 diff --git a/include/linux/irqbypass.h b/include/linux/irqbypass.h
 new file mode 100644
 index 000..cc7ce45
 --- /dev/null
 +++ b/include/linux/irqbypass.h
 @@ -0,0 +1,75 @@
 +/*
 + * IRQ offload/bypass manager
 + *
 + * Copyright (C) 2015 Red Hat, Inc.
 + * Copyright (c) 2015 Linaro Ltd.
 + *
 + * This program is free software; you can redistribute it and/or modify
 + * it under the terms of the GNU General Public License version 2 as
 + * published by the Free Software Foundation.
 + */
 +#ifndef IRQBYPASS_H
 +#define IRQBYPASS_H
 +
 +#include linux/list.h
 +
 +struct irq_bypass_consumer;
 +
 +/**
 + * struct irq_bypass_producer - IRQ bypass producer definition
 + * @node: IRQ bypass manager private list management
 + * @token: opaque token to match between producer and consumer
 + * @irq: Linux IRQ number for the producer device
 + * @stop:
 + * @resume:
 + * @add_consumer:
 + * @del_consumer:
 + *
 + * The IRQ bypass producer structure represents an interrupt source for
 + * participation in possible host bypass, for instance an interrupt vector
 + * for a physical device assigned to a VM.
 + */
 +struct irq_bypass_producer {
 + struct list_head node;
 + void *token;
 + int irq; /* linux irq */
 + void (*stop)(struct irq_bypass_producer *);
 + void (*resume)(struct irq_bypass_producer *);
 + void (*add_consumer)(struct irq_bypass_producer *,
 +  struct irq_bypass_consumer *);
 + void (*del_consumer)(struct irq_bypass_producer *,
 +  struct irq_bypass_consumer *);
 +};
 +
 +/**
 + * struct irq_bypass_consumer - IRQ bypass consumer definition
 + * @node: IRQ bypass manager private list management
 + * @token: opaque token to match between producer and consumer
 + * @stop:
 + * @resume:
 + * @add_consumer:
 + * @del_consumer:
 + * @update

RE: [RFC 16/17] KVM: eventfd: add irq bypass consumer management

2015-07-06 Thread Wu, Feng



 -Original Message-
 From: Eric Auger [mailto:eric.au...@linaro.org]
 Sent: Thursday, July 02, 2015 9:17 PM
 To: eric.au...@st.com; eric.au...@linaro.org;
 linux-arm-ker...@lists.infradead.org; kvm...@lists.cs.columbia.edu;
 kvm@vger.kernel.org; christoffer.d...@linaro.org; marc.zyng...@arm.com;
 alex.william...@redhat.com; pbonz...@redhat.com; avi.kiv...@gmail.com;
 mtosa...@redhat.com; Wu, Feng; j...@8bytes.org;
 b.rey...@virtualopensystems.com
 Cc: linux-ker...@vger.kernel.org; patc...@linaro.org
 Subject: [RFC 16/17] KVM: eventfd: add irq bypass consumer management
 
 This patch adds the registration/unregistration of an
 irq_bypass_consumer on irqfd assignment/deassignment.
 
 Signed-off-by: Eric Auger eric.au...@linaro.org
 ---
  virt/kvm/eventfd.c | 22 +++---
  1 file changed, 19 insertions(+), 3 deletions(-)
 
 diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
 index f3da161..425a47b 100644
 --- a/virt/kvm/eventfd.c
 +++ b/virt/kvm/eventfd.c
 @@ -34,6 +34,7 @@
  #include linux/srcu.h
  #include linux/slab.h
  #include linux/seqlock.h
 +#include linux/irqbypass.h
  #include trace/events/kvm.h
 
  #include kvm/iodev.h
 @@ -93,6 +94,7 @@ struct _irqfd {
   struct list_head list;
   poll_table pt;
   struct work_struct shutdown;
 + struct irq_bypass_consumer *cons;
  };
 
  static struct workqueue_struct *irqfd_cleanup_wq;
 @@ -429,7 +431,21 @@ kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd
 *args)
*/
   fdput(f);
 
 - /* irq_bypass_register_consumer(); */
 + irqfd-cons = kzalloc(sizeof(struct irq_bypass_consumer),
 +   GFP_KERNEL);
 + if (!irqfd-cons) {
 + ret = -ENOMEM;
 + goto fail;
 + }
 + irqfd-cons-token = (void *)irqfd-eventfd;
 + irqfd-cons-gsi = irqfd-gsi;
 + irqfd-cons-kvm = kvm;
 + irqfd-cons-add_producer = kvm_arch_add_producer;
 + irqfd-cons-del_producer = kvm_arch_del_producer;
 + irqfd-cons-stop_consumer = kvm_arch_stop_consumer;
 + irqfd-cons-resume_consumer = kvm_arch_resume_consumer;
 + ret = irq_bypass_register_consumer(irqfd-cons);
 + WARN_ON(ret);
 
   return 0;
 
 @@ -530,8 +546,6 @@ kvm_irqfd_deassign(struct kvm *kvm, struct kvm_irqfd
 *args)
   struct _irqfd *irqfd, *tmp;
   struct eventfd_ctx *eventfd;
 
 - /* irq_bypass_unregister_consumer() */
 -
   eventfd = eventfd_ctx_fdget(args-fd);
   if (IS_ERR(eventfd))
   return PTR_ERR(eventfd);
 @@ -550,6 +564,8 @@ kvm_irqfd_deassign(struct kvm *kvm, struct kvm_irqfd
 *args)
   irqfd-irq_entry.type = 0;
   write_seqcount_end(irqfd-irq_entry_sc);
   irqfd_deactivate(irqfd);
 + irq_bypass_unregister_consumer(irqfd-cons);
 + kfree(irqfd-cons);

There may be an issue here. 'irqfd' is freed in irqfd_deactivate() -> ...
-> irqfd_shutdown(),
and irqfd_deactivate() can also be called from the two other places below:
- irqfd_wakeup()
- kvm_irqfd_release()
I think we also need to call irq_bypass_unregister_consumer() there, right?

Thanks,
Feng


   }
   }
 
 --
 1.9.1

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

RE: [RFC 16/17] KVM: eventfd: add irq bypass consumer management

2015-07-06 Thread Wu, Feng



 -Original Message-
 From: Eric Auger [mailto:eric.au...@linaro.org]
 Sent: Monday, July 06, 2015 7:20 PM
 To: Wu, Feng; eric.au...@st.com; linux-arm-ker...@lists.infradead.org;
 kvm...@lists.cs.columbia.edu; kvm@vger.kernel.org;
 christoffer.d...@linaro.org; marc.zyng...@arm.com;
 alex.william...@redhat.com; pbonz...@redhat.com; avi.kiv...@gmail.com;
 mtosa...@redhat.com; j...@8bytes.org; b.rey...@virtualopensystems.com
 Cc: linux-ker...@vger.kernel.org; patc...@linaro.org
 Subject: Re: [RFC 16/17] KVM: eventfd: add irq bypass consumer management
 
 Hi Feng,
 On 07/06/2015 09:55 AM, Wu, Feng wrote:
 
 
  -Original Message-
  From: Eric Auger [mailto:eric.au...@linaro.org]
  Sent: Thursday, July 02, 2015 9:17 PM
  To: eric.au...@st.com; eric.au...@linaro.org;
  linux-arm-ker...@lists.infradead.org; kvm...@lists.cs.columbia.edu;
  kvm@vger.kernel.org; christoffer.d...@linaro.org; marc.zyng...@arm.com;
  alex.william...@redhat.com; pbonz...@redhat.com; avi.kiv...@gmail.com;
  mtosa...@redhat.com; Wu, Feng; j...@8bytes.org;
  b.rey...@virtualopensystems.com
  Cc: linux-ker...@vger.kernel.org; patc...@linaro.org
  Subject: [RFC 16/17] KVM: eventfd: add irq bypass consumer management
 
  This patch adds the registration/unregistration of an
  irq_bypass_consumer on irqfd assignment/deassignment.
 
  Signed-off-by: Eric Auger eric.au...@linaro.org
  ---
   virt/kvm/eventfd.c | 22 +++---
   1 file changed, 19 insertions(+), 3 deletions(-)
 
  diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
  index f3da161..425a47b 100644
  --- a/virt/kvm/eventfd.c
  +++ b/virt/kvm/eventfd.c
  @@ -34,6 +34,7 @@
   #include linux/srcu.h
   #include linux/slab.h
   #include linux/seqlock.h
  +#include linux/irqbypass.h
   #include trace/events/kvm.h
 
   #include kvm/iodev.h
  @@ -93,6 +94,7 @@ struct _irqfd {
 struct list_head list;
 poll_table pt;
 struct work_struct shutdown;
  +  struct irq_bypass_consumer *cons;
   };
 
   static struct workqueue_struct *irqfd_cleanup_wq;
  @@ -429,7 +431,21 @@ kvm_irqfd_assign(struct kvm *kvm, struct
 kvm_irqfd
  *args)
  */
 fdput(f);
 
  -  /* irq_bypass_register_consumer(); */
  +  irqfd-cons = kzalloc(sizeof(struct irq_bypass_consumer),
  +GFP_KERNEL);
  +  if (!irqfd-cons) {
  +  ret = -ENOMEM;
  +  goto fail;
  +  }
  +  irqfd-cons-token = (void *)irqfd-eventfd;
  +  irqfd-cons-gsi = irqfd-gsi;
  +  irqfd-cons-kvm = kvm;
  +  irqfd-cons-add_producer = kvm_arch_add_producer;
  +  irqfd-cons-del_producer = kvm_arch_del_producer;
  +  irqfd-cons-stop_consumer = kvm_arch_stop_consumer;
  +  irqfd-cons-resume_consumer = kvm_arch_resume_consumer;
  +  ret = irq_bypass_register_consumer(irqfd-cons);
  +  WARN_ON(ret);
 
 return 0;
 
  @@ -530,8 +546,6 @@ kvm_irqfd_deassign(struct kvm *kvm, struct
 kvm_irqfd
  *args)
 struct _irqfd *irqfd, *tmp;
 struct eventfd_ctx *eventfd;
 
  -  /* irq_bypass_unregister_consumer() */
  -
 eventfd = eventfd_ctx_fdget(args-fd);
 if (IS_ERR(eventfd))
 return PTR_ERR(eventfd);
  @@ -550,6 +564,8 @@ kvm_irqfd_deassign(struct kvm *kvm, struct
 kvm_irqfd
  *args)
 irqfd-irq_entry.type = 0;
 write_seqcount_end(irqfd-irq_entry_sc);
 irqfd_deactivate(irqfd);
  +  irq_bypass_unregister_consumer(irqfd-cons);
  +  kfree(irqfd-cons);
 
  There may be an issue here. 'irqfd' is freed in irqfd_deactivate() -> ...
 -> irqfd_shutdown(),
  and irqfd_deactivate() can also be called from the two other places below:
  - irqfd_wakeup()
  - kvm_irqfd_release()
  I think we also need to call irq_bypass_unregister_consumer() there, right?
 Yes, you're right. What about doing the unregistration in irqfd_shutdown()
 then?

I am fine with this!

Thanks,
Feng
 
 Thanks for spotting this.
 
 Eric
 
 
  Thanks,
  Feng
 
 
 }
 }
 
  --
  1.9.1
 

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

RE: [RFC 12/17] irq: bypass: Extend skeleton for ARM forwarding control

2015-07-03 Thread Wu, Feng

 -Original Message-
 From: Paolo Bonzini [mailto:pbonz...@redhat.com]
 Sent: Friday, July 03, 2015 3:06 PM
 To: Wu, Feng; Eric Auger; eric.au...@st.com;
 linux-arm-ker...@lists.infradead.org; kvm...@lists.cs.columbia.edu;
 kvm@vger.kernel.org; christoffer.d...@linaro.org; marc.zyng...@arm.com;
 alex.william...@redhat.com; avi.kiv...@gmail.com; mtosa...@redhat.com;
 j...@8bytes.org; b.rey...@virtualopensystems.com
 Cc: linux-ker...@vger.kernel.org; patc...@linaro.org
 Subject: Re: [RFC 12/17] irq: bypass: Extend skeleton for ARM forwarding
 control

 On 03/07/2015 09:00, Wu, Feng wrote:
struct irq_bypass_consumer {
   struct list_head node;
   void *token;
+  unsigned irq;  /*got from producer when registered*/
   void (*add_producer)(struct irq_bypass_producer *,
struct irq_bypass_consumer *);
   void (*del_producer)(struct irq_bypass_producer *,
struct irq_bypass_consumer *);
+  void (*update)(struct irq_bypass_consumer *);
};

'update' is used to update the IRTE, while irq is initialized when
registered, which is used to find the right IRTE.

   Feel free to add update in your PI patches.  I am not sure if irq
   belongs here or in the containing struct.  You can play with both and
   submit the version that looks better to you.
  Thanks for your review, Paolo. In my understanding, irq comes from
  the producer side, while gsi belongs to the consumer, so we need
  to get the irq from the producer somewhere. I am not sure adding
  irq here is the right way, but what I need is to have both irq and
  gsi in hand in the 'update' function. :)

 It's difficult to say without seeing the patches...  The IRQ is stored
 in the producer already with Eric's changes.  If you need to store the
 old IRQ value, because update needs to do something with it, then I
 think irq belongs in the container struct.

 Perhaps update needs to have a producer argument as well?

I also considered this method. Basically, I will call 'update' in irqfd_update(),
but it seems I need to do extra work to get the producer structure (such as
iterating the producer list to find the one with the same 'token') before
calling 'update' from the consumer side. I am not sure it is worth doing
it that way.

Thanks,
Feng

 Paolo
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

RE: [RFC 12/17] irq: bypass: Extend skeleton for ARM forwarding control

2015-07-03 Thread Wu, Feng

 -Original Message-
 From: Paolo Bonzini [mailto:paolo.bonz...@gmail.com] On Behalf Of Paolo
 Bonzini
 Sent: Friday, July 03, 2015 2:52 PM
 To: Wu, Feng; Eric Auger; eric.au...@st.com;
 linux-arm-ker...@lists.infradead.org; kvm...@lists.cs.columbia.edu;
 kvm@vger.kernel.org; christoffer.d...@linaro.org; marc.zyng...@arm.com;
 alex.william...@redhat.com; avi.kiv...@gmail.com; mtosa...@redhat.com;
 j...@8bytes.org; b.rey...@virtualopensystems.com
 Cc: linux-ker...@vger.kernel.org; patc...@linaro.org
 Subject: Re: [RFC 12/17] irq: bypass: Extend skeleton for ARM forwarding
 control

 On 03/07/2015 04:43, Wu, Feng wrote:

  struct irq_bypass_consumer {
 struct list_head node;
 void *token;
  +  unsigned irq;/*got from producer when registered*/
 void (*add_producer)(struct irq_bypass_producer *,
  struct irq_bypass_consumer *);
 void (*del_producer)(struct irq_bypass_producer *,
  struct irq_bypass_consumer *);
  +  void (*update)(struct irq_bypass_consumer *);
  };

  'update' is used to update the IRTE, while irq is initialized when
  registered, which is used to find the right IRTE.

 Feel free to add update in your PI patches.  I am not sure if irq
 belongs here or in the containing struct.  You can play with both and
 submit the version that looks better to you.

Thanks for your review, Paolo. In my understanding, irq comes from
the producer side, while gsi belongs to the consumer, so we need
to get the irq from the producer somewhere. I am not sure adding
irq here is the right way, but what I need is to have both irq and
gsi in hand in the 'update' function. :)

Thanks,
Feng

 Paolo
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

RE: [RFC 09/17] bypass: IRQ bypass manager proto by Alex

 -Original Message-
 From: Eric Auger [mailto:eric.au...@linaro.org]
 Sent: Thursday, July 02, 2015 9:17 PM
 To: eric.au...@st.com; eric.au...@linaro.org;
 linux-arm-ker...@lists.infradead.org; kvm...@lists.cs.columbia.edu;
 kvm@vger.kernel.org; christoffer.d...@linaro.org; marc.zyng...@arm.com;
 alex.william...@redhat.com; pbonz...@redhat.com; avi.kiv...@gmail.com;
 mtosa...@redhat.com; Wu, Feng; j...@8bytes.org;
 b.rey...@virtualopensystems.com
 Cc: linux-ker...@vger.kernel.org; patc...@linaro.org
 Subject: [RFC 09/17] bypass: IRQ bypass manager proto by Alex

 From: Alex Williamson alex.william...@redhat.com

 There are plenty of details to be filled in, but I think the basics
 look something like the code below.  The IRQ bypass manager just
 defines a pair of structures, one for interrupt producers and one for
 interrupt consumers.  I'm certain that we'll need more callbacks than
 I've defined below, but figuring out what those should be for the best
 abstraction is the hardest part of this idea.  The manager provides both
 registration and de-registration interfaces for both types of objects
 and keeps lists for each, protected by a lock.  The manager doesn't even
 really need to know what the match token is, but I assume for our
 purposes it will be an eventfd_ctx.

 On the vfio side, the producer struct would be embedded in the
 vfio_pci_irq_ctx struct.  KVM would probably embed the consumer struct
 in _irqfd.  As I've coded below, the IRQ bypass manager calls the
 consumer callbacks, so the producer struct would need fields or
 callbacks to provide the consumer the info it needs.  AIUI the Posted
 Interrupt model, VFIO only needs to provide data to the consumer.  For
 IRQ Forwarding, I think the producer needs to be informed when bypass is
 active to model the incoming interrupt as edge vs level.

 I've prototyped the base IRQ bypass manager here as static, but I don't
 see any reason it couldn't be a module that's loaded by dependency when
 either vfio-pci or kvm-intel is loaded (or other producer/consumer
 objects).

 Is this a reasonable starting point to craft the additional fields and
 callbacks and interaction of who calls who that we need to support
 Posted Interrupts and IRQ Forwarding?  Is the AMD version of this still
 alive?  Thanks,

 Alex

In fact, I have also implemented an RFC patch for this new framework. I am
wondering whether we can discuss all the requirements for irq forwarding and
posted interrupts, and make this a separate patchset as a general
layer. Then we can continue to push the arch-specific stuff; that would be
clearer and easier.

Thanks,
Feng

 ---
  arch/x86/kvm/Kconfig  |   1 +
  drivers/vfio/pci/Kconfig  |   1 +
  drivers/vfio/pci/vfio_pci_intrs.c |   6 ++
  include/linux/irqbypass.h |  23 
  kernel/irq/Kconfig|   3 +
  kernel/irq/Makefile   |   1 +
  kernel/irq/bypass.c   | 116
 ++
  virt/kvm/eventfd.c|   4 ++
  8 files changed, 155 insertions(+)
  create mode 100644 include/linux/irqbypass.h
  create mode 100644 kernel/irq/bypass.c

 diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig
 index d8a1d56..86d0d77 100644
 --- a/arch/x86/kvm/Kconfig
 +++ b/arch/x86/kvm/Kconfig
 @@ -61,6 +61,7 @@ config KVM_INTEL
   depends on KVM
   # for perf_guest_get_msrs():
   depends on CPU_SUP_INTEL
 + select IRQ_BYPASS_MANAGER
   ---help---
 Provides support for KVM on Intel processors equipped with the VT
 extensions.
 diff --git a/drivers/vfio/pci/Kconfig b/drivers/vfio/pci/Kconfig
 index 579d83b..02912f1 100644
 --- a/drivers/vfio/pci/Kconfig
 +++ b/drivers/vfio/pci/Kconfig
 @@ -2,6 +2,7 @@ config VFIO_PCI
   tristate VFIO support for PCI devices
   depends on VFIO  PCI  EVENTFD
   select VFIO_VIRQFD
 + select IRQ_BYPASS_MANAGER
   help
 Support for the PCI VFIO bus driver.  This is required to make
 use of PCI drivers using the VFIO framework.
 diff --git a/drivers/vfio/pci/vfio_pci_intrs.c 
 b/drivers/vfio/pci/vfio_pci_intrs.c
 index 1f577b4..4e053be 100644
 --- a/drivers/vfio/pci/vfio_pci_intrs.c
 +++ b/drivers/vfio/pci/vfio_pci_intrs.c
 @@ -181,6 +181,7 @@ static int vfio_intx_set_signal(struct vfio_pci_device
 *vdev, int fd)

   if (vdev-ctx[0].trigger) {
   free_irq(pdev-irq, vdev);
 + /* irq_bypass_unregister_producer(); */
   kfree(vdev-ctx[0].name);
   eventfd_ctx_put(vdev-ctx[0].trigger);
   vdev-ctx[0].trigger = NULL;
 @@ -214,6 +215,8 @@ static int vfio_intx_set_signal(struct vfio_pci_device
 *vdev, int fd)
   return ret;
   }

 + /* irq_bypass_register_producer(); */
 +
   /*
* INTx disable will stick across the new irq setup,
* disable_irq won't.
 @@ -319,6 +322,7 @@ static int vfio_msi_set_vector_signal(struct
 vfio_pci_device *vdev,

   if (vdev-ctx[vector

RE: [RFC 12/17] irq: bypass: Extend skeleton for ARM forwarding control

 -Original Message-
 From: Paolo Bonzini [mailto:pbonz...@redhat.com]
 Sent: Thursday, July 02, 2015 9:41 PM
 To: Eric Auger; eric.au...@st.com; linux-arm-ker...@lists.infradead.org;
 kvm...@lists.cs.columbia.edu; kvm@vger.kernel.org;
 christoffer.d...@linaro.org; marc.zyng...@arm.com;
 alex.william...@redhat.com; avi.kiv...@gmail.com; mtosa...@redhat.com;
 Wu, Feng; j...@8bytes.org; b.rey...@virtualopensystems.com
 Cc: linux-ker...@vger.kernel.org; patc...@linaro.org
 Subject: Re: [RFC 12/17] irq: bypass: Extend skeleton for ARM forwarding
 control

 On 02/07/2015 15:17, Eric Auger wrote:
  - new fields are added on producer side: linux irq, vfio_device handle,
active which reflects whether the source is active (at interrupt
controller level or at VFIO level - automasked -) and finally an
opaque pointer which will be used to point to the vfio_platform_device
in this series.

 Linux IRQ and active should be okay.  As to the vfio_device handle, you
 should link it from the vfio_platform_device instead.  And for the
 vfio_platform_device, you can link it from the vfio_platform_irq instead.

 Once you've done this, embed the irq_bypass_producer struct in the
 vfio_platform_irq struct; in the new kvm_arch_* functions, go back to
 the vfio_platform_irq struct via container_of.  From there you can
 retrieve pointers to the vfio_platform_device and the vfio_device.

  - new fields on consumer side: the kvm handle, the gsi

 You do not need to add these.  Instead, add the kvm handle to irqfd
 only.  Like above, embed the irq_bypass_consumer struct in the irqfd
 struct; in the new kvm_arch_* functions, go back to the
 vfio_platform_irq struct via container_of.

I also need the gsi field here: for posted interrupts, I need both 'gsi' and
'irq' to update the IRTE.

Thanks,
Feng

 Paolo
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

RE: [RFC 12/17] irq: bypass: Extend skeleton for ARM forwarding control



 -Original Message-
 From: Eric Auger [mailto:eric.au...@linaro.org]
 Sent: Thursday, July 02, 2015 9:17 PM
 To: eric.au...@st.com; eric.au...@linaro.org;
 linux-arm-ker...@lists.infradead.org; kvm...@lists.cs.columbia.edu;
 kvm@vger.kernel.org; christoffer.d...@linaro.org; marc.zyng...@arm.com;
 alex.william...@redhat.com; pbonz...@redhat.com; avi.kiv...@gmail.com;
 mtosa...@redhat.com; Wu, Feng; j...@8bytes.org;
 b.rey...@virtualopensystems.com
 Cc: linux-ker...@vger.kernel.org; patc...@linaro.org
 Subject: [RFC 12/17] irq: bypass: Extend skeleton for ARM forwarding control
 
 - [add,del]_[consumer,producer] updated to take both the consumer and
   producer handles. This is requested to combine info from both,
   typically to link the source irq owned by the producer with the gsi
   owned by the consumer (forwarded IRQ setup).
 - new functions are added: [stop,resume]_[consumer, producer]. Those are
   needed for forwarding since the state change requires intermingling
   actions at the consumer and the producer.
 - On handshake, we now call connect, disconnect which features the more
   complex sequence.
 - new fields are added on producer side: linux irq, vfio_device handle,
   active which reflects whether the source is active (at interrupt
   controller level or at VFIO level - automasked -) and finally an
   opaque pointer which will be used to point to the vfio_platform_device
   in this series.
 - new fields on consumer side: the kvm handle, the gsi
 
 Integration of posted interrupt series will help to refine those choices

On the PI side, I need another field, as below:

struct irq_bypass_consumer {
   struct list_head node;
   void *token;
+  unsigned irq;/*got from producer when registered*/
   void (*add_producer)(struct irq_bypass_producer *,
struct irq_bypass_consumer *);
   void (*del_producer)(struct irq_bypass_producer *,
struct irq_bypass_consumer *);
+  void (*update)(struct irq_bypass_consumer *);
};

'update' is used to update the IRTE, while 'irq' is initialized at
registration time and is used to find the right IRTE.

Thanks,
Feng

 
 Signed-off-by: Eric Auger eric.au...@linaro.org
 
 ---
 
 - connect/disconnect could become a cb too. For forwarding it may make
   sense to have failure at connection: this would happen when the physical
   IRQ is either active at irqchip level or VFIO masked. This means some
   of the cb should return an error and this error management could be
   prod/cons specific. Where to attach the connect/disconnect cb: to the
   cons or prod, to both?
 - Hence may be sensible to do the list_add only if connect returns 0
 - disconnect would not be allowed to fail.
 ---
  include/linux/irqbypass.h | 26 ++---
  kernel/irq/bypass.c   | 48
 +++
  2 files changed, 67 insertions(+), 7 deletions(-)
 
 diff --git a/include/linux/irqbypass.h b/include/linux/irqbypass.h
 index 718508e..591ae3f 100644
 --- a/include/linux/irqbypass.h
 +++ b/include/linux/irqbypass.h
 @@ -3,17 +3,37 @@
 
  #include linux/list.h
 
 +struct vfio_device;
 +struct irq_bypass_consumer;
 +struct kvm;
 +
  struct irq_bypass_producer {
   struct list_head node;
   void *token;
 - /* TBD */
 + unsigned int irq; /* host physical irq */
 + struct vfio_device *vdev; /* vfio device that requested irq */
 + /* is irq active at irqchip or VFIO masked? */
 + bool active;
 + void *opaque;
 + void (*stop_producer)(struct irq_bypass_producer *);
 + void (*resume_producer)(struct irq_bypass_producer *);
 + void (*add_consumer)(struct irq_bypass_producer *,
 +  struct irq_bypass_consumer *);
 + void (*del_consumer)(struct irq_bypass_producer *,
 +  struct irq_bypass_consumer *);
  };
 
  struct irq_bypass_consumer {
   struct list_head node;
   void *token;
 - void (*add_producer)(struct irq_bypass_producer *);
 - void (*del_producer)(struct irq_bypass_producer *);
 + unsigned int gsi;   /* the guest gsi */
 + struct kvm *kvm;
 + void (*stop_consumer)(struct irq_bypass_consumer *);
 + void (*resume_consumer)(struct irq_bypass_consumer *);
 + void (*add_producer)(struct irq_bypass_consumer *,
 +  struct irq_bypass_producer *);
 + void (*del_producer)(struct irq_bypass_consumer *,
 +  struct irq_bypass_producer *);
  };
 
  int irq_bypass_register_producer(struct irq_bypass_producer *);
 diff --git a/kernel/irq/bypass.c b/kernel/irq/bypass.c
 index 5d0f92b..fb31fef 100644
 --- a/kernel/irq/bypass.c
 +++ b/kernel/irq/bypass.c
 @@ -19,6 +19,46 @@ static LIST_HEAD(producers);
  static LIST_HEAD(consumers);
  static DEFINE_MUTEX(lock);
 
 +/* lock must be hold when calling connect */
 +static void connect(struct irq_bypass_producer *prod,
 + struct

RE: [RFC 12/17] irq: bypass: Extend skeleton for ARM forwarding control

 -Original Message-
 From: Wu, Feng
 Sent: Friday, July 03, 2015 10:20 AM
 To: Paolo Bonzini; Eric Auger; eric.au...@st.com;
 linux-arm-ker...@lists.infradead.org; kvm...@lists.cs.columbia.edu;
 kvm@vger.kernel.org; christoffer.d...@linaro.org; marc.zyng...@arm.com;
 alex.william...@redhat.com; avi.kiv...@gmail.com; mtosa...@redhat.com;
 j...@8bytes.org; b.rey...@virtualopensystems.com
 Cc: linux-ker...@vger.kernel.org; patc...@linaro.org; Wu, Feng
 Subject: RE: [RFC 12/17] irq: bypass: Extend skeleton for ARM forwarding
 control

  -Original Message-
  From: Paolo Bonzini [mailto:pbonz...@redhat.com]
  Sent: Thursday, July 02, 2015 9:41 PM
  To: Eric Auger; eric.au...@st.com; linux-arm-ker...@lists.infradead.org;
  kvm...@lists.cs.columbia.edu; kvm@vger.kernel.org;
  christoffer.d...@linaro.org; marc.zyng...@arm.com;
  alex.william...@redhat.com; avi.kiv...@gmail.com; mtosa...@redhat.com;
  Wu, Feng; j...@8bytes.org; b.rey...@virtualopensystems.com
  Cc: linux-ker...@vger.kernel.org; patc...@linaro.org
  Subject: Re: [RFC 12/17] irq: bypass: Extend skeleton for ARM forwarding
  control

  On 02/07/2015 15:17, Eric Auger wrote:
   - new fields are added on producer side: linux irq, vfio_device handle,
 active which reflects whether the source is active (at interrupt
 controller level or at VFIO level - automasked -) and finally an
 opaque pointer which will be used to point to the vfio_platform_device
 in this series.

  Linux IRQ and active should be okay.  As to the vfio_device handle, you
  should link it from the vfio_platform_device instead.  And for the
  vfio_platform_device, you can link it from the vfio_platform_irq instead.

  Once you've done this, embed the irq_bypass_producer struct in the
  vfio_platform_irq struct; in the new kvm_arch_* functions, go back to
  the vfio_platform_irq struct via container_of.  From there you can
  retrieve pointers to the vfio_platform_device and the vfio_device.

   - new fields on consumer side: the kvm handle, the gsi

  You do not need to add these.  Instead, add the kvm handle to irqfd
  only.  Like above, embed the irq_bypass_consumer struct in the irqfd
  struct; in the new kvm_arch_* functions, go back to the
  vfio_platform_irq struct via container_of.

 I also need the gsi field here; for posted-interrupts, I need 'gsi' and 'irq' to
 update the IRTE.

Oh... we can get gsi from irq_bypass_consumer -> _irqfd -> gsi, so it
is not needed in irq_bypass_consumer. Got it! :)

Thanks,
Feng

 Thanks,
 Feng

  Paolo
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

RE: [v4 08/16] KVM: kvm-vfio: User API for IRQ forwarding

2015-06-29 Thread Wu, Feng

 -Original Message-
 From: Alex Williamson [mailto:alex.william...@redhat.com]
 Sent: Friday, June 19, 2015 4:04 AM
 To: Wu, Feng
 Cc: Eric Auger; Avi Kivity; kvm@vger.kernel.org; linux-ker...@vger.kernel.org;
 pbonz...@redhat.com; mtosa...@redhat.com; Joerg Roedel
 Subject: Re: [v4 08/16] KVM: kvm-vfio: User API for IRQ forwarding

 [Adding Joerg since he was part of this original idea]

 On Thu, 2015-06-18 at 09:16 +, Wu, Feng wrote:

   -Original Message-
   From: Alex Williamson [mailto:alex.william...@redhat.com]
   Sent: Tuesday, June 16, 2015 12:45 AM
   To: Eric Auger
   Cc: Avi Kivity; Wu, Feng; kvm@vger.kernel.org;
 linux-ker...@vger.kernel.org;
   pbonz...@redhat.com; mtosa...@redhat.com
   Subject: Re: [v4 08/16] KVM: kvm-vfio: User API for IRQ forwarding

   On Mon, 2015-06-15 at 18:17 +0200, Eric Auger wrote:
Hi Alex, all,
On 06/12/2015 09:03 PM, Alex Williamson wrote:
 On Fri, 2015-06-12 at 21:48 +0300, Avi Kivity wrote:
 On 06/12/2015 06:41 PM, Alex Williamson wrote:
 On Fri, 2015-06-12 at 00:23 +, Wu, Feng wrote:
 -Original Message-
 From: Avi Kivity [mailto:avi.kiv...@gmail.com]
 Sent: Friday, June 12, 2015 3:59 AM
 To: Wu, Feng; kvm@vger.kernel.org; linux-ker...@vger.kernel.org
 Cc: pbonz...@redhat.com; mtosa...@redhat.com;
 alex.william...@redhat.com; eric.au...@linaro.org
 Subject: Re: [v4 08/16] KVM: kvm-vfio: User API for IRQ forwarding

 On 06/11/2015 01:51 PM, Feng Wu wrote:
 From: Eric Auger eric.au...@linaro.org

 This patch adds and documents a new KVM_DEV_VFIO_DEVICE
   group
 and 2 device attributes:
 KVM_DEV_VFIO_DEVICE_FORWARD_IRQ,
 KVM_DEV_VFIO_DEVICE_UNFORWARD_IRQ. The purpose is to be
   able
 to set a VFIO device IRQ as forwarded or not forwarded.
 the command takes as argument a handle to a new struct named
 kvm_vfio_dev_irq.
 Is there no way to do this automatically?  After all, vfio knows 
 that
 a
 device interrupt is forwarded to some eventfd, and kvm knows that
   some
 eventfd is forwarded to a guest interrupt.  If they compare notes
 through a central registry, they can figure out that the interrupt
 needs
 to be forwarded.
 Oh, just like Eric mentioned in his reply, this description is out 
 of
 context
   of
 this series, I will remove them in the next version.

 I suspect Avi's question was more general.  While forward/unforward
 is
 out of context for this series, it's very similar in nature to
 enabling/disabling posted interrupts.  So I think the question 
 remains
 whether we really need userspace to participate in creating this
 shortcut or if kvm and vfio can some how orchestrate figuring it out
 automatically.

 Personally I don't know how we could do it automatically.  We've
 always
 relied on userspace to independently setup vfio and kvm such that
 neither have any idea that the other is there and update each side
 independently when anything changes.  So it seems consistent to
   continue
 that here.  It doesn't seem like there's much to gain
 performance-wise
 either, updates should be a relatively rare event I'd expect.

 There's really no metadata associated with an eventfd, so comparing
 notes automatically might imply some central registration entity.
 That
 immediately sounds like a much more complex solution, but maybe Avi
   has
 some ideas to manage it.  Thanks,

 The idea is to have a central registry maintained by a posted 
 interrupts
 manager.  Both vfio and kvm pass the filp (along with extra
 information)
 to the posted interrupts manager, which, when it detects a filp 
 match,
 tells each of them what to do.

 The advantages are:
 - old userspace gains the optimization without change
 - a userspace API is more expensive to maintain than internal kernel
 interfaces (CVEs, documentation, maintaining backwards compatibility)
 - if you can do it without a new interface, this indicates that all 
 the
 information in the new interface is redundant.  That means you have
 to
 check it for consistency with the existing information, so it's extra
 work (likely, it's exactly what the posted interrupt manager would be
 doing anyway).

 Yep, those all sound like good things and I believe that's similar in
 design to the way we had originally discussed this interaction at
 LPC/KVM Forum several years ago.  I'd be in favor of that approach.

I guess this discussion also is relevant wrt [RFC v6 00/16] KVM-VFIO
IRQ forward control series? Or is that central registry maintained by
a posted interrupts manager something more specific to x86?

   I'd think we'd want it for any sort of offload and supporting both
   posted-interrupts and irq-forwarding would be a good validation.  I
   imagine there would

RE: [v4 08/16] KVM: kvm-vfio: User API for IRQ forwarding

2015-06-29 Thread Wu, Feng

 -Original Message-
 From: Joerg Roedel [mailto:j...@8bytes.org]
 Sent: Monday, June 29, 2015 5:23 PM
 To: Wu, Feng
 Cc: Alex Williamson; Eric Auger; Avi Kivity; kvm@vger.kernel.org;
 linux-ker...@vger.kernel.org; pbonz...@redhat.com; mtosa...@redhat.com
 Subject: Re: [v4 08/16] KVM: kvm-vfio: User API for IRQ forwarding

 On Mon, Jun 29, 2015 at 09:14:54AM +, Wu, Feng wrote:
  Do you mean updating the hardware IRTEs for all the entries in the irq
  routing table, no matter whether it is the updated one?

 Right, that's what I mean. It seems wrong to me to work around the API
 interface by creating a diff between the old and the new routing table.

Yes, the original usage model here doesn't care about the diff between
the old and new tables; it is a little intrusive to add the comparison code here.

 It is much simpler (and easier to maintain) to just update the IRTE
 and PI structures for all IRQs in the routing table, especially since
 this is not a hot-path.

Agree.

Thanks,
Feng

   Joerg

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

RE: [v4 08/16] KVM: kvm-vfio: User API for IRQ forwarding

2015-06-29 Thread Wu, Feng

 -Original Message-
 From: Joerg Roedel [mailto:j...@8bytes.org]
 Sent: Monday, June 29, 2015 5:06 PM
 To: Wu, Feng
 Cc: Alex Williamson; Eric Auger; Avi Kivity; kvm@vger.kernel.org;
 linux-ker...@vger.kernel.org; pbonz...@redhat.com; mtosa...@redhat.com
 Subject: Re: [v4 08/16] KVM: kvm-vfio: User API for IRQ forwarding

 Hi Feng,

 On Thu, Jun 25, 2015 at 09:11:52AM -0600, Alex Williamson wrote:
  So the trouble is that QEMU vfio updates a single MSI vector, but that
  just updates a single entry within a whole table of routes, then the
  whole table is pushed to KVM.  But in kvm_set_irq_routing() we have
  access to both the new and the old tables, so we do have the ability to
  detect the change.  We can therefore detect which GSI changed and
  cross-reference that to KVMs irqfds.  If we have an irqfd that matches
  the GSI then we have all the information we need, right?  We can use the
  eventfd_ctx of the irqfd to call into the IRQ bypass manager if we need
  to.  If it's an irqfd that's already enabled for bypass then we may
  already have the data we need to tweak the PI config.

  Yes, I agree it's more difficult, but it doesn't appear to be
  impossible, right?

 Since this also doesn't happen very often, you could also just update _all_
 PI data-structures from kvm_set_irq_routing, no? This would just
 resemble the way the API works anyway.

Thanks a lot for your suggestion, Joerg!

Do you mean updating the hardware IRTEs for all the entries in the irq
routing table, no matter whether it is the updated one?

Thanks,
Feng

 You just need to be careful to update the data structures only when the
 function can't fail anymore, so that you don't have to roll back
 anything.

   Joerg

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

RE: [v4 08/16] KVM: kvm-vfio: User API for IRQ forwarding

2015-06-25 Thread Wu, Feng

 -Original Message-
 From: Joerg Roedel [mailto:j...@8bytes.org]
 Sent: Wednesday, June 24, 2015 11:46 PM
 To: Alex Williamson
 Cc: Wu, Feng; Eric Auger; Avi Kivity; kvm@vger.kernel.org;
 linux-ker...@vger.kernel.org; pbonz...@redhat.com; mtosa...@redhat.com
 Subject: Re: [v4 08/16] KVM: kvm-vfio: User API for IRQ forwarding

 On Thu, Jun 18, 2015 at 02:04:08PM -0600, Alex Williamson wrote:
  There are plenty of details to be filled in,

 I also need to fill plenty of details in my head first, so here are some
 suggestions based on my current understanding. Please don't hesitate to
 correct me where I got something wrong.

 So first I totally agree that the handling of PI/non-PI configurations
 should be transparent to user-space.

After thinking about this a bit more, I recall why I used user-space
to trigger the IRTE update for posted-interrupts. Here is the reason:

Let's take MSI as an example:
When the guest updates the MSI configuration, here is the code path in
QEMU and KVM:

vfio_update_msi() --> vfio_update_kvm_msi_virq() -->
kvm_irqchip_update_msi_route() --> kvm_update_routing_entry() -->
kvm_irqchip_commit_routes() --> kvm_irqchip_commit_routes() -->
KVM_SET_GSI_ROUTING --> kvm_set_irq_routing()

It will finally go to kvm_set_irq_routing() in KVM, where there are two problems:
1. It uses RCU in this function, so it is hard to find which entry in the
  irq routing table is being updated.
2. Even if we find the updated entry, it is hard to find the assigned
  device associated with this irq routing entry.

So I used a VFIO API to notify KVM of the updated MSI/MSIx configuration and
the associated assigned devices. I think we need to find a way to address
the above two issues before going forward. Alex, what is your opinion?
Thanks a lot!

Thanks,
Feng

 I read a bit through the VT-d spec, and my understanding of posted
 interrupts so far is that:

   1) Each VCPU gets a PI-Descriptor with its pending Posted
  Interrupts. This descriptor needs to be updated when a VCPU
  is migrated to another PCPU and should thus be under control
  of KVM.

  This is similar to the vAPIC backing page in the AMD version
  of this, except that the PCPU routing information is stored
  somewhere else on AMD.

   2) As long as the VCPU runs the IRTEs are configured for
  posting, when the VCPU goes to sleep the old remapped entry is
  established again. So when the VCPU sleeps the interrupt
  would get routed to VFIO and forwarded through the eventfd.

  This would be different to the AMD version, where we have a
  running bit. When this is clear the IOMMU will trigger an event
  in its event-log. This might need special handling in VFIO
  ('might' because VFIO does not need to forward the interrupt,
   it just needs to make sure the VCPU wakes up).

  Please correct me if my understanding of the Intel version is
  wrong.

 So most of the data structures the IOMMU reads for this need to be
 updated from KVM code (either x86-generic or AMD/Intel specific code),
 as KVM has the information about VCPU load/unload and the IRQ routing.

 What KVM needs from VFIO is the information about the physical
 interrupts, and it makes total sense to attach them as metadata to the
 eventfd.

 But the problems start at how this metadata should look like. It would
 be good to have some generic description, but not sure if this is
 possible. Otherwise this metadata would need to be requested by VFIO
 from the IOMMU driver and passed on to KVM, which it then passes back to
 the IOMMU driver. Or something like that.

   Joerg

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

RE: [v4 08/16] KVM: kvm-vfio: User API for IRQ forwarding

2015-06-24 Thread Wu, Feng

-Original Message-
From: Alex Williamson [mailto:alex.william...@redhat.com]
Sent: Thursday, June 25, 2015 3:49 AM
To: Eric Auger
Cc: Joerg Roedel; Avi Kivity; Wu, Feng; kvm@vger.kernel.org;
linux-ker...@vger.kernel.org; pbonz...@redhat.com; mtosa...@redhat.com
Subject: Re: [v4 08/16] KVM: kvm-vfio: User API for IRQ forwarding

On Wed, 2015-06-24 at 18:25 +0200, Eric Auger wrote:
Hi Joerg,

On 06/24/2015 05:50 PM, Joerg Roedel wrote:
On Mon, Jun 15, 2015 at 06:17:03PM +0200, Eric Auger wrote:
I guess this discussion also is relevant wrt [RFC v6 00/16] KVM-VFIO
IRQ forward control series? Or is that central registry maintained by
a posted interrupts manager something more specific to x86?

From what I understood so far, the feature you implemented for ARM is a
bit different from the ones that get introduced to x86.

Can you please share some details on how the ARM version works? I am
interested in how the GICv2 is configured for IRQ forwarding. The
question is whether the forwarding information needs to be updated from
KVM and what information about the IRQ KVM needs for this.

The principle is that when you inject a virtual IRQ to a guest, you
program a register in the GIC, known as a list register. There you put
both the virtual IRQ you want to inject but also the physical IRQ it is
linked with (HWbit mode set = forwarding set). When the guest completes
the virtual IRQ the GIC HW automatically deactivates the physical IRQ
found in the list register. In that mode the physical IRQ deactivation
is under the ownership of the guest (actually automatically done by the HW).

If HWbit mode is *not* set (forwarding not set), you do not specify the
HW IRQ in the list register. The host deactivates the physical IRQ and
masks it before triggering the virtual IRQ. Only the virtual IRQ ID is
programmed in the list register. When the guest completes the virtual
IRQ, a physical maintenance IRQ is triggered. The hyp mode is entered
and eventually the host unmasks the IRQ.

Some illustrations can be found in
http://www.linux-kvm.org/images/a/a8/01x04-ARMdevice.pdf

I think an important aspect for our design is that in the case of Posted
Interrupts, they're only used for edge triggered interrupts so VFIO is
only an information provider for KVM to configure it.

Exactly! For PI, KVM only needs some information from VFIO when the
guests set the irq affinity.

Thanks,
Feng

VFIO will
hopefully just see fewer interrupts as they magically appear directly in
the guest. IRQ Forwarding however affects the de-assertion of level
triggered interrupts. VFIO needs to switch to something more like an
edge handler when IRQ Forwarding is enabled. So in that model, VFIO
needs to provide information as well as consume it to change behavior.
Thanks,

Alex

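--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html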

RE: [v4 08/16] KVM: kvm-vfio: User API for IRQ forwarding

2015-06-24 Thread Wu, Feng

 -Original Message-
 From: Joerg Roedel [mailto:j...@8bytes.org]
 Sent: Wednesday, June 24, 2015 11:46 PM
 To: Alex Williamson
 Cc: Wu, Feng; Eric Auger; Avi Kivity; kvm@vger.kernel.org;
 linux-ker...@vger.kernel.org; pbonz...@redhat.com; mtosa...@redhat.com
 Subject: Re: [v4 08/16] KVM: kvm-vfio: User API for IRQ forwarding

 On Thu, Jun 18, 2015 at 02:04:08PM -0600, Alex Williamson wrote:
  There are plenty of details to be filled in,

 I also need to fill plenty of details in my head first, so here are some
 suggestions based on my current understanding. Please don't hesitate to
 correct me where I got something wrong.

 So first I totally agree that the handling of PI/non-PI configurations
 should be transparent to user-space.

 I read a bit through the VT-d spec, and my understanding of posted
 interrupts so far is that:

   1) Each VCPU gets a PI-Descriptor with its pending Posted
  Interrupts. This descriptor needs to be updated when a VCPU
  is migrated to another PCPU and should thus be under control
  of KVM.

  This is similar to the vAPIC backing page in the AMD version
  of this, except that the PCPU routing information is stored
  somewhere else on AMD.

   2) As long as the VCPU runs the IRTEs are configured for
  posting, when the VCPU goes to sleep the old remapped entry is
  established again. So when the VCPU sleeps the interrupt
  would get routed to VFIO and forwarded through the eventfd.

When the vCPU sleeps, say, blocked when the guest is running HLT, the
interrupt is still in posted mode. The solution is that when the vCPU is blocked,
we use another notification vector (named wakeup notification vector) to
wake up the blocked vCPU when interrupts happen. And in the wakeup
event handler, we unblock the vCPU.

Thanks,
Feng

  This would be different to the AMD version, where we have a
  running bit. When this is clear the IOMMU will trigger an event
  in its event-log. This might need special handling in VFIO
  ('might' because VFIO does not need to forward the interrupt,
   it just needs to make sure the VCPU wakes up).

  Please correct me if my understanding of the Intel version is
  wrong.

 So most of the data structures the IOMMU reads for this need to be
 updated from KVM code (either x86-generic or AMD/Intel specific code),
 as KVM has the information about VCPU load/unload and the IRQ routing.

Yes, this part has nothing to do with VFIO, KVM itself can handle it well.

 What KVM needs from VFIO is the information about the physical
 interrupts, and it makes total sense to attach them as metadata to the
 eventfd.

When the guest sets the irq affinity, QEMU first gets the MSI/MSIx configuration,
then it passes this information to kernel space via the VFIO infrastructure; we
need this MSI/MSIx configuration to update the associated posted-format
IRTE accordingly. This is the key point for PI in terms of VFIO.

Thanks,
Feng

 But the problems start at how this metadata should look like. It would
 be good to have some generic description, but not sure if this is
 possible. Otherwise this metadata would need to be requested by VFIO
 from the IOMMU driver and passed on to KVM, which it then passes back to
 the IOMMU driver. Or something like that.

   Joerg

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

RE: [v4 01/16] KVM: Extend struct pi_desc for VT-d Posted-Interrupts

2015-06-23 Thread Wu, Feng



 -Original Message-
 From: Paolo Bonzini [mailto:pbonz...@redhat.com]
 Sent: Tuesday, June 23, 2015 11:50 PM
 To: Wu, Feng; kvm@vger.kernel.org; linux-ker...@vger.kernel.org
 Cc: mtosa...@redhat.com; alex.william...@redhat.com;
 eric.au...@linaro.org
 Subject: Re: [v4 01/16] KVM: Extend struct pi_desc for VT-d Posted-Interrupts
 
 
 
 On 11/06/2015 12:51, Feng Wu wrote:
  +   union {
  +   struct {
  +   /* bit 256 - Outstanding Notification */
  +   u64 on  : 1,
  +   /* bit 257 - Suppress Notification */
  +   sn  : 1,
  +   /* bit 271:258 - Reserved */
  +   rsvd_1  : 14,
  +   /* bit 279:272 - Notification Vector */
  +   nv  : 8,
  +   /* bit 287:280 - Reserved */
  +   rsvd_2  : 8,
  +   /* bit 319:288 - Notification Destination */
  +   ndst: 32;
 
 Please do not use bitfields, Linus doesn't like them.  Use static
 inlines instead to extract or set the fields from the control field.

Do you mean we don't use bitfields at all, or is the following format
acceptable, in which we use bitfields as little as possible?

union {
struct
{
u16 on : 1,  /* bit 256 - Outstanding Notification */
sn : 1,  /* bit 257 - Suppress Notification */
rsvd_1 : 14; /* bit 271:258 - Reserved */
u8  nv;  /* bit 279:272 - Notification Vector */
u8  rsvd_2;  /* bit 287:280 - Reserved */
u32 ndst;/* bit 319:288 - Notification Destination */
};
u64 control;
};

Thanks,
Feng

 
 Paolo
 
  +   };
  +   u64 control;
  +   };
  +   u32 rsvd[6];
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

RE: [v4 08/16] KVM: kvm-vfio: User API for IRQ forwarding

2015-06-18 Thread Wu, Feng

 -Original Message-
 From: Alex Williamson [mailto:alex.william...@redhat.com]
 Sent: Tuesday, June 16, 2015 12:45 AM
 To: Eric Auger
 Cc: Avi Kivity; Wu, Feng; kvm@vger.kernel.org; linux-ker...@vger.kernel.org;
 pbonz...@redhat.com; mtosa...@redhat.com
 Subject: Re: [v4 08/16] KVM: kvm-vfio: User API for IRQ forwarding

 On Mon, 2015-06-15 at 18:17 +0200, Eric Auger wrote:
  Hi Alex, all,
  On 06/12/2015 09:03 PM, Alex Williamson wrote:
   On Fri, 2015-06-12 at 21:48 +0300, Avi Kivity wrote:
   On 06/12/2015 06:41 PM, Alex Williamson wrote:
   On Fri, 2015-06-12 at 00:23 +, Wu, Feng wrote:
   -Original Message-
   From: Avi Kivity [mailto:avi.kiv...@gmail.com]
   Sent: Friday, June 12, 2015 3:59 AM
   To: Wu, Feng; kvm@vger.kernel.org; linux-ker...@vger.kernel.org
   Cc: pbonz...@redhat.com; mtosa...@redhat.com;
   alex.william...@redhat.com; eric.au...@linaro.org
   Subject: Re: [v4 08/16] KVM: kvm-vfio: User API for IRQ forwarding

   On 06/11/2015 01:51 PM, Feng Wu wrote:
   From: Eric Auger eric.au...@linaro.org

   This patch adds and documents a new KVM_DEV_VFIO_DEVICE
 group
   and 2 device attributes: KVM_DEV_VFIO_DEVICE_FORWARD_IRQ,
   KVM_DEV_VFIO_DEVICE_UNFORWARD_IRQ. The purpose is to be
 able
   to set a VFIO device IRQ as forwarded or not forwarded.
   the command takes as argument a handle to a new struct named
   kvm_vfio_dev_irq.
   Is there no way to do this automatically?  After all, vfio knows that 
   a
   device interrupt is forwarded to some eventfd, and kvm knows that
 some
   eventfd is forwarded to a guest interrupt.  If they compare notes
   through a central registry, they can figure out that the interrupt 
   needs
   to be forwarded.
   Oh, just like Eric mentioned in his reply, this description is out of 
   context
 of
   this series, I will remove them in the next version.

   I suspect Avi's question was more general.  While forward/unforward is
   out of context for this series, it's very similar in nature to
   enabling/disabling posted interrupts.  So I think the question remains
   whether we really need userspace to participate in creating this
   shortcut or if kvm and vfio can some how orchestrate figuring it out
   automatically.

   Personally I don't know how we could do it automatically.  We've always
   relied on userspace to independently setup vfio and kvm such that
   neither have any idea that the other is there and update each side
   independently when anything changes.  So it seems consistent to
 continue
   that here.  It doesn't seem like there's much to gain performance-wise
   either, updates should be a relatively rare event I'd expect.

   There's really no metadata associated with an eventfd, so comparing
   notes automatically might imply some central registration entity.  That
   immediately sounds like a much more complex solution, but maybe Avi
 has
   some ideas to manage it.  Thanks,

   The idea is to have a central registry maintained by a posted interrupts
   manager.  Both vfio and kvm pass the filp (along with extra information)
   to the posted interrupts manager, which, when it detects a filp match,
   tells each of them what to do.

   The advantages are:
   - old userspace gains the optimization without change
   - a userspace API is more expensive to maintain than internal kernel
   interfaces (CVEs, documentation, maintaining backwards compatibility)
   - if you can do it without a new interface, this indicates that all the
   information in the new interface is redundant.  That means you have to
   check it for consistency with the existing information, so it's extra
   work (likely, it's exactly what the posted interrupt manager would be
   doing anyway).

   Yep, those all sound like good things and I believe that's similar in
   design to the way we had originally discussed this interaction at
   LPC/KVM Forum several years ago.  I'd be in favor of that approach.

  I guess this discussion also is relevant wrt [RFC v6 00/16] KVM-VFIO
  IRQ forward control series? Or is that central registry maintained by
  a posted interrupts manager something more specific to x86?

 I'd think we'd want it for any sort of offload and supporting both
 posted-interrupts and irq-forwarding would be a good validation.  I
 imagine there would be registration/de-registration callbacks separate
 for interrupt producers vs interrupt consumers.  Each registration
 function would likely provide a struct of callbacks, probably similar to
 the get_symbol callbacks proposed for the kvm-vfio device on the IRQ
 producer side.  The eventfd would be the token that the manager would
 use to match producers and consumers.  The hard part is probably
 figuring out what information to retrieve from the producer and provide
 to the consumer in a generic way between pci and platform, but as an
 internal interface, it's not a big deal if we screw it up a few times to
 start.  Thanks,

On posted-interrupts side, the main

RE: [v4 08/16] KVM: kvm-vfio: User API for IRQ forwarding

2015-06-15 Thread Wu, Feng

-Original Message-
From: Alex Williamson [mailto:alex.william...@redhat.com]
Sent: Saturday, June 13, 2015 3:04 AM
To: Avi Kivity
Cc: Wu, Feng; kvm@vger.kernel.org; linux-ker...@vger.kernel.org;
pbonz...@redhat.com; mtosa...@redhat.com; eric.au...@linaro.org
Subject: Re: [v4 08/16] KVM: kvm-vfio: User API for IRQ forwarding

On Fri, 2015-06-12 at 21:48 +0300, Avi Kivity wrote:
On 06/12/2015 06:41 PM, Alex Williamson wrote:
On Fri, 2015-06-12 at 00:23 +, Wu, Feng wrote:
-Original Message-
From: Avi Kivity [mailto:avi.kiv...@gmail.com]
Sent: Friday, June 12, 2015 3:59 AM
To: Wu, Feng; kvm@vger.kernel.org; linux-ker...@vger.kernel.org
Cc: pbonz...@redhat.com; mtosa...@redhat.com;
alex.william...@redhat.com; eric.au...@linaro.org
Subject: Re: [v4 08/16] KVM: kvm-vfio: User API for IRQ forwarding

On 06/11/2015 01:51 PM, Feng Wu wrote:
From: Eric Auger eric.au...@linaro.org

This patch adds and documents a new KVM_DEV_VFIO_DEVICE group
and 2 device attributes: KVM_DEV_VFIO_DEVICE_FORWARD_IRQ,
KVM_DEV_VFIO_DEVICE_UNFORWARD_IRQ. The purpose is to be able
to set a VFIO device IRQ as forwarded or not forwarded.
the command takes as argument a handle to a new struct named
kvm_vfio_dev_irq.
Is there no way to do this automatically? After all, vfio knows that a
device interrupt is forwarded to some eventfd, and kvm knows that some
eventfd is forwarded to a guest interrupt. If they compare notes
through a central registry, they can figure out that the interrupt needs
to be forwarded.
Oh, just as Eric mentioned in his reply, this description is out of
context for this series; I will remove it in the next version.

I suspect Avi's question was more general. While forward/unforward is
out of context for this series, it's very similar in nature to
enabling/disabling posted interrupts. So I think the question remains
whether we really need userspace to participate in creating this
shortcut or if kvm and vfio can somehow orchestrate figuring it out
automatically.

Personally I don't know how we could do it automatically. We've always
relied on userspace to independently setup vfio and kvm such that
neither have any idea that the other is there and update each side
independently when anything changes. So it seems consistent to continue
that here. It doesn't seem like there's much to gain performance-wise
either, updates should be a relatively rare event I'd expect.

There's really no metadata associated with an eventfd, so comparing
notes automatically might imply some central registration entity. That
immediately sounds like a much more complex solution, but maybe Avi has
some ideas to manage it. Thanks,

The idea is to have a central registry maintained by a posted interrupts
manager. Both vfio and kvm pass the filp (along with extra information)
to the posted interrupts manager, which, when it detects a filp match,
tells each of them what to do.

The advantages are:
- old userspace gains the optimization without change
- a userspace API is more expensive to maintain than internal kernel
interfaces (CVEs, documentation, maintaining backwards compatibility)
- if you can do it without a new interface, this indicates that all the
information in the new interface is redundant. That means you have to
check it for consistency with the existing information, so it's extra
work (likely, it's exactly what the posted interrupt manager would be
doing anyway).

Yep, those all sound like good things and I believe that's similar in
design to the way we had originally discussed this interaction at
LPC/KVM Forum several years ago. I'd be in favor of that approach.
Thanks,

This seems a little complex compared to the current solution. Since I am
not very familiar with VFIO, Alex, could you help with this if we need to do
it that way, especially for the VFIO part?

Thanks,
Feng

Alex

N�r��yb�X��ǧv�^�)޺{.n�+h����ܨ}���Ơz�j:+v���zZ+��+zf���h���~i���z��w���?��)ߢf

RE: [v4 13/16] KVM: x86: kvm-vfio: VT-d posted-interrupts setup



 -Original Message-
 From: Alex Williamson [mailto:alex.william...@redhat.com]
 Sent: Friday, June 12, 2015 1:16 AM
 To: Wu, Feng
 Cc: kvm@vger.kernel.org; linux-ker...@vger.kernel.org; pbonz...@redhat.com;
 mtosa...@redhat.com; eric.au...@linaro.org
 Subject: Re: [v4 13/16] KVM: x86: kvm-vfio: VT-d posted-interrupts setup
 
 On Thu, 2015-06-11 at 18:51 +0800, Feng Wu wrote:
  This patch defines macro __KVM_HAVE_ARCH_KVM_VFIO_POST and
  implement kvm_arch_vfio_update_pi_irte for x86 architecture.
 
 
 What's vfio specific in any of this?  It's obviously called from the
 kvm-vfio device interface, but nothing below is vfio related.  This is
 simply the posted interrupt interface to kvm.  Thanks,
 
 Alex
 

Oh, yes, maybe I need to move this part to vmx.c.

Thanks,
Feng

 
  Signed-off-by: Feng Wu feng...@intel.com
  ---
   arch/x86/include/asm/kvm_host.h |  2 +
   arch/x86/kvm/Makefile   |  3 +-
   arch/x86/kvm/kvm_vfio_x86.c | 85
 +
   3 files changed, 89 insertions(+), 1 deletion(-)
   create mode 100644 arch/x86/kvm/kvm_vfio_x86.c
 
  diff --git a/arch/x86/include/asm/kvm_host.h
 b/arch/x86/include/asm/kvm_host.h
  index 31a495f..1605bf8 100644
  --- a/arch/x86/include/asm/kvm_host.h
  +++ b/arch/x86/include/asm/kvm_host.h
  @@ -81,6 +81,8 @@ static inline gfn_t gfn_to_index(gfn_t gfn, gfn_t
 base_gfn, int level)
  (base_gfn  KVM_HPAGE_GFN_SHIFT(level));
   }
 
  +#define __KVM_HAVE_ARCH_KVM_VFIO_POST
  +
   #define KVM_PERMILLE_MMU_PAGES 20
   #define KVM_MIN_ALLOC_MMU_PAGES 64
   #define KVM_MMU_HASH_SHIFT 10
  diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile
  index 16e8f96..6bafc89 100644
  --- a/arch/x86/kvm/Makefile
  +++ b/arch/x86/kvm/Makefile
  @@ -12,7 +12,8 @@ kvm-y += $(KVM)/kvm_main.o
 $(KVM)/coalesced_mmio.o \
   kvm-$(CONFIG_KVM_ASYNC_PF) += $(KVM)/async_pf.o
 
   kvm-y  += x86.o mmu.o emulate.o i8259.o irq.o lapic.o \
  -  i8254.o ioapic.o irq_comm.o cpuid.o pmu.o
  +  i8254.o ioapic.o irq_comm.o cpuid.o pmu.o \
  +  kvm_vfio_x86.o
   kvm-$(CONFIG_KVM_DEVICE_ASSIGNMENT)+= assigned-dev.o iommu.o
   kvm-intel-y+= vmx.o
   kvm-amd-y  += svm.o
  diff --git a/arch/x86/kvm/kvm_vfio_x86.c b/arch/x86/kvm/kvm_vfio_x86.c
  new file mode 100644
  index 000..a2d74f9
  --- /dev/null
  +++ b/arch/x86/kvm/kvm_vfio_x86.c
  @@ -0,0 +1,85 @@
  +/*
  + * Copyright (C) 2014 Intel Corporation.
  + * Authors: Feng Wu feng...@intel.com
  + *
  + * This program is free software; you can redistribute it and/or modify
  + * it under the terms of the GNU General Public License, version 2, as
  + * published by the Free Software Foundation.
  + *
  + * This program is distributed in the hope that it will be useful,
  + * but WITHOUT ANY WARRANTY; without even the implied warranty of
  + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  + * GNU General Public License for more details.
  + */
  +
  +#include <linux/kvm_host.h>
  +#include <asm/irq_remapping.h>
  +
  +/*
  + * kvm_arch_vfio_update_pi_irte - set IRTE for Posted-Interrupts
  + *
  + * @kvm: kvm
  + * @host_irq: host irq of the interrupt
  + * @guest_irq: gsi of the interrupt
  + * @set: set or unset PI
  + * returns 0 on success, < 0 on failure
  + */
  +int kvm_arch_vfio_update_pi_irte(struct kvm *kvm, unsigned int host_irq,
  +uint32_t guest_irq, bool set)
  +{
  +   struct kvm_kernel_irq_routing_entry *e;
  +   struct kvm_irq_routing_table *irq_rt;
  +   struct kvm_lapic_irq irq;
  +   struct kvm_vcpu *vcpu;
  +   struct vcpu_data vcpu_info;
  +   int idx, ret = -EINVAL;
  +
  +   idx = srcu_read_lock(&kvm->irq_srcu);
  +   irq_rt = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu);
  +   BUG_ON(guest_irq >= irq_rt->nr_rt_entries);
  +
  +   hlist_for_each_entry(e, &irq_rt->map[guest_irq], link) {
  +   if (e->type != KVM_IRQ_ROUTING_MSI)
  +   continue;
  +   /*
  +* VT-d PI cannot support posting multicast/broadcast
  +* interrupts to a VCPU, we still use interrupt remapping
  +* for these kind of interrupts.
  +*
  +* For lowest-priority interrupts, we only support
  +* those with single CPU as the destination, e.g. user
  +* configures the interrupts via /proc/irq or uses
  +* irqbalance to make the interrupts single-CPU.
  +*
  +* We will support full lowest-priority interrupt later.
  +*
  +*/
  +
  +   kvm_set_msi_irq(e, &irq);
  +   if (!kvm_intr_is_single_vcpu(kvm, &irq, &vcpu))
  +   continue;
  +
  +   vcpu_info.pi_desc_addr = kvm_x86_ops->get_pi_desc_addr(vcpu);
  +   vcpu_info.vector = irq.vector;
  +
  +   if (set)
  +   ret = irq_set_vcpu_affinity

RE: [v4 12/16] KVM: kvm-vfio: implement the VFIO skeleton for VT-d Posted-Interrupts

 -Original Message-
 From: Alex Williamson [mailto:alex.william...@redhat.com]
 Sent: Friday, June 12, 2015 1:15 AM
 To: Wu, Feng
 Cc: kvm@vger.kernel.org; linux-ker...@vger.kernel.org; pbonz...@redhat.com;
 mtosa...@redhat.com; eric.au...@linaro.org
 Subject: Re: [v4 12/16] KVM: kvm-vfio: implement the VFIO skeleton for VT-d
 Posted-Interrupts

 On Thu, 2015-06-11 at 18:51 +0800, Feng Wu wrote:
  This patch adds the kvm-vfio interface for VT-d Posted-Interrupts.
  When guests update MSI/MSI-x information for an assigned-device,
  QEMU will use KVM_DEV_VFIO_DEVICE_POST_IRQ attribute to setup
  IRTE for VT-d PI. Userspace program can also use
  KVM_DEV_VFIO_DEVICE_UNPOST_IRQ to change back to irq remapping
 mode.
  This patch implements these IRQ attributes.

  Signed-off-by: Feng Wu feng...@intel.com
  ---
   include/linux/kvm_host.h |  22 +
   virt/kvm/vfio.c  | 126
 +++
   2 files changed, 148 insertions(+)

  diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
  index f591f7c..69f8711 100644
  --- a/include/linux/kvm_host.h
  +++ b/include/linux/kvm_host.h
  @@ -1073,6 +1073,28 @@ extern struct kvm_device_ops kvm_xics_ops;
   extern struct kvm_device_ops kvm_arm_vgic_v2_ops;
   extern struct kvm_device_ops kvm_arm_vgic_v3_ops;

  +#ifdef __KVM_HAVE_ARCH_KVM_VFIO_POST
  +/*
  + * kvm_arch_vfio_update_pi_irte - set IRTE for Posted-Interrupts
  + *
  + * @kvm: kvm
  + * @host_irq: host irq of the interrupt
  + * @guest_irq: gsi of the interrupt
  + * @set: set or unset PI
  + * returns 0 on success, < 0 on failure
  + */
  +int kvm_arch_vfio_update_pi_irte(struct kvm *kvm, unsigned int host_irq,
  +uint32_t guest_irq, bool set);
  +#else
  +static inline int kvm_arch_vfio_update_pi_irte(struct kvm *kvm,
  +  unsigned int host_irq,
  +  uint32_t guest_irq,
  +  bool set)
  +{
  +   return 0;
  +}

 The code below can't get to this function without
 __KVM_HAVE_ARCH_KVM_VFIO_POST, but this seems like it should return an
 error if not implemented.

kvm_arch_vfio_update_pi_irte() is called by kvm_vfio_control_pi(). If we remove
the dummy definition of kvm_arch_vfio_update_pi_irte(), kvm_vfio_control_pi()
also needs to be placed inside the __KVM_HAVE_ARCH_KVM_VFIO_POST guard. I will
handle this in the next version.

  +#endif
  +
   #ifdef CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT

   static inline void kvm_vcpu_set_in_spin_loop(struct kvm_vcpu *vcpu, bool
 val)
  diff --git a/virt/kvm/vfio.c b/virt/kvm/vfio.c
  index 80a45e4..547fc51 100644
  --- a/virt/kvm/vfio.c
  +++ b/virt/kvm/vfio.c
  @@ -18,6 +18,7 @@
   #include linux/slab.h
   #include linux/uaccess.h
   #include linux/vfio.h
  +#include asm/irq_remapping.h

 This only exists on x86.   

But in kvm_vfio_has_attr(), we can only return 0 when posted interrupts are
supported, which is checked by calling irq_remapping_cap(IRQ_POSTING_CAP), and
that needs this header file. How do you think I should handle this?

 Are we also getting lucky with some of the
 include chains that give us the PCI related defines?  It looks like
 we're implicitly assuming CONFIG_PCI

Yes, I think the PCI related header files are included implicitly here. Anyway
I can add #include linux/pci.h explicitly.

   #include vfio.h

   struct kvm_vfio_group {
  @@ -276,12 +277,128 @@ static int kvm_vfio_set_group(struct kvm_device
 *dev, long attr, u64 arg)
  return -ENXIO;
   }

  +static int kvm_vfio_pci_get_irq_count(struct pci_dev *pdev, int irq_type)
  +{
  +   if (irq_type == VFIO_PCI_INTX_IRQ_INDEX) {
  +   u8 pin;
  +
  +   pci_read_config_byte(pdev, PCI_INTERRUPT_PIN, pin);
  +   if (pin)
  +   return 1;
  +   } else if (irq_type == VFIO_PCI_MSI_IRQ_INDEX) {
  +   return pci_msi_vec_count(pdev);
  +   } else if (irq_type == VFIO_PCI_MSIX_IRQ_INDEX) {
  +   return pci_msix_vec_count(pdev);
  +   }
  +
  +   return 0;
  +}
  +
  +static int kvm_vfio_control_pi(struct kvm_device *kdev,
  +  int32_t __user *argp, bool set)
  +{
  +   struct kvm_vfio_dev_irq pi_info;
  +   uint32_t *gsi;
  +   unsigned long minsz;
  +   struct vfio_device *vdev;
  +   struct msi_desc *entry;
  +   struct device *dev;
  +   struct pci_dev *pdev;
  +   int i, max, ret;
  +
  +   minsz = offsetofend(struct kvm_vfio_dev_irq, count);
  +
  +   if (copy_from_user(&pi_info, (void __user *)argp, minsz))
  +   return -EFAULT;
  +
  +   if (pi_info.argsz < minsz || pi_info.index >= VFIO_PCI_NUM_IRQS)
  +   return -EINVAL;

 Could we also abort on pi_info.count == 0?

Yes, that is a good point.

  +
  +   vdev = kvm_vfio_get_vfio_device(pi_info.fd);
  +   if (IS_ERR(vdev))
  +   return PTR_ERR(vdev);
  +
  +   dev = kvm_vfio_external_base_device(vdev);
  +   if (!dev || !dev_is_pci(dev

RE: [v4 08/16] KVM: kvm-vfio: User API for IRQ forwarding



 -Original Message-
 From: Avi Kivity [mailto:avi.kiv...@gmail.com]
 Sent: Friday, June 12, 2015 3:59 AM
 To: Wu, Feng; kvm@vger.kernel.org; linux-ker...@vger.kernel.org
 Cc: pbonz...@redhat.com; mtosa...@redhat.com;
 alex.william...@redhat.com; eric.au...@linaro.org
 Subject: Re: [v4 08/16] KVM: kvm-vfio: User API for IRQ forwarding
 
 On 06/11/2015 01:51 PM, Feng Wu wrote:
  From: Eric Auger eric.au...@linaro.org
 
  This patch adds and documents a new KVM_DEV_VFIO_DEVICE group
  and 2 device attributes: KVM_DEV_VFIO_DEVICE_FORWARD_IRQ,
  KVM_DEV_VFIO_DEVICE_UNFORWARD_IRQ. The purpose is to be able
  to set a VFIO device IRQ as forwarded or not forwarded.
  the command takes as argument a handle to a new struct named
  kvm_vfio_dev_irq.
 
 Is there no way to do this automatically?  After all, vfio knows that a
 device interrupt is forwarded to some eventfd, and kvm knows that some
 eventfd is forwarded to a guest interrupt.  If they compare notes
 through a central registry, they can figure out that the interrupt needs
 to be forwarded.

Oh, just as Eric mentioned in his reply, this description is out of context for
this series; I will remove it in the next version.

Thanks,
Feng

 
 
  Signed-off-by: Eric Auger eric.au...@linaro.org
  ---
Documentation/virtual/kvm/devices/vfio.txt | 34
 --
include/uapi/linux/kvm.h   | 12 +++
2 files changed, 40 insertions(+), 6 deletions(-)
 
  diff --git a/Documentation/virtual/kvm/devices/vfio.txt
 b/Documentation/virtual/kvm/devices/vfio.txt
  index ef51740..6186e6d 100644
  --- a/Documentation/virtual/kvm/devices/vfio.txt
  +++ b/Documentation/virtual/kvm/devices/vfio.txt
  @@ -4,15 +4,20 @@ VFIO virtual device
Device types supported:
  KVM_DEV_TYPE_VFIO
 
  -Only one VFIO instance may be created per VM.  The created device
  -tracks VFIO groups in use by the VM and features of those groups
  -important to the correctness and acceleration of the VM.  As groups
  -are enabled and disabled for use by the VM, KVM should be updated
  -about their presence.  When registered with KVM, a reference to the
  -VFIO-group is held by KVM.
  +Only one VFIO instance may be created per VM.
  +
  +The created device tracks VFIO groups in use by the VM and features
  +of those groups important to the correctness and acceleration of
  +the VM.  As groups are enabled and disabled for use by the VM, KVM
  +should be updated about their presence.  When registered with KVM,
  +a reference to the VFIO-group is held by KVM.
  +
  +The device also enables to control some IRQ settings of VFIO devices:
  +forwarding/posting.
 
Groups:
  KVM_DEV_VFIO_GROUP
  +  KVM_DEV_VFIO_DEVICE
 
KVM_DEV_VFIO_GROUP attributes:
  KVM_DEV_VFIO_GROUP_ADD: Add a VFIO group to VFIO-KVM device
 tracking
  @@ -20,3 +25,20 @@ KVM_DEV_VFIO_GROUP attributes:
 
For each, kvm_device_attr.addr points to an int32_t file descriptor
for the VFIO group.
  +
  +KVM_DEV_VFIO_DEVICE attributes:
  +  KVM_DEV_VFIO_DEVICE_FORWARD_IRQ: set a VFIO device IRQ as
 forwarded
  +  KVM_DEV_VFIO_DEVICE_UNFORWARD_IRQ: set a VFIO device IRQ as
 not forwarded
  +
  +For each, kvm_device_attr.addr points to a kvm_vfio_dev_irq struct.
  +
  +When forwarded, a physical IRQ is completed by the guest and not by the
  +host. This requires HW support in the interrupt controller.
  +
  +Forwarding can only be set when the corresponding VFIO IRQ is not masked
  +(would it be through VFIO_DEVICE_SET_IRQS command or as a consequence
 of this
  +IRQ being currently handled) or active at interrupt controller level.
  +In such a situation, -EAGAIN is returned. It is advised to set the
  +forwarding before the VFIO signaling is set up, this avoids trial and 
  errors.
  +
  +Unforwarding can happen at any time.
  diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
  index 4b60056..798f3e4 100644
  --- a/include/uapi/linux/kvm.h
  +++ b/include/uapi/linux/kvm.h
  @@ -999,6 +999,9 @@ struct kvm_device_attr {
#define  KVM_DEV_VFIO_GROUP   1
#define   KVM_DEV_VFIO_GROUP_ADD  1
#define   KVM_DEV_VFIO_GROUP_DEL  2
  +#define  KVM_DEV_VFIO_DEVICE   2
  +#define   KVM_DEV_VFIO_DEVICE_FORWARD_IRQ  1
  +#define   KVM_DEV_VFIO_DEVICE_UNFORWARD_IRQ2
 
enum kvm_device_type {
  KVM_DEV_TYPE_FSL_MPIC_20= 1,
  @@ -1018,6 +1021,15 @@ enum kvm_device_type {
  KVM_DEV_TYPE_MAX,
};
 
  +struct kvm_vfio_dev_irq {
  +   __u32   argsz;  /* structure length */
  +   __u32   fd; /* file descriptor of the VFIO device */
  +   __u32   index;  /* VFIO device IRQ index */
  +   __u32   start;  /* start of subindex range */
  +   __u32   count;  /* size of subindex range */
  +   __u32   gsi[];  /* gsi, ie. virtual IRQ number */
  +};
  +
/*
 * ioctls

RE: [v4 08/16] KVM: kvm-vfio: User API for IRQ forwarding