Re: [PATCH v8 10/13] KVM: Update Posted-Interrupts Descriptor when vCPU is preempted

2015-09-16 Thread Paolo Bonzini


On 16/09/2015 10:50, Feng Wu wrote:
> +
> + if (!irq_remapping_cap(IRQ_POSTING_CAP) ||
> + (!kvm_arch_has_assigned_device(vcpu->kvm)))
> + return;
> +

Better:

if (!kvm_arch_has_assigned_device(vcpu->kvm) ||
    !irq_remapping_cap(IRQ_POSTING_CAP))
	return;

(In the future we might add a static_key here).

Paolo
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH v8 00/13] Add VT-d Posted-Interrupts support

2015-09-16 Thread Paolo Bonzini


On 16/09/2015 10:49, Feng Wu wrote:
> VT-d Posted-Interrupts is an enhancement to CPU side Posted-Interrupt.
> With VT-d Posted-Interrupts enabled, external interrupts from
> direct-assigned devices can be delivered to guests without VMM
> intervention when guest is running in non-root mode.
> 
> You can find the VT-d Posted-Interrupts Spec. in the following URL:
> http://www.intel.com/content/www/us/en/intelligent-systems/intel-technology/vt-directed-io-spec.html
> 
> v8:
> refer to the changelog in each patch

Thanks, it mostly looks good.

Since we've more or less converged, could you post the whole series for
v9, including the other prerequisite series?

Paolo
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH v8 02/13] KVM: Add some helper functions for Posted-Interrupts

2015-09-16 Thread Feng Wu
This patch adds some helper functions to manipulate the
Posted-Interrupts Descriptor.

Signed-off-by: Feng Wu 
Reviewed-by: Paolo Bonzini 
---
 arch/x86/kvm/vmx.c | 26 ++
 1 file changed, 26 insertions(+)

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 271dd70..316f9bf 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -443,6 +443,8 @@ struct nested_vmx {
 };
 
 #define POSTED_INTR_ON  0
+#define POSTED_INTR_SN  1
+
 /* Posted-Interrupt Descriptor */
 struct pi_desc {
u32 pir[8]; /* Posted interrupt requested */
@@ -483,6 +485,30 @@ static int pi_test_and_set_pir(int vector, struct pi_desc 
*pi_desc)
return test_and_set_bit(vector, (unsigned long *)pi_desc->pir);
 }
 
+static void pi_clear_sn(struct pi_desc *pi_desc)
+{
+   return clear_bit(POSTED_INTR_SN,
+   (unsigned long *)&pi_desc->control);
+}
+
+static void pi_set_sn(struct pi_desc *pi_desc)
+{
+   return set_bit(POSTED_INTR_SN,
+   (unsigned long *)&pi_desc->control);
+}
+
+static int pi_test_on(struct pi_desc *pi_desc)
+{
+   return test_bit(POSTED_INTR_ON,
+   (unsigned long *)&pi_desc->control);
+}
+
+static int pi_test_sn(struct pi_desc *pi_desc)
+{
+   return test_bit(POSTED_INTR_SN,
+   (unsigned long *)&pi_desc->control);
+}
+
 struct vcpu_vmx {
struct kvm_vcpu   vcpu;
unsigned long host_rsp;
-- 
2.1.0

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH v8 09/13] KVM: Add an arch specific hooks in 'struct kvm_kernel_irqfd'

2015-09-16 Thread Feng Wu
This patch adds an arch specific hooks 'arch_update' in
'struct kvm_kernel_irqfd'. On Intel side, it is used to
update the IRTE when VT-d posted-interrupts is used.

Signed-off-by: Feng Wu 
---
v8:
- Remove callback .arch_update()
- Remove kvm_arch_irqfd_init()
- Call kvm_arch_update_irqfd_routing() instead.

 arch/x86/kvm/x86.c   |  7 +++
 include/linux/kvm_host.h |  2 ++
 virt/kvm/eventfd.c   | 19 ++-
 3 files changed, 27 insertions(+), 1 deletion(-)

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 79dac02..e189a94 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -8293,6 +8293,13 @@ void kvm_arch_irq_bypass_del_producer(struct 
irq_bypass_consumer *cons,
   " fails: %d\n", irqfd->consumer.token, ret);
 }
 
+int kvm_arch_update_irqfd_routing(struct kvm *kvm, unsigned int host_irq,
+  uint32_t guest_irq, bool set)
+{
+   return !kvm_x86_ops->update_pi_irte ? -EINVAL :
+   kvm_x86_ops->update_pi_irte(kvm, host_irq, guest_irq, set);
+}
+
 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_exit);
 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_inj_virq);
 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_page_fault);
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 5f183fb..feba1fb 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -1174,6 +1174,8 @@ void kvm_arch_irq_bypass_del_producer(struct 
irq_bypass_consumer *,
   struct irq_bypass_producer *);
 void kvm_arch_irq_bypass_stop(struct irq_bypass_consumer *);
 void kvm_arch_irq_bypass_start(struct irq_bypass_consumer *);
+int kvm_arch_update_irqfd_routing(struct kvm *kvm, unsigned int host_irq,
+ uint32_t guest_irq, bool set);
 #endif /* CONFIG_HAVE_KVM_IRQ_BYPASS */
 #endif
 
diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
index c0a56a1..89c9635 100644
--- a/virt/kvm/eventfd.c
+++ b/virt/kvm/eventfd.c
@@ -266,6 +266,12 @@ void __attribute__((weak)) kvm_arch_irq_bypass_start(
struct irq_bypass_consumer *cons)
 {
 }
+int  __attribute__((weak)) kvm_arch_update_irqfd_routing(
+   struct kvm *kvm, unsigned int host_irq,
+   uint32_t guest_irq, bool set)
+{
+   return 0;
+}
 #endif
 
 static int
@@ -582,13 +588,24 @@ kvm_irqfd_release(struct kvm *kvm)
  */
 void kvm_irq_routing_update(struct kvm *kvm)
 {
+   int ret;
struct kvm_kernel_irqfd *irqfd;
 
spin_lock_irq(&kvm->irqfds.lock);
 
-   list_for_each_entry(irqfd, &kvm->irqfds.items, list)
+   list_for_each_entry(irqfd, &kvm->irqfds.items, list) {
irqfd_update(kvm, irqfd);
 
+#ifdef CONFIG_HAVE_KVM_IRQ_BYPASS
+   if (irqfd->producer) {
+   ret = kvm_arch_update_irqfd_routing(
+   irqfd->kvm, irqfd->producer->irq,
+   irqfd->gsi, 1);
+   WARN_ON(ret);
+   }
+#endif
+   }
+
spin_unlock_irq(&kvm->irqfds.lock);
 }
 
-- 
2.1.0

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH v8 07/13] KVM: x86: Update IRTE for posted-interrupts

2015-09-16 Thread Feng Wu
This patch adds the routine to update IRTE for posted-interrupts
when guest changes the interrupt configuration.

Signed-off-by: Feng Wu 
---
v8:
- Move 'kvm_arch_update_pi_irte' to vmx.c as a callback
- Only update the PI irte when VM has assigned devices
- Add a trace point for VT-d posted-interrupts when we update
  or disable it for a specific irq.

 arch/x86/include/asm/kvm_host.h |  3 ++
 arch/x86/kvm/trace.h| 33 
 arch/x86/kvm/vmx.c  | 83 +
 arch/x86/kvm/x86.c  |  2 +
 4 files changed, 121 insertions(+)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index daa6126..8c44286 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -862,6 +862,9 @@ struct kvm_x86_ops {
   gfn_t offset, unsigned long mask);
/* pmu operations of sub-arch */
const struct kvm_pmu_ops *pmu_ops;
+
+   int (*update_pi_irte)(struct kvm *kvm, unsigned int host_irq,
+ uint32_t guest_irq, bool set);
 };
 
 struct kvm_arch_async_pf {
diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h
index 4eae7c3..539a9e4 100644
--- a/arch/x86/kvm/trace.h
+++ b/arch/x86/kvm/trace.h
@@ -974,6 +974,39 @@ TRACE_EVENT(kvm_enter_smm,
  __entry->smbase)
 );
 
+/*
+ * Tracepoint for VT-d posted-interrupts.
+ */
+TRACE_EVENT(kvm_pi_irte_update,
+   TP_PROTO(unsigned int vcpu_id, unsigned int gsi,
+unsigned int gvec, u64 pi_desc_addr, bool set),
+   TP_ARGS(vcpu_id, gsi, gvec, pi_desc_addr, set),
+
+   TP_STRUCT__entry(
+   __field(unsigned int,   vcpu_id )
+   __field(unsigned int,   gsi )
+   __field(unsigned int,   gvec)
+   __field(u64,pi_desc_addr)
+   __field(bool,   set )
+   ),
+
+   TP_fast_assign(
+   __entry->vcpu_id= vcpu_id;
+   __entry->gsi= gsi;
+   __entry->gvec   = gvec;
+   __entry->pi_desc_addr   = pi_desc_addr;
+   __entry->set= set;
+   ),
+
+   TP_printk("VT-d PI is %s for this irq, vcpu %u, gsi: 0x%x, "
+ "gvec: 0x%x, pi_desc_addr: 0x%llx",
+ __entry->set ? "enabled and being updated" : "disabled",
+ __entry->vcpu_id,
+ __entry->gsi,
+ __entry->gvec,
+ __entry->pi_desc_addr)
+);
+
 #endif /* _TRACE_KVM_H */
 
 #undef TRACE_INCLUDE_PATH
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 316f9bf..5a25651 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -45,6 +45,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include "trace.h"
 #include "pmu.h"
@@ -605,6 +606,11 @@ static inline struct vcpu_vmx *to_vmx(struct kvm_vcpu 
*vcpu)
return container_of(vcpu, struct vcpu_vmx, vcpu);
 }
 
+struct pi_desc *vcpu_to_pi_desc(struct kvm_vcpu *vcpu)
+{
+   return &(to_vmx(vcpu)->pi_desc);
+}
+
 #define VMCS12_OFFSET(x) offsetof(struct vmcs12, x)
 #define FIELD(number, name)[number] = VMCS12_OFFSET(name)
 #define FIELD64(number, name)  [number] = VMCS12_OFFSET(name), \
@@ -10344,6 +10350,81 @@ static void vmx_enable_log_dirty_pt_masked(struct kvm 
*kvm,
kvm_mmu_clear_dirty_pt_masked(kvm, memslot, offset, mask);
 }
 
+/*
+ * vmx_update_pi_irte - set IRTE for Posted-Interrupts
+ *
+ * @kvm: kvm
+ * @host_irq: host irq of the interrupt
+ * @guest_irq: gsi of the interrupt
+ * @set: set or unset PI
+ * returns 0 on success, < 0 on failure
+ */
+int vmx_update_pi_irte(struct kvm *kvm, unsigned int host_irq,
+  uint32_t guest_irq, bool set)
+{
+   struct kvm_kernel_irq_routing_entry *e;
+   struct kvm_irq_routing_table *irq_rt;
+   struct kvm_lapic_irq irq;
+   struct kvm_vcpu *vcpu;
+   struct vcpu_data vcpu_info;
+   int idx, ret = -EINVAL;
+
+   if (!irq_remapping_cap(IRQ_POSTING_CAP) ||
+   (!kvm_arch_has_assigned_device(kvm)))
+   return 0;
+
+   idx = srcu_read_lock(&kvm->irq_srcu);
+   irq_rt = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu);
+   BUG_ON(guest_irq >= irq_rt->nr_rt_entries);
+
+   hlist_for_each_entry(e, &irq_rt->map[guest_irq], link) {
+   if (e->type != KVM_IRQ_ROUTING_MSI)
+   continue;
+   /*
+* VT-d PI cannot support posting multicast/broadcast
+* interrupts to a vCPU, we still use interrupt remapping
+* for these kind of interrupts.
+*
+* For lowest-priority interrupts, we only support
+* those with single CPU as the destination, e.g. user
+* configures the interrupts via 

[PATCH v8 11/13] KVM: Update Posted-Interrupts Descriptor when vCPU is blocked

2015-09-16 Thread Feng Wu
This patch updates the Posted-Interrupts Descriptor when vCPU
is blocked.

pre-block:
- Add the vCPU to the blocked per-CPU list
- Set 'NV' to POSTED_INTR_WAKEUP_VECTOR

post-block:
- Remove the vCPU from the per-CPU list

Signed-off-by: Feng Wu 
---
v8:
- Rename 'pi_pre_block' to 'pre_block'
- Rename 'pi_post_block' to 'post_block'
- Change some comments
- Only add the vCPU to the blocking list when the VM has assigned devices.

 arch/x86/include/asm/kvm_host.h |  13 
 arch/x86/kvm/vmx.c  | 157 +++-
 arch/x86/kvm/x86.c  |  53 +++---
 include/linux/kvm_host.h|   3 +
 virt/kvm/kvm_main.c |   3 +
 5 files changed, 217 insertions(+), 12 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 0ddd353..304fbb5 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -552,6 +552,8 @@ struct kvm_vcpu_arch {
 */
bool write_fault_to_shadow_pgtable;
 
+   bool halted;
+
/* set at EPT violation at this point */
unsigned long exit_qualification;
 
@@ -864,6 +866,17 @@ struct kvm_x86_ops {
/* pmu operations of sub-arch */
const struct kvm_pmu_ops *pmu_ops;
 
+   /*
+* Architecture specific hooks for vCPU blocking due to
+* HLT instruction.
+* Returns for .pre_block():
+*- 0 means continue to block the vCPU.
+*- 1 means we cannot block the vCPU since some event
+*happens during this period, such as, 'ON' bit in
+*posted-interrupts descriptor is set.
+*/
+   int (*pre_block)(struct kvm_vcpu *vcpu);
+   void (*post_block)(struct kvm_vcpu *vcpu);
int (*update_pi_irte)(struct kvm *kvm, unsigned int host_irq,
  uint32_t guest_irq, bool set);
 };
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 5ceb280..9888c43 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -879,6 +879,13 @@ static DEFINE_PER_CPU(struct vmcs *, current_vmcs);
 static DEFINE_PER_CPU(struct list_head, loaded_vmcss_on_cpu);
 static DEFINE_PER_CPU(struct desc_ptr, host_gdt);
 
+/*
+ * We maintain a per-CPU linked-list of vCPU, so in wakeup_handler() we
+ * can find which vCPU should be woken up.
+ */
+static DEFINE_PER_CPU(struct list_head, blocked_vcpu_on_cpu);
+static DEFINE_PER_CPU(spinlock_t, blocked_vcpu_on_cpu_lock);
+
 static unsigned long *vmx_io_bitmap_a;
 static unsigned long *vmx_io_bitmap_b;
 static unsigned long *vmx_msr_bitmap_legacy;
@@ -1959,10 +1966,10 @@ static void vmx_vcpu_pi_load(struct kvm_vcpu *vcpu, int 
cpu)
/*
 * If 'nv' field is POSTED_INTR_WAKEUP_VECTOR, there
 * are two possible cases:
-* 1. After running 'pi_pre_block', context switch
+* 1. After running 'pre_block', context switch
 *happened. For this case, 'sn' was set in
 *vmx_vcpu_put(), so we need to clear it here.
-* 2. After running 'pi_pre_block', we were blocked,
+* 2. After running 'pre_block', we were blocked,
 *and woken up by some other guy. For this case,
 *we don't need to do anything, 'pi_post_block'
 *will do everything for us. However, we cannot
@@ -2985,6 +2992,8 @@ static int hardware_enable(void)
return -EBUSY;
 
INIT_LIST_HEAD(&per_cpu(loaded_vmcss_on_cpu, cpu));
+   INIT_LIST_HEAD(&per_cpu(blocked_vcpu_on_cpu, cpu));
+   spin_lock_init(&per_cpu(blocked_vcpu_on_cpu_lock, cpu));
 
/*
 * Now we can enable the vmclear operation in kdump
@@ -6105,6 +6114,25 @@ static void update_ple_window_actual_max(void)
ple_window_grow, INT_MIN);
 }
 
+/*
+ * Handler for POSTED_INTERRUPT_WAKEUP_VECTOR.
+ */
+static void wakeup_handler(void)
+{
+   struct kvm_vcpu *vcpu;
+   int cpu = smp_processor_id();
+
+   spin_lock(&per_cpu(blocked_vcpu_on_cpu_lock, cpu));
+   list_for_each_entry(vcpu, &per_cpu(blocked_vcpu_on_cpu, cpu),
+   blocked_vcpu_list) {
+   struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);
+
+   if (pi_test_on(pi_desc) == 1)
+   kvm_vcpu_kick(vcpu);
+   }
+   spin_unlock(&per_cpu(blocked_vcpu_on_cpu_lock, cpu));
+}
+
 static __init int hardware_setup(void)
 {
int r = -ENOMEM, i, msr;
@@ -6289,6 +6317,8 @@ static __init int hardware_setup(void)
kvm_x86_ops->enable_log_dirty_pt_masked = NULL;
}
 
+   kvm_set_posted_intr_wakeup_handler(wakeup_handler);
+
return alloc_kvm_area();
 
 out8:
@@ -10414,6 +10444,126 @@ static void vmx_enable_log_dirty_pt_masked(struct kvm 
*kvm,
 }
 
 /*
+ * This routine does the following things for vCPU which is going
+ * to be blocked if VT-d PI is 

[PATCH v8 10/13] KVM: Update Posted-Interrupts Descriptor when vCPU is preempted

2015-09-16 Thread Feng Wu
This patch updates the Posted-Interrupts Descriptor when vCPU
is preempted.

sched out:
- Set 'SN' to suppress future non-urgent interrupts posted for
the vCPU.

sched in:
- Clear 'SN'
- Change NDST if vCPU is scheduled to a different CPU
- Set 'NV' to POSTED_INTR_VECTOR

Signed-off-by: Feng Wu 
---
v8:
- Add two wrapper functions vmx_vcpu_pi_load() and vmx_vcpu_pi_put().
- Only handle VT-d PI related logic when the VM has assigned devices.

 arch/x86/kvm/vmx.c | 63 ++
 1 file changed, 63 insertions(+)

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 5a25651..5ceb280 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -1943,6 +1943,52 @@ static void vmx_load_host_state(struct vcpu_vmx *vmx)
preempt_enable();
 }
 
+static void vmx_vcpu_pi_load(struct kvm_vcpu *vcpu, int cpu)
+{
+   struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);
+   struct pi_desc old, new;
+   unsigned int dest;
+
+   if (!irq_remapping_cap(IRQ_POSTING_CAP) ||
+   (!kvm_arch_has_assigned_device(vcpu->kvm)))
+   return;
+
+   do {
+   old.control = new.control = pi_desc->control;
+
+   /*
+* If 'nv' field is POSTED_INTR_WAKEUP_VECTOR, there
+* are two possible cases:
+* 1. After running 'pi_pre_block', context switch
+*happened. For this case, 'sn' was set in
+*vmx_vcpu_put(), so we need to clear it here.
+* 2. After running 'pi_pre_block', we were blocked,
+*and woken up by some other guy. For this case,
+*we don't need to do anything, 'pi_post_block'
+*will do everything for us. However, we cannot
+*check whether it is case #1 or case #2 here
+*(maybe, not needed), so we also clear sn here,
+*I think it is not a big deal.
+*/
+   if (pi_desc->nv != POSTED_INTR_WAKEUP_VECTOR) {
+   if (vcpu->cpu != cpu) {
+   dest = cpu_physical_id(cpu);
+
+   if (x2apic_enabled())
+   new.ndst = dest;
+   else
+   new.ndst = (dest << 8) & 0xFF00;
+   }
+
+   /* set 'NV' to 'notification vector' */
+   new.nv = POSTED_INTR_VECTOR;
+   }
+
+   /* Allow posting non-urgent interrupts */
+   new.sn = 0;
+   } while (cmpxchg(&pi_desc->control, old.control,
+   new.control) != old.control);
+}
 /*
  * Switches to specified vcpu, until a matching vcpu_put(), but assumes
  * vcpu mutex is already taken.
@@ -1993,10 +2039,27 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int 
cpu)
vmcs_writel(HOST_IA32_SYSENTER_ESP, sysenter_esp); /* 22.2.3 */
vmx->loaded_vmcs->cpu = cpu;
}
+
+   vmx_vcpu_pi_load(vcpu, cpu);
+}
+
+static void vmx_vcpu_pi_put(struct kvm_vcpu *vcpu)
+{
+   struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);
+
+   if (!irq_remapping_cap(IRQ_POSTING_CAP) ||
+   (!kvm_arch_has_assigned_device(vcpu->kvm)))
+   return;
+
+   /* Set SN when the vCPU is preempted */
+   if (vcpu->preempted)
+   pi_set_sn(pi_desc);
 }
 
 static void vmx_vcpu_put(struct kvm_vcpu *vcpu)
 {
+   vmx_vcpu_pi_put(vcpu);
+
__vmx_load_host_state(to_vmx(vcpu));
if (!vmm_exclusive) {
__loaded_vmcs_clear(to_vmx(vcpu)->loaded_vmcs);
-- 
2.1.0

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH v8 12/13] KVM: Warn if 'SN' is set during posting interrupts by software

2015-09-16 Thread Feng Wu
Currently, we don't support urgent interrupt, all interrupts
are recognized as non-urgent interrupt, so we cannot post
interrupts when 'SN' is set.

If the vcpu is in guest mode, it cannot have been scheduled out,
and that's the only case when SN is set currently, warning if
SN is set.

Signed-off-by: Feng Wu 
Reviewed-by: Paolo Bonzini 
---
 arch/x86/kvm/vmx.c | 16 
 1 file changed, 16 insertions(+)

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 9888c43..58fbbc6 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -4498,6 +4498,22 @@ static inline bool 
kvm_vcpu_trigger_posted_interrupt(struct kvm_vcpu *vcpu)
 {
 #ifdef CONFIG_SMP
if (vcpu->mode == IN_GUEST_MODE) {
+   struct vcpu_vmx *vmx = to_vmx(vcpu);
+
+   /*
+* Currently, we don't support urgent interrupt,
+* all interrupts are recognized as non-urgent
+* interrupt, so we cannot post interrupts when
+* 'SN' is set.
+*
+* If the vcpu is in guest mode, it means it is
+* running instead of being scheduled out and
+* waiting in the run queue, and that's the only
+* case when 'SN' is set currently, warning if
+* 'SN' is set.
+*/
+   WARN_ON_ONCE(pi_test_sn(&vmx->pi_desc));
+
apic->send_IPI_mask(get_cpu_mask(vcpu->cpu),
POSTED_INTR_VECTOR);
return true;
-- 
2.1.0

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH v8 03/13] KVM: Define a new interface kvm_intr_is_single_vcpu()

2015-09-16 Thread Feng Wu
This patch defines a new interface kvm_intr_is_single_vcpu(),
which can return whether the interrupt is for single-CPU or not.

It is used by VT-d PI, since now we only support single-CPU
interrupts, For lowest-priority interrupts, if user configures
it via /proc/irq or uses irqbalance to make it single-CPU, we
can use PI to deliver the interrupts to it. Full functionality
of lowest-priority support will be added later.

Signed-off-by: Feng Wu 
---
v8:
- Some optimizations in kvm_intr_is_single_vcpu().
- Expose kvm_intr_is_single_vcpu() so we can use it in vmx code.
- Add kvm_intr_is_single_vcpu_fast() as the fast path to find
  the target vCPU for the single-destination interrupt

 arch/x86/include/asm/kvm_host.h |  3 ++
 arch/x86/kvm/irq_comm.c | 94 +
 arch/x86/kvm/lapic.c|  5 +--
 arch/x86/kvm/lapic.h|  2 +
 4 files changed, 101 insertions(+), 3 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 49ec903..af11bca 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1204,4 +1204,7 @@ int __x86_set_memory_region(struct kvm *kvm,
 int x86_set_memory_region(struct kvm *kvm,
  const struct kvm_userspace_memory_region *mem);
 
+bool kvm_intr_is_single_vcpu(struct kvm *kvm, struct kvm_lapic_irq *irq,
+struct kvm_vcpu **dest_vcpu);
+
 #endif /* _ASM_X86_KVM_HOST_H */
diff --git a/arch/x86/kvm/irq_comm.c b/arch/x86/kvm/irq_comm.c
index 9efff9e..97ba1d6 100644
--- a/arch/x86/kvm/irq_comm.c
+++ b/arch/x86/kvm/irq_comm.c
@@ -297,6 +297,100 @@ out:
return r;
 }
 
+static bool kvm_intr_is_single_vcpu_fast(struct kvm *kvm,
+struct kvm_lapic_irq *irq,
+struct kvm_vcpu **dest_vcpu)
+{
+   struct kvm_apic_map *map;
+   bool ret = false;
+   struct kvm_lapic *dst = NULL;
+
+   if (irq->shorthand)
+   return false;
+
+   rcu_read_lock();
+   map = rcu_dereference(kvm->arch.apic_map);
+
+   if (!map)
+   goto out;
+
+   if (irq->dest_mode == APIC_DEST_PHYSICAL) {
+   if (irq->dest_id == 0xFF)
+   goto out;
+
+   if (irq->dest_id >= ARRAY_SIZE(map->phys_map)) {
+   WARN_ON_ONCE(1);
+   goto out;
+   }
+
+   dst = map->phys_map[irq->dest_id];
+   if (dst && kvm_apic_present(dst->vcpu))
+   *dest_vcpu = dst->vcpu;
+   else
+   goto out;
+   } else {
+   u16 cid;
+   unsigned long bitmap = 1;
+   int i, r = 0;
+
+   if (!kvm_apic_logical_map_valid(map)) {
+   WARN_ON_ONCE(1);
+   goto out;
+   }
+
+   apic_logical_id(map, irq->dest_id, &cid, (u16 *)&bitmap);
+
+   if (cid >= ARRAY_SIZE(map->logical_map)) {
+   WARN_ON_ONCE(1);
+   goto out;
+   }
+
+   for_each_set_bit(i, &bitmap, 16) {
+   dst = map->logical_map[cid][i];
+   if (++r == 2)
+   goto out;
+   }
+
+   if (dst && kvm_apic_present(dst->vcpu))
+   *dest_vcpu = dst->vcpu;
+   else
+   goto out;
+   }
+
+   ret = true;
+out:
+   rcu_read_unlock();
+   return ret;
+}
+
+
+bool kvm_intr_is_single_vcpu(struct kvm *kvm, struct kvm_lapic_irq *irq,
+struct kvm_vcpu **dest_vcpu)
+{
+   int i, r = 0;
+   struct kvm_vcpu *vcpu;
+
+   if (kvm_intr_is_single_vcpu_fast(kvm, irq, dest_vcpu))
+   return true;
+
+   kvm_for_each_vcpu(i, vcpu, kvm) {
+   if (!kvm_apic_present(vcpu))
+   continue;
+
+   if (!kvm_apic_match_dest(vcpu, NULL, irq->shorthand,
+   irq->dest_id, irq->dest_mode))
+   continue;
+
+   if (++r == 2)
+   return false;
+
+   *dest_vcpu = vcpu;
+   }
+
+   return r == 1;
+}
+EXPORT_SYMBOL_GPL(kvm_intr_is_single_vcpu);
+
 #define IOAPIC_ROUTING_ENTRY(irq) \
{ .gsi = irq, .type = KVM_IRQ_ROUTING_IRQCHIP,  \
  .u.irqchip = { .irqchip = KVM_IRQCHIP_IOAPIC, .pin = (irq) } }
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 2a5ca97..9848cd50 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -136,13 +136,12 @@ static inline int kvm_apic_id(struct kvm_lapic *apic)
 /* The logical map is definitely wrong if we have multiple
  * modes at the same time.  (Physical map is always right.)
  */
-static inline bool kvm_apic_logical_map_valid(struct kvm_apic_map *map)
+bool 

Re: [PATCH v8 03/13] KVM: Define a new interface kvm_intr_is_single_vcpu()

2015-09-16 Thread Paolo Bonzini


On 16/09/2015 10:49, Feng Wu wrote:
> This patch defines a new interface kvm_intr_is_single_vcpu(),
> which can return whether the interrupt is for single-CPU or not.
> 
> It is used by VT-d PI, since now we only support single-CPU
> interrupts, For lowest-priority interrupts, if user configures
> it via /proc/irq or uses irqbalance to make it single-CPU, we
> can use PI to deliver the interrupts to it. Full functionality
> of lowest-priority support will be added later.
> 
> Signed-off-by: Feng Wu 
> ---
> v8:
> - Some optimizations in kvm_intr_is_single_vcpu().
> - Expose kvm_intr_is_single_vcpu() so we can use it in vmx code.
> - Add kvm_intr_is_single_vcpu_fast() as the fast path to find
>   the target vCPU for the single-destination interrupt
> 
>  arch/x86/include/asm/kvm_host.h |  3 ++
>  arch/x86/kvm/irq_comm.c | 94 
> +
>  arch/x86/kvm/lapic.c|  5 +--
>  arch/x86/kvm/lapic.h|  2 +
>  4 files changed, 101 insertions(+), 3 deletions(-)
> 
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index 49ec903..af11bca 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -1204,4 +1204,7 @@ int __x86_set_memory_region(struct kvm *kvm,
>  int x86_set_memory_region(struct kvm *kvm,
> const struct kvm_userspace_memory_region *mem);
>  
> +bool kvm_intr_is_single_vcpu(struct kvm *kvm, struct kvm_lapic_irq *irq,
> +  struct kvm_vcpu **dest_vcpu);
> +
>  #endif /* _ASM_X86_KVM_HOST_H */
> diff --git a/arch/x86/kvm/irq_comm.c b/arch/x86/kvm/irq_comm.c
> index 9efff9e..97ba1d6 100644
> --- a/arch/x86/kvm/irq_comm.c
> +++ b/arch/x86/kvm/irq_comm.c
> @@ -297,6 +297,100 @@ out:
>   return r;
>  }
>  
> +static bool kvm_intr_is_single_vcpu_fast(struct kvm *kvm,
> +  struct kvm_lapic_irq *irq,
> +  struct kvm_vcpu **dest_vcpu)

Please put this in lapic.c, similar to kvm_irq_delivery_to_apic_fast, so
that you do not have to export other functions.

> +{
> + struct kvm_apic_map *map;
> + bool ret = false;
> + struct kvm_lapic *dst = NULL;
> +
> + if (irq->shorthand)
> + return false;
> +
> + rcu_read_lock();
> + map = rcu_dereference(kvm->arch.apic_map);
> +
> + if (!map)
> + goto out;
> +
> + if (irq->dest_mode == APIC_DEST_PHYSICAL) {
> + if (irq->dest_id == 0xFF)
> + goto out;
> +
> + if (irq->dest_id >= ARRAY_SIZE(map->phys_map)) {

Warning here is wrong, the guest can trigger it.

> + WARN_ON_ONCE(1);
> + goto out;
> + }
> +
> + dst = map->phys_map[irq->dest_id];
> + if (dst && kvm_apic_present(dst->vcpu))
> + *dest_vcpu = dst->vcpu;
> + else
> + goto out;
> + } else {
> + u16 cid;
> + unsigned long bitmap = 1;
> + int i, r = 0;
> +
> + if (!kvm_apic_logical_map_valid(map)) {
> + WARN_ON_ONCE(1);

Same here.

> + goto out;
> + }
> +
> + apic_logical_id(map, irq->dest_id, &cid, (u16 *)&bitmap);
> +
> + if (cid >= ARRAY_SIZE(map->logical_map)) {
> + WARN_ON_ONCE(1);

Same here.

Otherwise looks good.

Paolo

> + goto out;
> + }
> +
> + for_each_set_bit(i, &bitmap, 16) {
> + dst = map->logical_map[cid][i];
> + if (++r == 2)
> + goto out;
> + }
> +
> + if (dst && kvm_apic_present(dst->vcpu))
> + *dest_vcpu = dst->vcpu;
> + else
> + goto out;
> + }
> +
> + ret = true;
> +out:
> + rcu_read_unlock();
> + return ret;
> +}
> +
> +
> +bool kvm_intr_is_single_vcpu(struct kvm *kvm, struct kvm_lapic_irq *irq,
> +  struct kvm_vcpu **dest_vcpu)
> +{
> + int i, r = 0;
> + struct kvm_vcpu *vcpu;
> +
> + if (kvm_intr_is_single_vcpu_fast(kvm, irq, dest_vcpu))
> + return true;
> +
> + kvm_for_each_vcpu(i, vcpu, kvm) {
> + if (!kvm_apic_present(vcpu))
> + continue;
> +
> + if (!kvm_apic_match_dest(vcpu, NULL, irq->shorthand,
> + irq->dest_id, irq->dest_mode))
> + continue;
> +
> + if (++r == 2)
> + return false;
> +
> + *dest_vcpu = vcpu;
> + }
> +
> + return r == 1;
> +}
> +EXPORT_SYMBOL_GPL(kvm_intr_is_single_vcpu);
> +
>  #define IOAPIC_ROUTING_ENTRY(irq) \
>   { .gsi = irq, .type = KVM_IRQ_ROUTING_IRQCHIP,  \
> .u.irqchip = { .irqchip = KVM_IRQCHIP_IOAPIC, .pin = (irq) } }
> diff --git 

Re: [PATCH V3 2/2] debugfs: don't assume sizeof(bool) to be 4 bytes

2015-09-16 Thread Ingo Molnar

* Steven Rostedt  wrote:

> But please, next time, go easy on the Cc list. Maybe just use bcc for those 
> not 
> on the list, stating that you BCC'd a lot of people to make sure this is 
> sane, 
> but didn't want to spam everyone with every reply.

Not just that, such a long Cc: list is a semi-guarantee that various list 
engines 
(vger included I think) would drop the mail as spam and nobody else would get 
the 
mail...

Thanks,

Ingo
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH v8 09/13] KVM: Add an arch specific hooks in 'struct kvm_kernel_irqfd'

2015-09-16 Thread Paolo Bonzini


On 16/09/2015 10:50, Feng Wu wrote:
> +int kvm_arch_update_irqfd_routing(struct kvm *kvm, unsigned int host_irq,
> +uint32_t guest_irq, bool set)
> +{
> + return !kvm_x86_ops->update_pi_irte ? -EINVAL :
> + kvm_x86_ops->update_pi_irte(kvm, host_irq, guest_irq, set);
> +}
> +

Just use "if" here.  No need to resend if this is the only comment.

> 
>  }
> +int  __attribute__((weak)) kvm_arch_update_irqfd_routing(
> + struct kvm *kvm, unsigned

Empty line after "}".

Paolo
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH V3 2/2] debugfs: don't assume sizeof(bool) to be 4 bytes

2015-09-16 Thread Borislav Petkov
On Tue, Sep 15, 2015 at 01:47:32PM -0400, Steven Rostedt wrote:
> What do others think when there's a change that goes across the board
> this much? BCC OK with you, as just an FYI, I'm doing this? Or should
> just the lists be enough and if you don't see it, too bad?

Bcc sounds good to me.

(Let's see how many bounces I get when I reply to such a huge CC list.
:-))

-- 
Regards/Gruss,
Boris.

ECO tip #101: Trim your mails when you reply.
--
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: About amd-iommu support for kdump kernel

2015-09-16 Thread Baoquan He
On 09/15/15 at 08:06pm, Baoquan He wrote:
> Hi Joerg,
> 
> Recently I am free and can try to work out the amd-iommu support for
> kdump kernel. Now I have some plans and draft them into codes and debugging.
> And also there are problems. I brief them here, could you please have a
> look and give some suggestions?

Well, this mail looks messy. I will split them into several patches for
better understand and with patch log. Now I am trying to debug with
adding device_flush_xxx, still don't know why timer interrupt is
impacted and cause reboot.

> 
> Two parts:
> 
> 1) IO page mapping
>  .> Checking if it's in kdump kernel and previously enabled
>  .> If yes do below operations:
>   .> Do not disable amd iommu
>   .> Copy dev table from old kernel and set the old domain id in 
> amd_iommu_pd_alloc_bitmap
>   .> Don't call update_domain() to update device table until the first 
> __map_single() is called by device driver init
> 
> 2)interrupt remapping
>  .> I didn't think of this well. Now I only copy the old irq table when it 
> first calls get_irq_table(). 
> 
> 
> Attach the patches here, the first 2 patches are clean up patch,
> attach them too for better code understanding.
> 
> The problem happened in check_timer(). Seems timer interrupt doesn't
> work well after modify_irte(). I don't know why it happened. Though I
> have copied the old irte tables.
> 
> [   12.296525] ..TIMER: vector=0x30 apic1=0 pin1=2 apic2=-1 pin2=-1
> [   12.302513] CPU: 0 PID: 1 Comm: swapper/0 Not tainted 4.2.0+ #18
> [   12.308500] Hardware name: AMD Dinar/Dinar, BIOS RDN1505B 06/05/2013
> [   12.314832]   85c693e9 880030d6fd58
> 8139746f
> [   12.322239]  00a0 880030d6fd90 814b4813
> 880030d283c0
> [   12.329645]  880030d2e100 0002 
> 880030c29808
> [   12.337052] Call Trace:
> [   12.339493]  [] dump_stack+0x44/0x55
> [   12.344616]  [] modify_irte+0x23/0xc0
> [   12.349827]  [] irq_remapping_deactivate+0x1c/0x20
> [   12.356162]  [] irq_remapping_activate+0xe/0x10
> [   12.362238]  [] irq_domain_activate_irq+0x41/0x50
> [   12.368486]  [] irq_domain_activate_irq+0x2b/0x50
> [   12.374736]  [] setup_IO_APIC+0x33e/0x7e4
> [   12.380294]  [] ? clear_IO_APIC+0x39/0x60
> [   12.385853]  [] apic_bsp_setup+0xa1/0xac
> [   12.391323]  [] native_smp_prepare_cpus+0x25f/0x2db
> [   12.397747]  [] kernel_init_freeable+0xc9/0x228
> [   12.403824]  [] ? rest_init+0x80/0x80
> [   12.409034]  [] kernel_init+0xe/0xe0
> [   12.414158]  [] ret_from_fork+0x3f/0x70
> [   12.419541]  [] ? rest_init+0x80/0x80
> [   12.424751]   modify_irte   devid: 00:14.0 index: 2, vector:48
> [   12.440491] Kernel panic - not syncing: timer doesn't work through
> Interrupt-remapped IO-APIC
> [   12.449022] CPU: 0 PID: 1 Comm: swapper/0 Not tainted 4.2.0+ #18
> [   12.455008] Hardware name: AMD Dinar/Dinar, BIOS RDN1505B 06/05/2013
> [   12.461340]   85c693e9 880030d6fd58
> 8139746f
> [   12.468753]  81a3cdf8 880030d6fde0 8119e921
> 0008
> [   12.476165]  880030d6fdf0 880030d6fd88 85c693e9
> 813a41b5
> [   12.483577] Call Trace:
> [   12.486018]  [] dump_stack+0x44/0x55
> [   12.491142]  [] panic+0xd3/0x20b
> [   12.495919]  [] ? delay_tsc+0x25/0x60
> [   12.501129]  [] panic_if_irq_remap+0x1a/0x20
> [   12.506947]  [] setup_IO_APIC+0x375/0x7e4
> [   12.512503]  [] ? clear_IO_APIC+0x39/0x60
> [   12.518060]  [] apic_bsp_setup+0xa1/0xac
> [   12.523530]  [] native_smp_prepare_cpus+0x25f/0x2db
> [   12.529952]  [] kernel_init_freeable+0xc9/0x228
> [   12.536030]  [] ? rest_init+0x80/0x80
> [   12.541238]  [] kernel_init+0xe/0xe0
> [   12.546361]  [] ret_from_fork+0x3f/0x70
> [   12.551745]  [] ? rest_init+0x80/0x80
> [   12.556957] Rebooting in 10 seconds..

> From 09943d6354ee1626426f6ff060d92173bb164279 Mon Sep 17 00:00:00 2001
> From: Baoquan He 
> Date: Thu, 25 Jun 2015 16:46:16 +0800
> Subject: [PATCH 1/3] iommu/amd: Fix a code bug of bitmap operation
> 
> Signed-off-by: Baoquan He 
> ---
>  drivers/iommu/amd_iommu.c  | 2 +-
>  drivers/iommu/amd_iommu_init.c | 4 ++--
>  2 files changed, 3 insertions(+), 3 deletions(-)
> 
> diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c
> index 45b7581..552730b 100644
> --- a/drivers/iommu/amd_iommu.c
> +++ b/drivers/iommu/amd_iommu.c
> @@ -1901,7 +1901,7 @@ static struct dma_ops_domain *dma_ops_domain_alloc(void)
>* mark the first page as allocated so we never return 0 as
>* a valid dma-address. So we can use 0 as error value
>*/
> - dma_dom->aperture[0]->bitmap[0] = 1;
> + __set_bit(0, dma_dom->aperture[0]->bitmap);
>   dma_dom->next_address = 0;
>  
>  
> diff --git a/drivers/iommu/amd_iommu_init.c b/drivers/iommu/amd_iommu_init.c
> index f954ae8..0fe7eb4 100644
> --- a/drivers/iommu/amd_iommu_init.c
> +++ 

[PATCH v8 04/13] KVM: Make struct kvm_irq_routing_table accessible

2015-09-16 Thread Feng Wu
Move struct kvm_irq_routing_table from irqchip.c to kvm_host.h,
so we can use it outside of irqchip.c.

Signed-off-by: Feng Wu 
Reviewed-by: Paolo Bonzini 
---
 include/linux/kvm_host.h | 14 ++
 virt/kvm/irqchip.c   | 10 --
 2 files changed, 14 insertions(+), 10 deletions(-)

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 5ac8d21..5f183fb 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -328,6 +328,20 @@ struct kvm_kernel_irq_routing_entry {
struct hlist_node link;
 };
 
+#ifdef CONFIG_HAVE_KVM_IRQ_ROUTING
+
+struct kvm_irq_routing_table {
+   int chip[KVM_NR_IRQCHIPS][KVM_IRQCHIP_NUM_PINS];
+   u32 nr_rt_entries;
+   /*
+* Array indexed by gsi. Each entry contains list of irq chips
+* the gsi is connected to.
+*/
+   struct hlist_head map[0];
+};
+
+#endif
+
 #ifndef KVM_PRIVATE_MEM_SLOTS
 #define KVM_PRIVATE_MEM_SLOTS 0
 #endif
diff --git a/virt/kvm/irqchip.c b/virt/kvm/irqchip.c
index 21c1424..2cf45d3 100644
--- a/virt/kvm/irqchip.c
+++ b/virt/kvm/irqchip.c
@@ -31,16 +31,6 @@
 #include 
 #include "irq.h"
 
-struct kvm_irq_routing_table {
-   int chip[KVM_NR_IRQCHIPS][KVM_IRQCHIP_NUM_PINS];
-   u32 nr_rt_entries;
-   /*
-* Array indexed by gsi. Each entry contains list of irq chips
-* the gsi is connected to.
-*/
-   struct hlist_head map[0];
-};
-
 int kvm_irq_map_gsi(struct kvm *kvm,
struct kvm_kernel_irq_routing_entry *entries, int gsi)
 {
-- 
2.1.0

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH v8 12/13] KVM: Warn if 'SN' is set during posting interrupts by software

2015-09-16 Thread Paolo Bonzini


On 16/09/2015 10:50, Feng Wu wrote:
> Currently, we don't support urgent interrupts; all interrupts
> are recognized as non-urgent interrupts, so we cannot post
> interrupts when 'SN' is set.
> 
> If the vcpu is in guest mode, it cannot have been scheduled out,
> and that is currently the only case in which SN is set, so warn if
> SN is set.
> 
> Signed-off-by: Feng Wu 
> Reviewed-by: Paolo Bonzini 

Please fold this into patch 10.

Paolo

> ---
>  arch/x86/kvm/vmx.c | 16 
>  1 file changed, 16 insertions(+)
> 
> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> index 9888c43..58fbbc6 100644
> --- a/arch/x86/kvm/vmx.c
> +++ b/arch/x86/kvm/vmx.c
> @@ -4498,6 +4498,22 @@ static inline bool 
> kvm_vcpu_trigger_posted_interrupt(struct kvm_vcpu *vcpu)
>  {
>  #ifdef CONFIG_SMP
>   if (vcpu->mode == IN_GUEST_MODE) {
> + struct vcpu_vmx *vmx = to_vmx(vcpu);
> +
> + /*
> +  * Currently, we don't support urgent interrupt,
> +  * all interrupts are recognized as non-urgent
> +  * interrupt, so we cannot post interrupts when
> +  * 'SN' is set.
> +  *
> +  * If the vcpu is in guest mode, it means it is
> +  * running instead of being scheduled out and
> +  * waiting in the run queue, and that's the only
> +  * case when 'SN' is set currently, warning if
> +  * 'SN' is set.
> +  */
> + WARN_ON_ONCE(pi_test_sn(&vmx->pi_desc));
> +
>   apic->send_IPI_mask(get_cpu_mask(vcpu->cpu),
>   POSTED_INTR_VECTOR);
>   return true;
> 
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH v8 11/13] KVM: Update Posted-Interrupts Descriptor when vCPU is blocked

2015-09-16 Thread Paolo Bonzini


On 16/09/2015 10:50, Feng Wu wrote:
>* are two possible cases:
> -  * 1. After running 'pi_pre_block', context switch
> +  * 1. After running 'pre_block', context switch

Please fold this in the previous patch.

>*happened. For this case, 'sn' was set in
>*vmx_vcpu_put(), so we need to clear it here.
> -  * 2. After running 'pi_pre_block', we were blocked,
> +  * 2. After running 'pre_block', we were blocked,
>*and woken up by some other guy. For this case,

(Same).

> + spin_lock(&per_cpu(blocked_vcpu_on_cpu_lock, cpu));
> + list_for_each_entry(vcpu, &per_cpu(blocked_vcpu_on_cpu, cpu),
> + blocked_vcpu_list) {
> + struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);
> +
> + if (pi_test_on(pi_desc) == 1)
> + kvm_vcpu_kick(vcpu);
> + }
> + spin_unlock(&per_cpu(blocked_vcpu_on_cpu_lock, cpu));
> +}

Please document the lock in Documentation/virtual/kvm/locking.txt.

Paolo
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH v3 3/6] iommu: add ARM short descriptor page table allocator.

2015-09-16 Thread Will Deacon
Hello Yong,

On Mon, Sep 14, 2015 at 01:25:00PM +0100, Yong Wu wrote:
> On Tue, 2015-07-21 at 18:11 +0100, Will Deacon wrote:
> > > +   ret = _arm_short_map(data, iova, paddr, pgdprot, pteprot, large);
> > > +
> > > +   tlb->tlb_add_flush(iova, size, true, data->iop.cookie);
> > > +   tlb->tlb_sync(data->iop.cookie);
> > 
> > In _arm_short_map, it looks like you can only go from invalid -> valid,
> > so why do you need to flush the TLB here?
> 
> Hi Will,
>This is about the TLB flush after mapping an iova. I deleted it in v4
> following this suggestion, but we have hit a problem with it.

Ok.

> Take an example with JPEG. The test steps are:
> a).JPEG HW decodes a picture with the source iova, like 0xfd78.
> b).JPEG HW decode is done. It will unmap the iova (write 0 in the pagetable
> and flush the tlb).
> c).JPEG HW decodes the second picture, whose source iova is also
> 0xfd78.
>Then our HW may fail because it auto-prefetches. It may prefetch
> between steps b) and c), so the HW may fetch the pagetable content
> which was unmapped in step b). The HW will then get 0 as the iova's
> physical address, and it will raise a translation fault!

Oh no! So-called "negative caching" is certainly prohibited by the ARM
architecture, but if you've built it then we can probably work around it
as an additional quirk. I assume the prefetcher stops prefetching when
it sees an invalid descriptor?

> So I think our HW need flush-tlb after map iova. Could we add a
> QUIRK like "IO_PGTABLE_QUIRK_AUTO_PREFETCH_ENABLE" for it?
> If it's not allowed, we will have to add this in our internal function
> mtk_iommu_map of mtk_iommu.c.

Actually, this type of quirk is ringing bells with me (I think another
IOMMU needed something similar in the past), so maybe just add
IO_PGTABLE_QUIRK_TLBI_ON_MAP?

Will
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH v4 3/6] iommu: add ARM short descriptor page table allocator.

2015-09-16 Thread Will Deacon
On Mon, Aug 03, 2015 at 11:21:16AM +0100, Yong Wu wrote:
> This patch is for ARM Short Descriptor Format.
> 
> Signed-off-by: Yong Wu 
> ---
>  drivers/iommu/Kconfig|  18 +
>  drivers/iommu/Makefile   |   1 +
>  drivers/iommu/io-pgtable-arm-short.c | 813 
> +++
>  drivers/iommu/io-pgtable-arm.c   |   3 -
>  drivers/iommu/io-pgtable.c   |   4 +
>  drivers/iommu/io-pgtable.h   |  14 +
>  6 files changed, 850 insertions(+), 3 deletions(-)
>  create mode 100644 drivers/iommu/io-pgtable-arm-short.c
> 
> diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig
> index f1fb1d3..3abd066 100644
> --- a/drivers/iommu/Kconfig
> +++ b/drivers/iommu/Kconfig
> @@ -39,6 +39,24 @@ config IOMMU_IO_PGTABLE_LPAE_SELFTEST
> 
>   If unsure, say N here.
> 
> +config IOMMU_IO_PGTABLE_SHORT
> +   bool "ARMv7/v8 Short Descriptor Format"
> +   select IOMMU_IO_PGTABLE
> +   depends on ARM || ARM64 || COMPILE_TEST
> +   help
> + Enable support for the ARM Short-descriptor pagetable format.
> + This allocator supports 2 levels translation tables which supports

Some minor rewording here:

"...2 levels of translation tables, which enables a 32-bit memory map based
 on..."

> + a memory map based on memory sections or pages.
> +
> +config IOMMU_IO_PGTABLE_SHORT_SELFTEST
> +   bool "Short Descriptor selftests"
> +   depends on IOMMU_IO_PGTABLE_SHORT
> +   help
> + Enable self-tests for Short-descriptor page table allocator.
> + This performs a series of page-table consistency checks during boot.
> +
> + If unsure, say N here.
> +
>  endmenu
> 
>  config IOMMU_IOVA

[...]

> +#define ARM_SHORT_PGDIR_SHIFT  20
> +#define ARM_SHORT_PAGE_SHIFT   12
> +#define ARM_SHORT_PTRS_PER_PTE \
> +   (1 << (ARM_SHORT_PGDIR_SHIFT - ARM_SHORT_PAGE_SHIFT))
> +#define ARM_SHORT_BYTES_PER_PTE\
> +   (ARM_SHORT_PTRS_PER_PTE * sizeof(arm_short_iopte))
> +
> +/* level 1 pagetable */
> +#define ARM_SHORT_PGD_TYPE_PGTABLE BIT(0)
> +#define ARM_SHORT_PGD_TYPE_SECTION BIT(1)
> +#define ARM_SHORT_PGD_B            BIT(2)
> +#define ARM_SHORT_PGD_C            BIT(3)
> +#define ARM_SHORT_PGD_PGTABLE_NS   BIT(3)
> +#define ARM_SHORT_PGD_SECTION_XN   BIT(4)
> +#define ARM_SHORT_PGD_IMPLE        BIT(9)
> +#define ARM_SHORT_PGD_RD_WR        (3 << 10)
> +#define ARM_SHORT_PGD_RDONLY   BIT(15)
> +#define ARM_SHORT_PGD_S            BIT(16)
> +#define ARM_SHORT_PGD_nG   BIT(17)
> +#define ARM_SHORT_PGD_SUPERSECTION BIT(18)
> +#define ARM_SHORT_PGD_SECTION_NS   BIT(19)
> +
> +#define ARM_SHORT_PGD_TYPE_SUPERSECTION\
> +   (ARM_SHORT_PGD_TYPE_SECTION | ARM_SHORT_PGD_SUPERSECTION)
> +#define ARM_SHORT_PGD_SECTION_TYPE_MSK \
> +   (ARM_SHORT_PGD_TYPE_SECTION | ARM_SHORT_PGD_SUPERSECTION)
> +#define ARM_SHORT_PGD_PGTABLE_TYPE_MSK \
> +   (ARM_SHORT_PGD_TYPE_SECTION | ARM_SHORT_PGD_TYPE_PGTABLE)
> +#define ARM_SHORT_PGD_TYPE_IS_PGTABLE(pgd) \
> +   (((pgd) & ARM_SHORT_PGD_PGTABLE_TYPE_MSK) == 
> ARM_SHORT_PGD_TYPE_PGTABLE)
> +#define ARM_SHORT_PGD_TYPE_IS_SECTION(pgd) \
> +   (((pgd) & ARM_SHORT_PGD_SECTION_TYPE_MSK) == 
> ARM_SHORT_PGD_TYPE_SECTION)
> +#define ARM_SHORT_PGD_TYPE_IS_SUPERSECTION(pgd)\
> +   (((pgd) & ARM_SHORT_PGD_SECTION_TYPE_MSK) == \
> +   ARM_SHORT_PGD_TYPE_SUPERSECTION)
> +#define ARM_SHORT_PGD_PGTABLE_MSK  0xfffffc00

You could use (~(ARM_SHORT_BYTES_PER_PTE - 1)), I think.

> +#define ARM_SHORT_PGD_SECTION_MSK  (~(SZ_1M - 1))
> +#define ARM_SHORT_PGD_SUPERSECTION_MSK (~(SZ_16M - 1))
> +
> +/* level 2 pagetable */
> +#define ARM_SHORT_PTE_TYPE_LARGE   BIT(0)
> +#define ARM_SHORT_PTE_SMALL_XN BIT(0)
> +#define ARM_SHORT_PTE_TYPE_SMALL   BIT(1)
> +#define ARM_SHORT_PTE_B        BIT(2)
> +#define ARM_SHORT_PTE_C        BIT(3)
> +#define ARM_SHORT_PTE_RD_WR    (3 << 4)
> +#define ARM_SHORT_PTE_RDONLY   BIT(9)
> +#define ARM_SHORT_PTE_S        BIT(10)
> +#define ARM_SHORT_PTE_nG   BIT(11)
> +#define ARM_SHORT_PTE_LARGE_XN BIT(15)
> +#define ARM_SHORT_PTE_LARGE_MSK    (~(SZ_64K - 1))
> +#define ARM_SHORT_PTE_SMALL_MSK    (~(SZ_4K - 1))
> +#define ARM_SHORT_PTE_TYPE_MSK \
> +   (ARM_SHORT_PTE_TYPE_LARGE | ARM_SHORT_PTE_TYPE_SMALL)
> +#define ARM_SHORT_PTE_TYPE_IS_SMALLPAGE(pte)   \
> +   (((pte) & ARM_SHORT_PTE_TYPE_SMALL) == ARM_SHORT_PTE_TYPE_SMALL)

Maybe a comment 

Re: [PATCH V3 2/2] debugfs: don't assume sizeof(bool) to be 4 bytes

2015-09-16 Thread Charles Keepax
On Tue, Sep 15, 2015 at 02:04:59PM +0530, Viresh Kumar wrote:
> Long back 'bool' type used to be a typecast to 'int', but that changed
> in v2.6.19. And that is a typecast to _Bool now, which (mostly) takes
> just a byte. Anyway, the bool type is implementation defined, and better
> we don't assume its size to be 4 bytes or 1.
> 
> The problem with current code is that it reads/writes 4 bytes for a
> boolean, which will read/update 3 excess bytes following the boolean
> variable (when sizeof(bool) is 1 byte). And that can lead to hard to fix
> bugs. It was a nightmare cracking this one.
> 
> The debugfs code has had this bug since it was first introduced,
> but it never got caught, strangely. Maybe the bool variables (monitored
> by debugfs) were followed by an 'int' or something bigger and the pad
> bytes made sure we never saw this issue.
> 
> But the OPP (Operating performance points) library has three booleans
> allocated to contiguous bytes and this bug got hit quite soon (the
> debugfs support for OPP is yet to be merged). It showed up as corruption
> of the debugfs boolean symbols, where Y was becoming N and vice versa.
> 
> Fix it properly by changing the last argument of debugfs_create_bool(),
> to type 'bool *' instead of 'u32 *', so that it doesn't depend on sizeof
> bool at all.
> 
> That required updates to all user sites as well in a single commit.
> regmap core was also using debugfs_{read|write}_file_bool(), directly
> and variable types were updated for that to be bool as well.
> 
> Acked-by: Mark Brown 
> Signed-off-by: Viresh Kumar 
> ---

For the minor wm_adsp change:

Acked-by: Charles Keepax 

Thanks,
Charles
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH v5 52/53] PCI: Introduce resource_disabled()

2015-09-16 Thread Yinghai Lu
The current code checks !flags, and we are going to use
IORESOURCE_DISABLED instead of clearing resource flags.

Let's convert all !flags checks to the helper function resource_disabled().
resource_disabled() will check both !flags and IORESOURCE_DISABLED.

Cc: linux-al...@vger.kernel.org
Cc: linux-i...@vger.kernel.org
Cc: linux-am33-l...@redhat.com
Cc: linuxppc-...@lists.ozlabs.org
Cc: linux-s...@vger.kernel.org
Cc: sparcli...@vger.kernel.org
Cc: linux-...@vger.kernel.org
Cc: linux-xte...@linux-xtensa.org
Cc: iommu@lists.linux-foundation.org
Cc: linux...@vger.kernel.org
Signed-off-by: Yinghai Lu 
---
 arch/alpha/kernel/pci.c   |  2 +-
 arch/ia64/pci/pci.c   |  4 ++--
 arch/microblaze/pci/pci-common.c  | 15 ---
 arch/mn10300/unit-asb2305/pci-asb2305.c   |  4 ++--
 arch/mn10300/unit-asb2305/pci.c   |  4 ++--
 arch/powerpc/kernel/pci-common.c  | 16 +---
 arch/powerpc/platforms/powernv/pci-ioda.c | 12 ++--
 arch/s390/pci/pci.c   |  2 +-
 arch/sparc/kernel/pci.c   |  2 +-
 arch/x86/pci/i386.c   |  4 ++--
 arch/xtensa/kernel/pci.c  |  4 ++--
 drivers/iommu/intel-iommu.c   |  3 ++-
 drivers/pci/host/pcie-rcar.c  |  2 +-
 drivers/pci/iov.c |  2 +-
 drivers/pci/probe.c   |  2 +-
 drivers/pci/quirks.c  |  4 ++--
 drivers/pci/rom.c |  2 +-
 drivers/pci/setup-bus.c   |  8 
 drivers/pci/setup-res.c   |  2 +-
 include/linux/ioport.h|  4 
 20 files changed, 53 insertions(+), 45 deletions(-)

diff --git a/arch/alpha/kernel/pci.c b/arch/alpha/kernel/pci.c
index 5f387ee..c89c8ef 100644
--- a/arch/alpha/kernel/pci.c
+++ b/arch/alpha/kernel/pci.c
@@ -282,7 +282,7 @@ pcibios_claim_one_bus(struct pci_bus *b)
for (i = 0; i < PCI_NUM_RESOURCES; i++) {
struct resource *r = >resource[i];
 
-   if (r->parent || !r->start || !r->flags)
+   if (r->parent || !r->start || resource_disabled(r))
continue;
if (pci_has_flag(PCI_PROBE_ONLY) ||
(r->flags & IORESOURCE_PCI_FIXED)) {
diff --git a/arch/ia64/pci/pci.c b/arch/ia64/pci/pci.c
index 7cc3be9..cc293ea 100644
--- a/arch/ia64/pci/pci.c
+++ b/arch/ia64/pci/pci.c
@@ -501,7 +501,7 @@ void pcibios_fixup_device_resources(struct pci_dev *dev)
for (idx = 0; idx < PCI_BRIDGE_RESOURCES; idx++) {
struct resource *r = >resource[idx];
 
-   if (!r->flags || r->parent || !r->start)
+   if (resource_disabled(r) || r->parent || !r->start)
continue;
 
pci_claim_resource(dev, idx);
@@ -519,7 +519,7 @@ static void pcibios_fixup_bridge_resources(struct pci_dev 
*dev)
for (idx = PCI_BRIDGE_RESOURCES; idx < PCI_NUM_RESOURCES; idx++) {
struct resource *r = >resource[idx];
 
-   if (!r->flags || r->parent || !r->start)
+   if (resource_disabled(r) || r->parent || !r->start)
continue;
 
pci_claim_bridge_resource(dev, idx);
diff --git a/arch/microblaze/pci/pci-common.c b/arch/microblaze/pci/pci-common.c
index 09b1af6..c123d3c 100644
--- a/arch/microblaze/pci/pci-common.c
+++ b/arch/microblaze/pci/pci-common.c
@@ -705,7 +705,7 @@ static void pcibios_fixup_resources(struct pci_dev *dev)
}
for (i = 0; i < DEVICE_COUNT_RESOURCE; i++) {
struct resource *res = dev->resource + i;
-   if (!res->flags)
+   if (resource_disabled(res))
continue;
if (res->start == 0) {
pr_debug("PCI:%s Resource %d %016llx-%016llx [%x]",
@@ -806,7 +806,7 @@ static void pcibios_fixup_bridge(struct pci_bus *bus)
pci_bus_for_each_resource(bus, res, i) {
if (!res)
continue;
-   if (!res->flags)
+   if (resource_disabled(res))
continue;
if (i >= 3 && bus->self->transparent)
continue;
@@ -985,7 +985,7 @@ static void pcibios_allocate_bus_resources(struct pci_bus 
*bus)
 pci_domain_nr(bus), bus->number);
 
pci_bus_for_each_resource(bus, res, i) {
-   if (!res || !res->flags
+   if (!res || resource_disabled(res)
|| res->start > res->end || res->parent)
continue;
if (bus->parent == NULL)
@@ -1087,7 +1087,8 @@ static void __init pcibios_allocate_resources(int pass)
r = >resource[idx];
if (r->parent)  /* Already allocated */
continue;
-  

[PATCH 2/4] ARM: OMAP3: hwmod data: Remove legacy IOMMU data

2015-09-16 Thread Suman Anna
The legacy-mode device creation logic for IOMMU devices has
been cleaned up, so the device attribute data, irq information
and address data are no longer required. Remove all of these
data for the ISP & IVA IOMMU devices.

Signed-off-by: Suman Anna 
---
 arch/arm/mach-omap2/omap_hwmod_3xxx_data.c | 42 --
 1 file changed, 42 deletions(-)

diff --git a/arch/arm/mach-omap2/omap_hwmod_3xxx_data.c 
b/arch/arm/mach-omap2/omap_hwmod_3xxx_data.c
index dc55f8dedf2c..01dfccaa0c3e 100644
--- a/arch/arm/mach-omap2/omap_hwmod_3xxx_data.c
+++ b/arch/arm/mach-omap2/omap_hwmod_3xxx_data.c
@@ -25,7 +25,6 @@
 #include "l4_3xxx.h"
 #include 
 #include 
-#include 
 #include 
 #include 
 
@@ -2976,80 +2975,40 @@ static struct omap_hwmod_class omap3xxx_mmu_hwmod_class 
= {
 };
 
 /* mmu isp */
-
-static struct omap_mmu_dev_attr mmu_isp_dev_attr = {
-   .nr_tlb_entries = 8,
-};
-
 static struct omap_hwmod omap3xxx_mmu_isp_hwmod;
-static struct omap_hwmod_irq_info omap3xxx_mmu_isp_irqs[] = {
-   { .irq = 24 + OMAP_INTC_START, },
-   { .irq = -1 }
-};
-
-static struct omap_hwmod_addr_space omap3xxx_mmu_isp_addrs[] = {
-   {
-   .pa_start   = 0x480bd400,
-   .pa_end = 0x480bd47f,
-   .flags  = ADDR_TYPE_RT,
-   },
-   { }
-};
 
 /* l4_core -> mmu isp */
 static struct omap_hwmod_ocp_if omap3xxx_l4_core__mmu_isp = {
.master = _l4_core_hwmod,
.slave  = _mmu_isp_hwmod,
-   .addr   = omap3xxx_mmu_isp_addrs,
.user   = OCP_USER_MPU | OCP_USER_SDMA,
 };
 
 static struct omap_hwmod omap3xxx_mmu_isp_hwmod = {
.name   = "mmu_isp",
.class  = _mmu_hwmod_class,
-   .mpu_irqs   = omap3xxx_mmu_isp_irqs,
.main_clk   = "cam_ick",
-   .dev_attr   = &mmu_isp_dev_attr,
.flags  = HWMOD_NO_IDLEST,
 };
 
 /* mmu iva */
 
-static struct omap_mmu_dev_attr mmu_iva_dev_attr = {
-   .nr_tlb_entries = 32,
-};
-
 static struct omap_hwmod omap3xxx_mmu_iva_hwmod;
-static struct omap_hwmod_irq_info omap3xxx_mmu_iva_irqs[] = {
-   { .irq = 28 + OMAP_INTC_START, },
-   { .irq = -1 }
-};
 
 static struct omap_hwmod_rst_info omap3xxx_mmu_iva_resets[] = {
{ .name = "mmu", .rst_shift = 1, .st_shift = 9 },
 };
 
-static struct omap_hwmod_addr_space omap3xxx_mmu_iva_addrs[] = {
-   {
-   .pa_start   = 0x5d00,
-   .pa_end = 0x5d7f,
-   .flags  = ADDR_TYPE_RT,
-   },
-   { }
-};
-
 /* l3_main -> iva mmu */
 static struct omap_hwmod_ocp_if omap3xxx_l3_main__mmu_iva = {
.master = _l3_main_hwmod,
.slave  = _mmu_iva_hwmod,
-   .addr   = omap3xxx_mmu_iva_addrs,
.user   = OCP_USER_MPU | OCP_USER_SDMA,
 };
 
 static struct omap_hwmod omap3xxx_mmu_iva_hwmod = {
.name   = "mmu_iva",
.class  = _mmu_hwmod_class,
-   .mpu_irqs   = omap3xxx_mmu_iva_irqs,
.clkdm_name = "iva2_clkdm",
.rst_lines  = omap3xxx_mmu_iva_resets,
.rst_lines_cnt  = ARRAY_SIZE(omap3xxx_mmu_iva_resets),
@@ -3062,7 +3021,6 @@ static struct omap_hwmod omap3xxx_mmu_iva_hwmod = {
.idlest_idle_bit = OMAP3430_ST_IVA2_SHIFT,
},
},
-   .dev_attr   = &mmu_iva_dev_attr,
.flags  = HWMOD_NO_IDLEST,
 };
 
-- 
2.5.0

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH 3/4] ARM: OMAP4: hwmod data: Remove legacy IOMMU attr and addrs

2015-09-16 Thread Suman Anna
OMAP4 has been DT-boot only for some time, and the legacy-mode
device creation logic for IOMMU devices has also been cleaned up,
so the dev_attr and address data is no longer required. So, remove
these attribute data and hwmod addr space for the IPU & DSP IOMMU
devices.

Signed-off-by: Suman Anna 
---
 arch/arm/mach-omap2/omap_hwmod_44xx_data.c | 31 --
 1 file changed, 31 deletions(-)

diff --git a/arch/arm/mach-omap2/omap_hwmod_44xx_data.c 
b/arch/arm/mach-omap2/omap_hwmod_44xx_data.c
index 43eebf2c59e2..56586b5d6051 100644
--- a/arch/arm/mach-omap2/omap_hwmod_44xx_data.c
+++ b/arch/arm/mach-omap2/omap_hwmod_44xx_data.c
@@ -30,7 +30,6 @@
 
 #include 
 #include 
-#include 
 #include 
 
 #include "omap_hwmod.h"
@@ -2088,30 +2087,16 @@ static struct omap_hwmod_class omap44xx_mmu_hwmod_class 
= {
 
 /* mmu ipu */
 
-static struct omap_mmu_dev_attr mmu_ipu_dev_attr = {
-   .nr_tlb_entries = 32,
-};
-
 static struct omap_hwmod omap44xx_mmu_ipu_hwmod;
 static struct omap_hwmod_rst_info omap44xx_mmu_ipu_resets[] = {
{ .name = "mmu_cache", .rst_shift = 2 },
 };
 
-static struct omap_hwmod_addr_space omap44xx_mmu_ipu_addrs[] = {
-   {
-   .pa_start   = 0x55082000,
-   .pa_end = 0x550820ff,
-   .flags  = ADDR_TYPE_RT,
-   },
-   { }
-};
-
 /* l3_main_2 -> mmu_ipu */
 static struct omap_hwmod_ocp_if omap44xx_l3_main_2__mmu_ipu = {
.master = _l3_main_2_hwmod,
.slave  = _mmu_ipu_hwmod,
.clk= "l3_div_ck",
-   .addr   = omap44xx_mmu_ipu_addrs,
.user   = OCP_USER_MPU | OCP_USER_SDMA,
 };
 
@@ -2130,35 +2115,20 @@ static struct omap_hwmod omap44xx_mmu_ipu_hwmod = {
.modulemode   = MODULEMODE_HWCTRL,
},
},
-   .dev_attr   = &mmu_ipu_dev_attr,
 };
 
 /* mmu dsp */
 
-static struct omap_mmu_dev_attr mmu_dsp_dev_attr = {
-   .nr_tlb_entries = 32,
-};
-
 static struct omap_hwmod omap44xx_mmu_dsp_hwmod;
 static struct omap_hwmod_rst_info omap44xx_mmu_dsp_resets[] = {
{ .name = "mmu_cache", .rst_shift = 1 },
 };
 
-static struct omap_hwmod_addr_space omap44xx_mmu_dsp_addrs[] = {
-   {
-   .pa_start   = 0x4a066000,
-   .pa_end = 0x4a0660ff,
-   .flags  = ADDR_TYPE_RT,
-   },
-   { }
-};
-
 /* l4_cfg -> dsp */
 static struct omap_hwmod_ocp_if omap44xx_l4_cfg__mmu_dsp = {
.master = _l4_cfg_hwmod,
.slave  = _mmu_dsp_hwmod,
.clk= "l4_div_ck",
-   .addr   = omap44xx_mmu_dsp_addrs,
.user   = OCP_USER_MPU | OCP_USER_SDMA,
 };
 
@@ -2177,7 +2147,6 @@ static struct omap_hwmod omap44xx_mmu_dsp_hwmod = {
.modulemode   = MODULEMODE_HWCTRL,
},
},
-   .dev_attr   = &mmu_dsp_dev_attr,
 };
 
 /*
-- 
2.5.0

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH 1/4] ARM: OMAP2+: Remove legacy device instantiation of IOMMUs

2015-09-16 Thread Suman Anna
The legacy-style IOMMU device creation is maintained currently only
for OMAP3 SoC, as all other SoCs are DT-boot only, and also to ensure
functionality of the OMAP3 ISP driver, the only in-kernel client user
on OMAP3 that supported both modes.

Commit 78c66fbcec71 ("[media] v4l: omap3isp: Drop platform data support")
removed the legacy device support from the OMAP3 ISP driver, so the
legacy device instantiation of OMAP IOMMU devices is no longer
needed, and is cleaned up.

Signed-off-by: Suman Anna 
---
 arch/arm/mach-omap2/omap-iommu.c | 66 
 1 file changed, 66 deletions(-)
 delete mode 100644 arch/arm/mach-omap2/omap-iommu.c

diff --git a/arch/arm/mach-omap2/omap-iommu.c b/arch/arm/mach-omap2/omap-iommu.c
deleted file mode 100644
index 8867eb4025bf..
--- a/arch/arm/mach-omap2/omap-iommu.c
+++ /dev/null
@@ -1,66 +0,0 @@
-/*
- * omap iommu: omap device registration
- *
- * Copyright (C) 2008-2009 Nokia Corporation
- *
- * Written by Hiroshi DOYU 
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include 
-#include 
-#include 
-#include 
-
-#include 
-#include "soc.h"
-#include "omap_hwmod.h"
-#include "omap_device.h"
-
-static int __init omap_iommu_dev_init(struct omap_hwmod *oh, void *unused)
-{
-   struct platform_device *pdev;
-   struct iommu_platform_data *pdata;
-   struct omap_mmu_dev_attr *a = (struct omap_mmu_dev_attr *)oh->dev_attr;
-   static int i;
-
-   pdata = kzalloc(sizeof(*pdata), GFP_KERNEL);
-   if (!pdata)
-   return -ENOMEM;
-
-   pdata->name = oh->name;
-   pdata->nr_tlb_entries = a->nr_tlb_entries;
-
-   if (oh->rst_lines_cnt == 1) {
-   pdata->reset_name = oh->rst_lines->name;
-   pdata->assert_reset = omap_device_assert_hardreset;
-   pdata->deassert_reset = omap_device_deassert_hardreset;
-   }
-
-   pdev = omap_device_build("omap-iommu", i, oh, pdata, sizeof(*pdata));
-
-   kfree(pdata);
-
-   if (IS_ERR(pdev)) {
-   pr_err("%s: device build err: %ld\n", __func__, PTR_ERR(pdev));
-   return PTR_ERR(pdev);
-   }
-
-   i++;
-
-   return 0;
-}
-
-static int __init omap_iommu_init(void)
-{
-   /* If dtb is there, the devices will be created dynamically */
-   if (of_have_populated_dt())
-   return -ENODEV;
-
-   return omap_hwmod_for_each_by_class("mmu", omap_iommu_dev_init, NULL);
-}
-omap_subsys_initcall(omap_iommu_init);
-/* must be ready before omap3isp is probed */
-- 
2.5.0

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH 4/4] ARM: OMAP2+: Remove omap_mmu_dev_attr structure

2015-09-16 Thread Suman Anna
The structure omap_mmu_dev_attr was used in the hwmod data for
supplying device-specific data through the .dev_attr field and
used in constructing the platform data for legacy device creation.
The legacy device creation of OMAP IOMMU devices has been cleaned
up, and this structure is no longer needed, so remove it.

Signed-off-by: Suman Anna 
---
 include/linux/platform_data/iommu-omap.h | 9 -
 1 file changed, 9 deletions(-)

diff --git a/include/linux/platform_data/iommu-omap.h 
b/include/linux/platform_data/iommu-omap.h
index 54a0a9582fad..0496d171700a 100644
--- a/include/linux/platform_data/iommu-omap.h
+++ b/include/linux/platform_data/iommu-omap.h
@@ -29,15 +29,6 @@ struct omap_iommu_arch_data {
struct omap_iommu *iommu_dev;
 };
 
-/**
- * struct omap_mmu_dev_attr - OMAP mmu device attributes for omap_hwmod
- * @nr_tlb_entries:number of entries supported by the translation
- * look-aside buffer (TLB).
- */
-struct omap_mmu_dev_attr {
-   int nr_tlb_entries;
-};
-
 struct iommu_platform_data {
const char *name;
const char *reset_name;
-- 
2.5.0

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH 0/4] Cleanup legacy OMAP IOMMU device creation

2015-09-16 Thread Suman Anna
Hi Tony,

The following series removes the legacy platform device creation
logic for OMAP IOMMU devices. I will clean up the legacy support
from the OMAP IOMMU driver in a subsequent merge window after
this series makes it to mainline.

Patches are based on 4.3-rc1 + the OMAP3 ISP instantiation cleanup
patch [1]. All the patches need to be picked up sequentially,
otherwise a NULL pointer dereference crash might be seen on OMAP3
legacy boots as the dev attribute structure is dereferenced directly
in mach-omap2/omap-iommu.c during platform data creation. Also, the
last patch removes the structure definition altogether, so it will
cause build issues if picked separately from the hwmod cleanup
patches.

I do not have any boards where I can still perform a legacy-style
boot, so patches verified using DT-boot only.

regards
Suman

[1] https://patchwork.kernel.org/patch/6806891/

Suman Anna (4):
  ARM: OMAP2+: Remove legacy device instantiation of IOMMUs
  ARM: OMAP3: hwmod data: Remove legacy IOMMU data
  ARM: OMAP4: hwmod data: Remove legacy IOMMU attr and addrs
  ARM: OMAP2+: Remove omap_mmu_dev_attr structure

 arch/arm/mach-omap2/omap-iommu.c   | 66 --
 arch/arm/mach-omap2/omap_hwmod_3xxx_data.c | 42 ---
 arch/arm/mach-omap2/omap_hwmod_44xx_data.c | 31 --
 include/linux/platform_data/iommu-omap.h   |  9 
 4 files changed, 148 deletions(-)
 delete mode 100644 arch/arm/mach-omap2/omap-iommu.c

-- 
2.5.0

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


RE: [PATCH v8 09/13] KVM: Add an arch specific hooks in 'struct kvm_kernel_irqfd'

2015-09-16 Thread Wu, Feng


> -Original Message-
> From: Paolo Bonzini [mailto:pbonz...@redhat.com]
> Sent: Wednesday, September 16, 2015 5:27 PM
> To: Wu, Feng; alex.william...@redhat.com; j...@8bytes.org;
> mtosa...@redhat.com
> Cc: eric.au...@linaro.org; k...@vger.kernel.org;
> iommu@lists.linux-foundation.org; linux-ker...@vger.kernel.org
> Subject: Re: [PATCH v8 09/13] KVM: Add an arch specific hooks in 'struct
> kvm_kernel_irqfd'
> 
> 
> 
> On 16/09/2015 10:50, Feng Wu wrote:
> > +int kvm_arch_update_irqfd_routing(struct kvm *kvm, unsigned int host_irq,
> > +  uint32_t guest_irq, bool set)
> > +{
> > +   return !kvm_x86_ops->update_pi_irte ? -EINVAL :
> > +   kvm_x86_ops->update_pi_irte(kvm, host_irq, guest_irq, set);
> > +}
> > +
> 
> Just use "if" here.  No need to resend if this is the only comment.

I am sorry, I don't quite understand. Do you mean I don't need to include
this patch in v9? If so, what about other patches with your Reviewed-by?

Thanks,
Feng

> 
> >
> >  }
> > +int  __attribute__((weak)) kvm_arch_update_irqfd_routing(
> > +   struct kvm *kvm, unsigned
> 
> Empty line after "}".
> 
> Paolo
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


RE: [PATCH v8 03/13] KVM: Define a new interface kvm_intr_is_single_vcpu()

2015-09-16 Thread Wu, Feng


> -Original Message-
> From: Paolo Bonzini [mailto:pbonz...@redhat.com]
> Sent: Wednesday, September 16, 2015 5:23 PM
> To: Wu, Feng; alex.william...@redhat.com; j...@8bytes.org;
> mtosa...@redhat.com
> Cc: eric.au...@linaro.org; k...@vger.kernel.org;
> iommu@lists.linux-foundation.org; linux-ker...@vger.kernel.org
> Subject: Re: [PATCH v8 03/13] KVM: Define a new interface
> kvm_intr_is_single_vcpu()
> 
> 
> 
> On 16/09/2015 10:49, Feng Wu wrote:
> > This patch defines a new interface kvm_intr_is_single_vcpu(),
> > which returns whether the interrupt is for single-CPU or not.
> >
> > It is used by VT-d PI, since now we only support single-CPU
> > interrupts. For lowest-priority interrupts, if the user configures
> > it via /proc/irq or uses irqbalance to make it single-CPU, we
> > can use PI to deliver the interrupts to it. Full functionality
> > of lowest-priority support will be added later.
> >
> > Signed-off-by: Feng Wu 
> > ---
> > v8:
> > - Some optimizations in kvm_intr_is_single_vcpu().
> > - Expose kvm_intr_is_single_vcpu() so we can use it in vmx code.
> > - Add kvm_intr_is_single_vcpu_fast() as the fast path to find
> >   the target vCPU for the single-destination interrupt
> >
> >  arch/x86/include/asm/kvm_host.h |  3 ++
> >  arch/x86/kvm/irq_comm.c | 94
> +
> >  arch/x86/kvm/lapic.c|  5 +--
> >  arch/x86/kvm/lapic.h|  2 +
> >  4 files changed, 101 insertions(+), 3 deletions(-)
> >
> > diff --git a/arch/x86/include/asm/kvm_host.h
> b/arch/x86/include/asm/kvm_host.h
> > index 49ec903..af11bca 100644
> > --- a/arch/x86/include/asm/kvm_host.h
> > +++ b/arch/x86/include/asm/kvm_host.h
> > @@ -1204,4 +1204,7 @@ int __x86_set_memory_region(struct kvm *kvm,
> >  int x86_set_memory_region(struct kvm *kvm,
> >   const struct kvm_userspace_memory_region *mem);
> >
> > +bool kvm_intr_is_single_vcpu(struct kvm *kvm, struct kvm_lapic_irq *irq,
> > +struct kvm_vcpu **dest_vcpu);
> > +
> >  #endif /* _ASM_X86_KVM_HOST_H */
> > diff --git a/arch/x86/kvm/irq_comm.c b/arch/x86/kvm/irq_comm.c
> > index 9efff9e..97ba1d6 100644
> > --- a/arch/x86/kvm/irq_comm.c
> > +++ b/arch/x86/kvm/irq_comm.c
> > @@ -297,6 +297,100 @@ out:
> > return r;
> >  }
> >
> > +static bool kvm_intr_is_single_vcpu_fast(struct kvm *kvm,
> > +struct kvm_lapic_irq *irq,
> > +struct kvm_vcpu **dest_vcpu)
> 
> Please put this in lapic.c, similar to kvm_irq_delivery_to_apic_fast, so
> that you do not have to export other functions.
> 
> > +{
> > +   struct kvm_apic_map *map;
> > +   bool ret = false;
> > +   struct kvm_lapic *dst = NULL;
> > +
> > +   if (irq->shorthand)
> > +   return false;
> > +
> > +   rcu_read_lock();
> > +   map = rcu_dereference(kvm->arch.apic_map);
> > +
> > +   if (!map)
> > +   goto out;
> > +
> > +   if (irq->dest_mode == APIC_DEST_PHYSICAL) {
> > +   if (irq->dest_id == 0xFF)
> > +   goto out;
> > +
> > +   if (irq->dest_id >= ARRAY_SIZE(map->phys_map)) {
> 
> Warning here is wrong, the guest can trigger it.

Could you please share more information about how the guest
triggers these conditions (including the following two)? Thanks
a lot!

Thanks,
Feng

> 
> > +   WARN_ON_ONCE(1);
> > +   goto out;
> > +   }
> > +
> > +   dst = map->phys_map[irq->dest_id];
> > +   if (dst && kvm_apic_present(dst->vcpu))
> > +   *dest_vcpu = dst->vcpu;
> > +   else
> > +   goto out;
> > +   } else {
> > +   u16 cid;
> > +   unsigned long bitmap = 1;
> > +   int i, r = 0;
> > +
> > +   if (!kvm_apic_logical_map_valid(map)) {
> > +   WARN_ON_ONCE(1);
> 
> Same here.
> 
> > +   goto out;
> > +   }
> > +
> > +   apic_logical_id(map, irq->dest_id, &cid, (u16 *)&bitmap);
> > +
> > +   if (cid >= ARRAY_SIZE(map->logical_map)) {
> > +   WARN_ON_ONCE(1);
> 
> Same here.
> 
> Otherwise looks good.
> 
> Paolo
> 
> > +   goto out;
> > +   }
> > +
> > +   for_each_set_bit(i, &bitmap, 16) {
> > +   dst = map->logical_map[cid][i];
> > +   if (++r == 2)
> > +   goto out;
> > +   }
> > +
> > +   if (dst && kvm_apic_present(dst->vcpu))
> > +   *dest_vcpu = dst->vcpu;
> > +   else
> > +   goto out;
> > +   }
> > +
> > +   ret = true;
> > +out:
> > +   rcu_read_unlock();
> > +   return ret;
> > +}
> > +
> > +
> > +bool kvm_intr_is_single_vcpu(struct kvm *kvm, struct kvm_lapic_irq *irq,
> > +struct kvm_vcpu **dest_vcpu)
> > +{
> > +   int i, r = 0;
> > +   struct kvm_vcpu *vcpu;
> > +
> > +   if (kvm_intr_is_single_vcpu_fast(kvm, irq, dest_vcpu))

[PATCH] iommu/amd: Fix BUG when faulting a PROT_NONE VMA

2015-09-16 Thread Jay Cornwall
handle_mm_fault indirectly triggers a BUG in do_numa_page when given
a VMA without read/write/execute access. Check this condition in do_fault.

do_fault -> handle_mm_fault -> handle_pte_fault -> do_numa_page

  mm/memory.c
  3147  static int do_numa_page(struct mm_struct *mm, struct vm_area_struct 
*vma,
  
  3159  /* A PROT_NONE fault should not end up here */
  3160  BUG_ON(!(vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE)));

Signed-off-by: Jay Cornwall 
---
 drivers/iommu/amd_iommu_v2.c | 7 +++
 1 file changed, 7 insertions(+)

diff --git a/drivers/iommu/amd_iommu_v2.c b/drivers/iommu/amd_iommu_v2.c
index 1131664..d21d4ed 100644
--- a/drivers/iommu/amd_iommu_v2.c
+++ b/drivers/iommu/amd_iommu_v2.c
@@ -516,6 +516,13 @@ static void do_fault(struct work_struct *work)
goto out;
}
 
+   if (!(vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE))) {
+   /* handle_mm_fault would BUG_ON() */
+   up_read(&mm->mmap_sem);
+   handle_fault_error(fault);
+   goto out;
+   }
+
ret = handle_mm_fault(mm, vma, address, write);
if (ret & VM_FAULT_ERROR) {
/* failed to service fault */
-- 
1.9.1

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu