On 29/11/11 23:25, Wolfgang Mauerer wrote:
> On 29/11/11 15:13, Philippe Gerum wrote:
>> On 11/28/2011 02:05 PM, Wolfgang Mauerer wrote:
>>> we are facing some difficulties with GSI interrupt storms
>>> originating from a PCI card that seem to be caused by
>>> ipipe: The card is passed through to qemu-kvm (the setup
>>> is based on the patches sent by Jan some time ago). Once
>>> the card becomes active, we are hit by a tremendous amount
>>> of interrupts (>  100000/s) that keep ipipe fully occupied.
>>> The observed pattern is (excerpt from the ipipe tracer)
>>>
>>> :| common_interrupt+0x20 (__ipipe_spin_unlock_irqrestore+0x62)
>>> :| __ipipe_handle_irq+0x11 (common_interrupt+0x27)
>>> (...)
>>> :  handle_irq+0x9 (do_IRQ+0x66)
>>> :  irq_to_desc+0x4 (handle_irq+0x15)
>>> :  handle_fasteoi_irq+0x14 (handle_irq+0x22)
>>> (...)
>>> :  unmask_ioapic_irq+0x4 (handle_fasteoi_irq+0x94)
>>> :  unmask_ioapic+0xd (unmask_ioapic_irq+0x14)
>>> :  __ipipe_spin_lock_irqsave+0x7 (unmask_ioapic+0x23)
>>> :| __ipipe_spin_lock_irqsave+0x93 (unmask_ioapic+0x23)
>>> :| __io_apic_modify_irq+0x4 (unmask_ioapic+0x41)
>>> :| __ipipe_unlock_irq+0x11 (unmask_ioapic+0x66)
>>> :| __ipipe_spin_unlock_irqrestore+0x9 (unmask_ioapic+0x75)
>>> :| __ipipe_spin_unlock_irqrestore+0x60 (unmask_ioapic+0x75)
>>> :| common_interrupt+0x20 (__ipipe_spin_unlock_irqrestore+0x62)
>>>
>>> That is, as soon as the IRQ in question is unmasked, the
>>> next one is immediately received, and the interrupt handler
>>> in non-RT context never gets a chance to actually service
>>> the interrupt.
>>>
>>> The problem seems to be caused by unmasking the IRQ in
>>> handle_fasteoi_irq(), and with a hack along the lines of
>>>
>>> --- a/kernel/irq/chip.c
>>> +++ b/kernel/irq/chip.c
>>> @@ -586,7 +586,8 @@ handle_fasteoi_irq(unsigned int irq, struct irq_desc
>>> *desc)
>>>          raw_spin_lock(&desc->lock);
>>>          desc->status&= ~IRQ_INPROGRESS;
>>>   #ifdef CONFIG_IPIPE
>>> -       desc->irq_data.chip->irq_unmask(&desc->irq_data);
>>> +       if (irq != WHICHEVER_IRQ_CAUSES_THE_STORM)
>>> +               desc->irq_data.chip->irq_unmask(&desc->irq_data);
>>>   out:
>>>   #else
>>>   out:
>>>
>>> the issue is solved.
>>>
>>> So the question is: Why is it okay to unconditionally unmask
>>> all interrupts in the fasteoi handler? All cards that re-send
>>> interrupts at high frequencies unless they are properly handled
>>> by their device driver should cause the same problem.
>>> I take the early unmasking is an optimisation, or are there any
>>> further reasons for the unconditional unmasking in
>>> handle_fasteoi_irq()?
>>
>> This is not an optimization, the flow for which this code was designed 
>> for is:
>>
>> hw IRQ receipt
>> chip->eoi()
>>      must mask the IRQ line
>> ...
>> real-time or Linux handling, clear device interrupt
>> ...
>> handle_fasteoi()
>>      unmask previous masking
>>
>> It does not cope well with the recent threaded interrupt model addition 
>> in the vanilla kernel. So it will likely break for any device with 
>> threaded level IRQ handling.
> ah true, with non-threaded IRQ handlers, the unmasking happens
> after the handler is finished.
> 
> Modulo any polishing etc., how about something like:

Too invasive ;) It turns out there's some Linux fasteoi
bug present in 2.6.38, patch follows in separate Mail. --Wolfgang

> 
> diff --git a/include/linux/irq.h b/include/linux/irq.h
> index eca7fa8..e28745f 100644
> --- a/include/linux/irq.h
> +++ b/include/linux/irq.h
> @@ -240,7 +240,8 @@ static inline void move_masked_irq(int irq) { }
>  extern int no_irq_affinity;
>  
>  /* Handle irq action chains: */
> -extern irqreturn_t handle_IRQ_event(unsigned int irq, struct irqaction 
> *action);
> +extern irqreturn_t handle_IRQ_event(unsigned int irq, struct irqaction 
> *action,
> +                                 short *threaded);
>  
>  /*
>   * Built-in IRQ handlers for various IRQ types,
> diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c
> index 9671529..d36e327 100644
> --- a/kernel/irq/chip.c
> +++ b/kernel/irq/chip.c
> @@ -479,7 +479,7 @@ handle_simple_irq(unsigned int irq, struct irq_desc *desc)
>       desc->status |= IRQ_INPROGRESS;
>       raw_spin_unlock(&desc->lock);
>  
> -     action_ret = handle_IRQ_event(irq, action);
> +     action_ret = handle_IRQ_event(irq, action, NULL);
>       if (!noirqdebug)
>               note_interrupt(irq, desc, action_ret);
>  
> @@ -526,7 +526,7 @@ handle_level_irq(unsigned int irq, struct irq_desc *desc)
>       desc->status |= IRQ_INPROGRESS;
>       raw_spin_unlock(&desc->lock);
>  
> -     action_ret = handle_IRQ_event(irq, action);
> +     action_ret = handle_IRQ_event(irq, action, NULL);
>       if (!noirqdebug)
>               note_interrupt(irq, desc, action_ret);
>  
> @@ -555,6 +555,7 @@ handle_fasteoi_irq(unsigned int irq, struct irq_desc 
> *desc)
>  {
>       struct irqaction *action;
>       irqreturn_t action_ret;
> +     short threaded = 0;
>  
>       raw_spin_lock(&desc->lock);
>  
> @@ -579,14 +580,15 @@ handle_fasteoi_irq(unsigned int irq, struct irq_desc 
> *desc)
>       desc->status &= ~IRQ_PENDING;
>       raw_spin_unlock(&desc->lock);
>  
> -     action_ret = handle_IRQ_event(irq, action);
> +     action_ret = handle_IRQ_event(irq, action, &threaded);
>       if (!noirqdebug)
>               note_interrupt(irq, desc, action_ret);
>  
>       raw_spin_lock(&desc->lock);
>       desc->status &= ~IRQ_INPROGRESS;
>  #ifdef CONFIG_IPIPE
> -     desc->irq_data.chip->irq_unmask(&desc->irq_data);
> +     if (!threaded)
> +             desc->irq_data.chip->irq_unmask(&desc->irq_data);
>  out:
>  #else
>  out:
> @@ -662,7 +664,7 @@ handle_edge_irq(unsigned int irq, struct irq_desc *desc)
>  
>               desc->status &= ~IRQ_PENDING;
>               raw_spin_unlock(&desc->lock);
> -             action_ret = handle_IRQ_event(irq, action);
> +             action_ret = handle_IRQ_event(irq, action, NULL);
>               if (!noirqdebug)
>                       note_interrupt(irq, desc, action_ret);
>               raw_spin_lock(&desc->lock);
> @@ -693,7 +695,7 @@ handle_percpu_irq(unsigned int irq, struct irq_desc *desc)
>               desc->irq_data.chip->irq_ack(&desc->irq_data);
>  #endif /* CONFIG_IPIPE */
>  
> -     action_ret = handle_IRQ_event(irq, desc->action);
> +     action_ret = handle_IRQ_event(irq, desc->action, NULL);
>       if (!noirqdebug)
>               note_interrupt(irq, desc, action_ret);
>  
> diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c
> index 3540a71..a9cff8d 100644
> --- a/kernel/irq/handle.c
> +++ b/kernel/irq/handle.c
> @@ -58,7 +58,8 @@ static void warn_no_thread(unsigned int irq, struct 
> irqaction *action)
>   *
>   * Handles the action chain of an irq event
>   */
> -irqreturn_t handle_IRQ_event(unsigned int irq, struct irqaction *action)
> +irqreturn_t handle_IRQ_event(unsigned int irq, struct irqaction *action,
> +                          short *threaded)
>  {
>       irqreturn_t ret, retval = IRQ_NONE;
>       unsigned int status = 0;
> @@ -97,6 +98,8 @@ irqreturn_t handle_IRQ_event(unsigned int irq, struct 
> irqaction *action)
>                                            &action->thread_flags))) {
>                               set_bit(IRQTF_RUNTHREAD, &action->thread_flags);
>                               wake_up_process(action->thread);
> +                             if (threaded)
> +                                     *threaded = 1;
>                       }
>  
>                       /* Fall through to add to randomness */
> diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
> index 9033c1c..db35e6b 100644
> --- a/kernel/irq/manage.c
> +++ b/kernel/irq/manage.c
> @@ -608,6 +608,9 @@ static int irq_thread(void *data)
>                       raw_spin_unlock_irq(&desc->lock);
>  
>                       action->thread_fn(action->irq, action->dev_id);
> +#ifdef CONFIG_IPIPE
> +                     desc->irq_data.chip->irq_unmask(&desc->irq_data);
> +#endif
>  
>                       if (oneshot)
>                               irq_finalize_oneshot(action->irq, desc);
> diff --git a/kernel/irq/spurious.c b/kernel/irq/spurious.c
> index 3089d3b..50e4f08 100644
> --- a/kernel/irq/spurious.c
> +++ b/kernel/irq/spurious.c
> @@ -71,7 +71,7 @@ static int try_one_irq(int irq, struct irq_desc *desc)
>                */
>               work = 1;
>               raw_spin_unlock(&desc->lock);
> -             handle_IRQ_event(irq, action);
> +             handle_IRQ_event(irq, action, NULL);
>               raw_spin_lock(&desc->lock);
>               desc->status &= ~IRQ_PENDING;
>       }
> 
> Cheers, Wolfgang
> 
> --
> Siemens AG, Open Source Platforms,
> Corporate Competence Centre Embedded Linux


_______________________________________________
Adeos-main mailing list
Adeos-main@gna.org
https://mail.gna.org/listinfo/adeos-main

Reply via email to