On 17/12/2020 14:13, Alexandru Elisei wrote:
> check_acked() has several peculiarities: is the only function among the
> check_* functions which calls report() directly, it does two things
> (waits for interrupts and checks for misfired interrupts) and it also
> mixes printf, report_info and report calls.
> 
> check_acked() also reports a pass and returns as soon all the target CPUs
> have received interrupts, However, a CPU not having received an interrupt
> *now* does not guarantee not receiving an erroneous interrupt if we wait
> long enough.
> 
> Rework the function by splitting it into two separate functions, each with
> a single responsibility: wait_for_interrupts(), which waits for the
> expected interrupts to fire, and check_acked() which checks that interrupts
> have been received as expected.
> 
> wait_for_interrupts() also waits an extra 100 milliseconds after the
> expected interrupts have been received in an effort to make sure we don't
> miss misfiring interrupts.
> 
> Splitting check_acked() into two functions will also allow us to
> customize the behavior of each function in the future more easily
> without using an unnecessarily long list of arguments for check_acked().

Yes, splitting this up looks much better, in general this is a nice
cleanup, thank you!

Some comments below:

> 
> CC: Andre Przywara <[email protected]>
> Signed-off-by: Alexandru Elisei <[email protected]>
> ---
>  arm/gic.c | 73 +++++++++++++++++++++++++++++++++++--------------------
>  1 file changed, 47 insertions(+), 26 deletions(-)
> 
> diff --git a/arm/gic.c b/arm/gic.c
> index ec733719c776..a9ef1a5def56 100644
> --- a/arm/gic.c
> +++ b/arm/gic.c
> @@ -62,41 +62,42 @@ static void stats_reset(void)
>       }
>  }
>  
> -static void check_acked(const char *testname, cpumask_t *mask)
> +static void wait_for_interrupts(cpumask_t *mask)
>  {
> -     int missing = 0, extra = 0, unexpected = 0;
>       int nr_pass, cpu, i;
> -     bool bad = false;
>  
>       /* Wait up to 5s for all interrupts to be delivered */
> -     for (i = 0; i < 50; ++i) {
> +     for (i = 0; i < 50; i++) {
>               mdelay(100);
>               nr_pass = 0;
>               for_each_present_cpu(cpu) {
> +                     /*
> +                      * A CPU having received more than one interrupts will
> +                      * show up in check_acked(), and no matter how long we
> +                      * wait it cannot un-receive it. Consider at least one
> +                      * interrupt as a pass.
> +                      */
>                       nr_pass += cpumask_test_cpu(cpu, mask) ?
> -                             acked[cpu] == 1 : acked[cpu] == 0;
> -                     smp_rmb(); /* pairs with smp_wmb in ipi_handler */
> -
> -                     if (bad_sender[cpu] != -1) {
> -                             printf("cpu%d received IPI from wrong sender 
> %d\n",
> -                                     cpu, bad_sender[cpu]);
> -                             bad = true;
> -                     }
> -
> -                     if (bad_irq[cpu] != -1) {
> -                             printf("cpu%d received wrong irq %d\n",
> -                                     cpu, bad_irq[cpu]);
> -                             bad = true;
> -                     }
> +                             acked[cpu] >= 1 : acked[cpu] == 0;


I wonder if this logic was already flawed to begin with: For interrupts
we expect to fire, we wait for up to 5 seconds (really that long?), but
for interrupts we expect *not* to fire we are OK if they don't show up
in the first 100 ms. That does not sound consistent.

I am wondering if we should *not* have the initial 100ms wait at all,
since most interrupts will fire immediately (especially in KVM). And
then have *one* extra wait for, say 100ms, to cover latecomers and
spurious interrupts.

But this might be a topic for some extra work/patch?

>               }
> +
>               if (nr_pass == nr_cpus) {
> -                     report(!bad, "%s", testname);
>                       if (i)
> -                             report_info("took more than %d ms", i * 100);
> +                             report_info("interrupts took more than %d ms", 
> i * 100);
> +                     mdelay(100);

So this is the extra 100ms you mention in the commit message? I am not
convinced this is the right way (see above) or even the right place
(rather at the call site?) to wait. But at least it deserves a comment,
I believe.

>                       return;
>               }
>       }
>  
> +     report_info("interrupts timed-out (5s)");
> +}
> +
> +static bool check_acked(cpumask_t *mask)
> +{
> +     int missing = 0, extra = 0, unexpected = 0;
> +     bool pass = true;
> +     int cpu;
> +
>       for_each_present_cpu(cpu) {
>               if (cpumask_test_cpu(cpu, mask)) {
>                       if (!acked[cpu])
> @@ -107,11 +108,28 @@ static void check_acked(const char *testname, cpumask_t 
> *mask)
>                       if (acked[cpu])
>                               ++unexpected;
>               }
> +             smp_rmb(); /* pairs with smp_wmb in ipi_handler */
> +
> +             if (bad_sender[cpu] != -1) {
> +                     report_info("cpu%d received IPI from wrong sender %d",
> +                                     cpu, bad_sender[cpu]);
> +                     pass = false;
> +             }
> +
> +             if (bad_irq[cpu] != -1) {
> +                     report_info("cpu%d received wrong irq %d",
> +                                     cpu, bad_irq[cpu]);
> +                     pass = false;
> +             }
> +     }
> +
> +     if (missing || extra || unexpected) {
> +             report_info("ACKS: missing=%d extra=%d unexpected=%d",
> +                             missing, extra, unexpected);
> +             pass = false;

Thanks, that so much easier to read now.

Cheers,
Andre

>       }
>  
> -     report(false, "%s", testname);
> -     report_info("Timed-out (5s). ACKS: missing=%d extra=%d unexpected=%d",
> -                 missing, extra, unexpected);
> +     return pass;
>  }
>  
>  static void check_spurious(void)
> @@ -303,7 +321,8 @@ static void ipi_test_self(void)
>       cpumask_clear(&mask);
>       cpumask_set_cpu(smp_processor_id(), &mask);
>       gic->ipi.send_self();
> -     check_acked("IPI: self", &mask);
> +     wait_for_interrupts(&mask);
> +     report(check_acked(&mask), "Interrupts received");
>       report_prefix_pop();
>  }
>  
> @@ -318,7 +337,8 @@ static void ipi_test_smp(void)
>       for (i = smp_processor_id() & 1; i < nr_cpus; i += 2)
>               cpumask_clear_cpu(i, &mask);
>       gic_ipi_send_mask(IPI_IRQ, &mask);
> -     check_acked("IPI: directed", &mask);
> +     wait_for_interrupts(&mask);
> +     report(check_acked(&mask), "Interrupts received");
>       report_prefix_pop();
>  
>       report_prefix_push("broadcast");
> @@ -326,7 +346,8 @@ static void ipi_test_smp(void)
>       cpumask_copy(&mask, &cpu_present_mask);
>       cpumask_clear_cpu(smp_processor_id(), &mask);
>       gic->ipi.send_broadcast();
> -     check_acked("IPI: broadcast", &mask);
> +     wait_for_interrupts(&mask);
> +     report(check_acked(&mask), "Interrupts received");
>       report_prefix_pop();
>  }
>  
> 

_______________________________________________
kvmarm mailing list
[email protected]
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm

Reply via email to