core: Fix and expand the irq affinity descriptor

Thomas Gleixner Wed, 19 Dec 2018 02:53:59 -0800

On Tue, 4 Dec 2018, Dou Liyang wrote:

> Now,  Spreading the interrupt affinity info by a cpumask pointer is not
> enough, meets a problem[1] and hard to expand in the future.
> 
> Fix it by:
> 
>      +-----------------------------------+
>      |                                   |
>      |     struct cpumask *affinity      |
>      |                                   |
>      +-----------------------------------+
>                        |
>     +------------------v-------------------+
>     |                                      |
>     | struct irq_affinity_desc {           |
>     |     struct cpumask   mask;           |
>     |     unsigned int     is_managed : 1; |
>     | };                                   |
>     |                                      |
>     +--------------------------------------+
>


So, I've applied that lot for 4.21 (or whatever number it will be). That's
only the first step for solving Kashyap's problem.

IIRC, Kashyap wanted to get initial interrupt spreading for these extra
magic interrupts as well, but not have them marked managed.

That's trivial to do now with the two queued changes in that area:

  - The rework above
  
  - The support for interrupt sets from Jens

Just adding a small bitmap to struct irq_affinity which allows telling
the core that a particular interrupt set is not managed does the trick.

Untested patch below.

Kashyap, is that what you were looking for and if so, does it work?

Thanks,

        tglx

8<-----------------

Subject: genirq/affinity: Add support for non-managed affinity sets
From: Thomas Gleixner <t...@linutronix.de>
Date: Tue, 18 Dec 2018 16:46:47 +0100

Some drivers need an extra set of interrupts which are not marked managed,
but should get initial interrupt spreading.

Add a bitmap to struct irq_affinity which allows the driver to mark a
particular set of interrupts as non-managed. Check the bitmap during
spreading and use the result to mark the interrupts in the sets
accordingly.

The unmanaged interrupts get initial spreading, but user space can change
their affinity later on.

Usage example:

      struct irq_affinity affd = { .pre_vectors = 2 };
      int sets[2];

      /* Fill in sets[] */

      affd.nr_sets = 2;
      affd.sets = sets;
      affd.unmanaged_sets = 0x02;

      ......

So both sets are properly spread out, but the second set is not marked
managed.

Signed-off-by: Thomas Gleixner <t...@linutronix.de>
---
 include/linux/interrupt.h |   10 ++++++----
 kernel/irq/affinity.c     |   24 ++++++++++++++----------
 2 files changed, 20 insertions(+), 14 deletions(-)

--- a/kernel/irq/affinity.c
+++ b/kernel/irq/affinity.c
@@ -99,7 +99,8 @@ static int __irq_build_affinity_masks(co
                                      cpumask_var_t *node_to_cpumask,
                                      const struct cpumask *cpu_mask,
                                      struct cpumask *nmsk,
-                                     struct irq_affinity_desc *masks)
+                                     struct irq_affinity_desc *masks,
+                                     bool managed)
 {
        int n, nodes, cpus_per_vec, extra_vecs, done = 0;
        int last_affv = firstvec + numvecs;
@@ -154,6 +155,7 @@ static int __irq_build_affinity_masks(co
                        }
                        irq_spread_init_one(&masks[curvec].mask, nmsk,
                                                cpus_per_vec);
+                       masks[curvec].is_managed = managed;
                }
 
                done += v;
@@ -176,7 +178,8 @@ static int __irq_build_affinity_masks(co
 static int irq_build_affinity_masks(const struct irq_affinity *affd,
                                    int startvec, int numvecs, int firstvec,
                                    cpumask_var_t *node_to_cpumask,
-                                   struct irq_affinity_desc *masks)
+                                   struct irq_affinity_desc *masks,
+                                   bool managed)
 {
        int curvec = startvec, nr_present, nr_others;
        int ret = -ENOMEM;
@@ -196,7 +199,8 @@ static int irq_build_affinity_masks(cons
        /* Spread on present CPUs starting from affd->pre_vectors */
        nr_present = __irq_build_affinity_masks(affd, curvec, numvecs,
                                                firstvec, node_to_cpumask,
-                                               cpu_present_mask, nmsk, masks);
+                                               cpu_present_mask, nmsk, masks,
+                                               managed);
 
        /*
         * Spread on non present CPUs starting from the next vector to be
@@ -211,7 +215,7 @@ static int irq_build_affinity_masks(cons
        cpumask_andnot(npresmsk, cpu_possible_mask, cpu_present_mask);
        nr_others = __irq_build_affinity_masks(affd, curvec, numvecs,
                                               firstvec, node_to_cpumask,
-                                              npresmsk, nmsk, masks);
+                                              npresmsk, nmsk, masks, managed);
        put_online_cpus();
 
        if (nr_present < numvecs)
@@ -268,10 +272,11 @@ irq_create_affinity_masks(int nvecs, con
 
        for (i = 0, usedvecs = 0; i < nr_sets; i++) {
                int this_vecs = affd->sets ? affd->sets[i] : affvecs;
+               bool managed = !test_bit(i, &affd->unmanaged_sets);
                int ret;
 
-               ret = irq_build_affinity_masks(affd, curvec, this_vecs,
-                                               curvec, node_to_cpumask, masks);
+               ret = irq_build_affinity_masks(affd, curvec, this_vecs, curvec,
+                                              node_to_cpumask, masks, managed);
                if (ret) {
                        kfree(masks);
                        masks = NULL;
@@ -289,10 +294,6 @@ irq_create_affinity_masks(int nvecs, con
        for (; curvec < nvecs; curvec++)
                cpumask_copy(&masks[curvec].mask, irq_default_affinity);
 
-       /* Mark the managed interrupts */
-       for (i = affd->pre_vectors; i < nvecs - affd->post_vectors; i++)
-               masks[i].is_managed = 1;
-
 outnodemsk:
        free_node_to_cpumask(node_to_cpumask);
        return masks;
@@ -316,6 +317,9 @@ int irq_calc_affinity_vectors(int minvec
        if (affd->nr_sets) {
                int i;
 
+               if (WARN_ON_ONCE(affd->nr_sets > BITS_PER_LONG))
+                       return 0;
+
                for (i = 0, set_vecs = 0;  i < affd->nr_sets; i++)
                        set_vecs += affd->sets[i];
        } else {
--- a/include/linux/interrupt.h
+++ b/include/linux/interrupt.h
@@ -249,12 +249,14 @@ struct irq_affinity_notify {
  *                     the MSI(-X) vector space
  * @nr_sets:           Length of passed in *sets array
  * @sets:              Number of affinitized sets
+ * @unmanaged_sets:    Bitmap to mark members of @sets as unmanaged
  */
 struct irq_affinity {
-       int     pre_vectors;
-       int     post_vectors;
-       int     nr_sets;
-       int     *sets;
+       int             pre_vectors;
+       int             post_vectors;
+       int             nr_sets;
+       int             *sets;
+       unsigned long   unmanaged_sets;
 };
 
 /**

Re: [PATCH 0/3] irq/core: Fix and expand the irq affinity descriptor

Reply via email to