> Use the core's rmap notifiers and delete our own.
> 
> Acked-by: David Arinzon <[email protected]>
> Signed-off-by: Ahmed Zaki <[email protected]>
> ---
>  drivers/net/ethernet/amazon/ena/ena_netdev.c | 43 +-------------------
>  1 file changed, 1 insertion(+), 42 deletions(-)
> 
> diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c
> b/drivers/net/ethernet/amazon/ena/ena_netdev.c
> index c1295dfad0d0..6aab85a7c60a 100644
> --- a/drivers/net/ethernet/amazon/ena/ena_netdev.c
> +++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c
> @@ -5,9 +5,6 @@
> 
>  #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
> 
> -#ifdef CONFIG_RFS_ACCEL
> -#include <linux/cpu_rmap.h>
> -#endif /* CONFIG_RFS_ACCEL */
>  #include <linux/ethtool.h>
>  #include <linux/kernel.h>
>  #include <linux/module.h>
> @@ -162,30 +159,6 @@ int ena_xmit_common(struct ena_adapter
> *adapter,
>         return 0;
>  }
> 
> -static int ena_init_rx_cpu_rmap(struct ena_adapter *adapter) -{ -#ifdef
> CONFIG_RFS_ACCEL
> -       u32 i;
> -       int rc;
> -
> -       adapter->netdev->rx_cpu_rmap = alloc_irq_cpu_rmap(adapter-
> >num_io_queues);
> -       if (!adapter->netdev->rx_cpu_rmap)
> -               return -ENOMEM;
> -       for (i = 0; i < adapter->num_io_queues; i++) {
> -               int irq_idx = ENA_IO_IRQ_IDX(i);
> -
> -               rc = irq_cpu_rmap_add(adapter->netdev->rx_cpu_rmap,
> -                                     pci_irq_vector(adapter->pdev, irq_idx));
> -               if (rc) {
> -                       free_irq_cpu_rmap(adapter->netdev->rx_cpu_rmap);
> -                       adapter->netdev->rx_cpu_rmap = NULL;
> -                       return rc;
> -               }
> -       }
> -#endif /* CONFIG_RFS_ACCEL */
> -       return 0;
> -}
> -
>  static void ena_init_io_rings_common(struct ena_adapter *adapter,
>                                      struct ena_ring *ring, u16 qid)  { @@ 
> -1596,7 +1569,7 @@
> static int ena_enable_msix(struct ena_adapter *adapter)
>                 adapter->num_io_queues = irq_cnt - ENA_ADMIN_MSIX_VEC;
>         }
> 
> -       if (ena_init_rx_cpu_rmap(adapter))
> +       if (netif_enable_cpu_rmap(adapter->netdev,
> + adapter->num_io_queues))
>                 netif_warn(adapter, probe, adapter->netdev,
>                            "Failed to map IRQs to CPUs\n");
> 
> @@ -1742,13 +1715,6 @@ static void ena_free_io_irq(struct ena_adapter
> *adapter)
>         struct ena_irq *irq;
>         int i;
> 
> -#ifdef CONFIG_RFS_ACCEL
> -       if (adapter->msix_vecs >= 1) {
> -               free_irq_cpu_rmap(adapter->netdev->rx_cpu_rmap);
> -               adapter->netdev->rx_cpu_rmap = NULL;
> -       }
> -#endif /* CONFIG_RFS_ACCEL */
> -
>         for (i = ENA_IO_IRQ_FIRST_IDX; i <
> ENA_MAX_MSIX_VEC(io_queue_count); i++) {
>                 irq = &adapter->irq_tbl[i];
>                 irq_set_affinity_hint(irq->vector, NULL); @@ -4131,13 +4097,6 
> @@
> static void __ena_shutoff(struct pci_dev *pdev, bool shutdown)
>         ena_dev = adapter->ena_dev;
>         netdev = adapter->netdev;
> 
> -#ifdef CONFIG_RFS_ACCEL
> -       if ((adapter->msix_vecs >= 1) && (netdev->rx_cpu_rmap)) {
> -               free_irq_cpu_rmap(netdev->rx_cpu_rmap);
> -               netdev->rx_cpu_rmap = NULL;
> -       }
> -
> -#endif /* CONFIG_RFS_ACCEL */
>         /* Make sure timer and reset routine won't be called after
>          * freeing device resources.
>          */
> --
> 2.43.0

Hi Ahmed,

After the merging of this patch, I see the below stack trace when the IRQs are 
freed.
It can be reproduced by unloading and loading the driver using `modprobe -r 
ena; modprobe ena` (happens during unload)

Based on the patchset and the changes to other drivers, I think there's a 
missing call to the function
that releases the affinity notifier (The warn is in 
https://web.git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next.git/tree/kernel/irq/manage.c#n2031)

I saw in the intel code in the patchset that ` netif_napi_set_irq(<napi>, -1);` 
is added?

After adding the code snippet I don't see this anymore, but I want to 
understand whether it's the right call by design.

@@ -1716,8 +1716,11 @@ static void ena_free_io_irq(struct ena_adapter *adapter)
        int i;

        for (i = ENA_IO_IRQ_FIRST_IDX; i < ENA_MAX_MSIX_VEC(io_queue_count); 
i++) {
+               struct ena_napi *ena_napi;
+
                irq = &adapter->irq_tbl[i];
                irq_set_affinity_hint(irq->vector, NULL);
+               ena_napi = (struct ena_napi *)irq->data;
+               netif_napi_set_irq(&ena_napi->napi, -1);
                free_irq(irq->vector, irq->data);
        }
 }

[  484.544586]  ? __warn+0x84/0x130
[  484.544843]  ? free_irq+0x5c/0x70
[  484.545105]  ? report_bug+0x18a/0x1a0
[  484.545390]  ? handle_bug+0x53/0x90
[  484.545664]  ? exc_invalid_op+0x14/0x70
[  484.545959]  ? asm_exc_invalid_op+0x16/0x20
[  484.546279]  ? free_irq+0x5c/0x70
[  484.546545]  ? free_irq+0x10/0x70
[  484.546807]  ena_free_io_irq+0x5f/0x70 [ena]
[  484.547138]  ena_down+0x250/0x3e0 [ena]
[  484.547435]  ena_destroy_device+0x118/0x150 [ena]
[  484.547796]  __ena_shutoff+0x5a/0xe0 [ena]
[  484.548110]  pci_device_remove+0x3b/0xb0
[  484.548412]  device_release_driver_internal+0x193/0x200
[  484.548804]  driver_detach+0x44/0x90
[  484.549084]  bus_remove_driver+0x69/0xf0
[  484.549386]  pci_unregister_driver+0x2a/0xb0
[  484.549717]  ena_cleanup+0xc/0x130 [ena]
[  484.550021]  __do_sys_delete_module.constprop.0+0x176/0x310
[  484.550438]  ? syscall_trace_enter+0xfb/0x1c0
[  484.550782]  do_syscall_64+0x5b/0x170
[  484.551067]  entry_SYSCALL_64_after_hwframe+0x76/0x7e

Thanks,
David

Reply via email to